]>
Commit | Line | Data |
---|---|---|
bd5b749c | 1 | /* |
e29e285d | 2 | * Copyright (c) 2015 Apple Inc. All rights reserved. |
bd5b749c A |
3 | * |
4 | * @APPLE_LICENSE_HEADER_START@ | |
d7384798 | 5 | * |
bd5b749c A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. Please obtain a copy of the License at | |
10 | * http://www.opensource.apple.com/apsl/ and read it before using this | |
11 | * file. | |
d7384798 | 12 | * |
bd5b749c A |
13 | * The Original Code and all software distributed under the License are |
14 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
15 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
16 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
17 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
18 | * Please see the License for the specific language governing rights and | |
19 | * limitations under the License. | |
d7384798 | 20 | * |
bd5b749c A |
21 | * @APPLE_LICENSE_HEADER_END@ |
22 | */ | |
f64f9b69 | 23 | |
bd5b749c A |
24 | /* |
25 | CFLocaleIdentifier.c | |
d7384798 | 26 | Copyright (c) 2002-2014, Apple Inc. All rights reserved. |
8ca704e1 | 27 | Responsibility: David Smith |
bd5b749c A |
28 | |
29 | CFLocaleIdentifier.c defines | |
30 | - enum value kLocaleIdentifierCStringMax | |
31 | - structs KeyStringToResultString, SpecialCaseUpdates | |
32 | and provides the following data for the functions | |
33 | CFLocaleCreateCanonicalLocaleIdentifierFromScriptManagerCodes, | |
34 | CFLocaleCreateCanonicalLocaleIdentifierFromString | |
35 | CFLocaleCreateCanonicalLanguageIdentifierFromString | |
36 | ||
37 | 1. static const char * regionCodeToLocaleString[]; enum kNumRegionCodeToLocaleString; | |
38 | map RegionCode 0..kNumRegionCodeToLocaleString-1 to canonical locale string | |
39 | ||
40 | 2. static const char * langCodeToLocaleString[]; enum kNumLangCodeToLocaleString; | |
41 | map LangCode 0..kNumLangCodeToLocaleString-1 to canonical locale string | |
42 | ||
43 | 3. static const KeyStringToResultString oldAppleLocaleToCanonical[]; enum kNumOldAppleLocaleToCanonical; | |
44 | map old Apple string oldAppleLocaleToCanonical[n].key | |
45 | to canonical locale string oldAppleLocaleToCanonical[n].result | |
46 | for n = 0..kNumOldAppleLocaleToCanonical-1 | |
47 | ||
48 | 4. static const KeyStringToResultString localeStringPrefixToCanonical[]; enum kNumLocaleStringPrefixToCanonical; | |
49 | map non-canonical language prefix (3-letter, obsolete) localeStringPrefixToCanonical[n].key | |
50 | to updated replacement localeStringPrefixToCanonical[n].result | |
51 | for n = 0..kNumLocaleStringPrefixToCanonical-1 | |
52 | ||
53 | 5. static const SpecialCaseUpdates specialCases[]; | |
54 | various special cases for updating region codes, or for updating language codes based on region codes | |
55 | ||
56 | 6. static const KeyStringToResultString localeStringRegionToDefaults[]; enum kNumLocaleStringRegionToDefaults; | |
57 | map locale string region tag localeStringRegionToDefaults[n].key | |
58 | to default substrings to delete localeStringRegionToDefaults[n].result | |
59 | for n = 0..kNumLocaleStringRegionToDefaults-1 | |
60 | ||
61 | 7. static const KeyStringToResultString localeStringPrefixToDefaults[]; enum kNumLocaleStringPrefixToDefaults; | |
62 | map locale string initial part localeStringPrefixToDefaults[n].key | |
63 | to default substrings to delete localeStringPrefixToDefaults[n].result | |
64 | for n = 0..kNumLocaleStringPrefixToDefaults-1 | |
65 | ||
66 | 8. static const KeyStringToResultString appleLocaleToLanguageString[]; enum kNumAppleLocaleToLanguageString; | |
67 | map Apple locale string appleLocaleToLanguageString[n].key | |
68 | to equivalent language string appleLocaleToLanguageString[n].result | |
69 | for n = 0..kNumAppleLocaleToLanguageString-1 | |
70 | ||
71 | */ | |
72 | ||
73 | #include <CoreFoundation/CFString.h> | |
8ca704e1 | 74 | #include <CoreFoundation/CFCalendar.h> |
bd5b749c A |
75 | #include <ctype.h> |
76 | #include <string.h> | |
77 | #include <stdlib.h> | |
8ca704e1 | 78 | #include <stdio.h> |
856091c5 | 79 | #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX |
bd5b749c | 80 | #include <unicode/uloc.h> |
856091c5 A |
81 | #else |
82 | #define ULOC_KEYWORD_SEPARATOR '@' | |
83 | #define ULOC_FULLNAME_CAPACITY 56 | |
84 | #define ULOC_KEYWORD_AND_VALUES_CAPACITY 100 | |
85 | #endif | |
cf7d2af9 A |
86 | #include "CFInternal.h" |
87 | #include "CFLocaleInternal.h" | |
bd5b749c A |
88 | |
89 | // Max byte length of locale identifier (ASCII) as C string, including terminating null byte | |
90 | enum { | |
91 | kLocaleIdentifierCStringMax = ULOC_FULLNAME_CAPACITY + ULOC_KEYWORD_AND_VALUES_CAPACITY // currently 56 + 100 | |
92 | }; | |
93 | ||
// One row of a string-to-string lookup table used by the data tables for
// CFLocaleCreateCanonicalLocaleIdentifierFromString: `key` is the string that
// is looked up, `result` is the string it maps to.
typedef struct KeyStringToResultString {
    const char *key;        // string to match
    const char *result;     // string the key maps to
} KeyStringToResultString;
100 | ||
// One row of the special-case table used by the data tables for
// CFLocaleCreateCanonicalLocaleIdentifierFromString. Each row names a language
// and carries two (region, update) string pairs.
// NOTE(review): how the consumer interprets reg1/update1 vs. reg2/update2 is
// not visible here -- confirm against the code that walks specialCases[].
typedef struct SpecialCaseUpdates {
    const char *lang;       // language the row applies to
    const char *reg1;       // first region string
    const char *update1;    // update string paired with reg1
    const char *reg2;       // second region string
    const char *update2;    // update string paired with reg2
} SpecialCaseUpdates;
110 | ||
111 | ||
// Canonical locale string for each RegionCode; the array index IS the region code.
// NULL marks a region code that has no locale string.
//
// Per-entry comment layout:
//   index  region code name;  language code + name;  [remarks]  [# __CFBundleLocaleAbbreviationsArray string, if different]
static const char * const regionCodeToLocaleString[] = {
    "en_US",        //   0 verUS; 0 langEnglish;
    "fr_FR",        //   1 verFrance; 1 langFrench;
    "en_GB",        //   2 verBritain; 0 langEnglish;
    "de_DE",        //   3 verGermany; 2 langGerman;
    "it_IT",        //   4 verItaly; 3 langItalian;
    "nl_NL",        //   5 verNetherlands; 4 langDutch;
    "nl_BE",        //   6 verFlemish; 34 langFlemish (redundant, =Dutch);
    "sv_SE",        //   7 verSweden; 5 langSwedish;
    "es_ES",        //   8 verSpain; 6 langSpanish;
    "da_DK",        //   9 verDenmark; 7 langDanish;
    "pt_PT",        //  10 verPortugal; 8 langPortuguese;
    "fr_CA",        //  11 verFrCanada; 1 langFrench;
    "nb_NO",        //  12 verNorway; 9 langNorwegian (Bokmal); # "no_NO"
    "he_IL",        //  13 verIsrael; 10 langHebrew;
    "ja_JP",        //  14 verJapan; 11 langJapanese;
    "en_AU",        //  15 verAustralia; 0 langEnglish;
    "ar",           //  16 verArabic; 12 langArabic;
    "fi_FI",        //  17 verFinland; 13 langFinnish;
    "fr_CH",        //  18 verFrSwiss; 1 langFrench;
    "de_CH",        //  19 verGrSwiss; 2 langGerman;
    "el_GR",        //  20 verGreece; 14 langGreek (modern)-Grek-mono;
    "is_IS",        //  21 verIceland; 15 langIcelandic;
    "mt_MT",        //  22 verMalta; 16 langMaltese;
    "el_CY",        //  23 verCyprus; 14 langGreek?; el or tr? guess el # ""
    "tr_TR",        //  24 verTurkey; 17 langTurkish;
    "hr_HR",        //  25 verYugoCroatian; 18 langCroatian; * one-way mapping -> verCroatia
    "nl_NL",        //  26 KCHR, Netherlands; 4 langDutch; * one-way mapping
    "nl_BE",        //  27 KCHR, verFlemish; 34 langFlemish; * one-way mapping
    "_CA",          //  28 KCHR, Canada-en/fr?; -1 none; * one-way mapping # "en_CA"
    "_CA",          //  29 KCHR, Canada-en/fr?; -1 none; * one-way mapping # "en_CA"
    "pt_PT",        //  30 KCHR, Portugal; 8 langPortuguese; * one-way mapping
    "nb_NO",        //  31 KCHR, Norway; 9 langNorwegian (Bokmal); * one-way mapping # "no_NO"
    "da_DK",        //  32 KCHR, Denmark; 7 langDanish; * one-way mapping
    "hi_IN",        //  33 verIndiaHindi; 21 langHindi;
    "ur_PK",        //  34 verPakistanUrdu; 20 langUrdu;
    "tr_TR",        //  35 verTurkishModified; 17 langTurkish; * one-way mapping
    "it_CH",        //  36 verItalianSwiss; 3 langItalian;
    "en_001",       //  37 verInternational; 0 langEnglish; ASCII only # "en"
    NULL,           //  38 *unassigned; -1 none; * one-way mapping # ""
    "ro_RO",        //  39 verRomania; 37 langRomanian;
    "grc",          //  40 verGreekAncient; 148 langGreekAncient -Grek-poly; # "el_GR"
    "lt_LT",        //  41 verLithuania; 24 langLithuanian;
    "pl_PL",        //  42 verPoland; 25 langPolish;
    "hu_HU",        //  43 verHungary; 26 langHungarian;
    "et_EE",        //  44 verEstonia; 27 langEstonian;
    "lv_LV",        //  45 verLatvia; 28 langLatvian;
    "se",           //  46 verSami; 29 langSami;
    "fo_FO",        //  47 verFaroeIsl; 30 langFaroese;
    "fa_IR",        //  48 verIran; 31 langFarsi/Persian;
    "ru_RU",        //  49 verRussia; 32 langRussian;
    "ga_IE",        //  50 verIreland; 35 langIrishGaelic (no dots);
    "ko_KR",        //  51 verKorea; 23 langKorean;
    "zh_CN",        //  52 verChina; 33 langSimpChinese;
    "zh_TW",        //  53 verTaiwan; 19 langTradChinese;
    "th_TH",        //  54 verThailand; 22 langThai;
    "und",          //  55 verScriptGeneric; -1 none; # "" // <1.9>
    "cs_CZ",        //  56 verCzech; 38 langCzech;
    "sk_SK",        //  57 verSlovak; 39 langSlovak;
    "und",          //  58 verEastAsiaGeneric; -1 none; * one-way mapping # "" // <1.9>
    "hu_HU",        //  59 verMagyar; 26 langHungarian; * one-way mapping -> verHungary
    "bn",           //  60 verBengali; 67 langBengali; _IN or _BD? guess generic
    "be_BY",        //  61 verBelarus; 46 langBelorussian;
    "uk_UA",        //  62 verUkraine; 45 langUkrainian;
    NULL,           //  63 *unused; -1 none; * one-way mapping # ""
    "el_GR",        //  64 verGreeceAlt; 14 langGreek (modern)-Grek-mono; * one-way mapping
    "sr_RS",        //  65 verSerbian; 42 langSerbian -Cyrl; // <1.18>
    "sl_SI",        //  66 verSlovenian; 40 langSlovenian;
    "mk_MK",        //  67 verMacedonian; 43 langMacedonian;
    "hr_HR",        //  68 verCroatia; 18 langCroatian;
    NULL,           //  69 *unused; -1 none; * one-way mapping # ""
    "de-1996",      //  70 verGermanReformed; 2 langGerman; 1996 orthogr. # "de_DE"
    "pt_BR",        //  71 verBrazil; 8 langPortuguese;
    "bg_BG",        //  72 verBulgaria; 44 langBulgarian;
    "ca_ES",        //  73 verCatalonia; 130 langCatalan;
    "mul",          //  74 verMultilingual; -1 none; # ""
    "gd",           //  75 verScottishGaelic; 144 langScottishGaelic;
    "gv",           //  76 verManxGaelic; 145 langManxGaelic;
    "br",           //  77 verBreton; 142 langBreton;
    "iu_CA",        //  78 verNunavut; 143 langInuktitut -Cans;
    "cy",           //  79 verWelsh; 128 langWelsh;
    "_CA",          //  80 KCHR, Canada-en/fr?; -1 none; * one-way mapping # "en_CA"
    "ga-Latg_IE",   //  81 verIrishGaelicScrip; 146 langIrishGaelicScript -dots; # "ga_IE" // <xx>
    "en_CA",        //  82 verEngCanada; 0 langEnglish;
    "dz_BT",        //  83 verBhutan; 137 langDzongkha;
    "hy_AM",        //  84 verArmenian; 51 langArmenian;
    "ka_GE",        //  85 verGeorgian; 52 langGeorgian;
    "es_419",       //  86 verSpLatinAmerica; 6 langSpanish; # "es"
    "es_ES",        //  87 KCHR, Spain; 6 langSpanish; * one-way mapping
    "to_TO",        //  88 verTonga; 147 langTongan;
    "pl_PL",        //  89 KCHR, Poland; 25 langPolish; * one-way mapping
    "ca_ES",        //  90 KCHR, Catalonia; 130 langCatalan; * one-way mapping
    "fr_001",       //  91 verFrenchUniversal; 1 langFrench;
    "de_AT",        //  92 verAustria; 2 langGerman;
    "es_419",       //  93 > verSpLatinAmerica; 6 langSpanish; * one-way mapping # "es"
    "gu_IN",        //  94 verGujarati; 69 langGujarati;
    "pa",           //  95 verPunjabi; 70 langPunjabi; _IN or _PK? guess generic
    "ur_IN",        //  96 verIndiaUrdu; 20 langUrdu;
    "vi_VN",        //  97 verVietnam; 80 langVietnamese;
    "fr_BE",        //  98 verFrBelgium; 1 langFrench;
    "uz_UZ",        //  99 verUzbek; 47 langUzbek;
    "en_SG",        // 100 verSingapore; 0 langEnglish?; en, zh, or ms? guess en # ""
    "nn_NO",        // 101 verNynorsk; 151 langNynorsk; # ""
    "af_ZA",        // 102 verAfrikaans; 141 langAfrikaans;
    "eo",           // 103 verEsperanto; 94 langEsperanto;
    "mr_IN",        // 104 verMarathi; 66 langMarathi;
    "bo",           // 105 verTibetan; 63 langTibetan;
    "ne_NP",        // 106 verNepal; 64 langNepali;
    "kl",           // 107 verGreenland; 149 langGreenlandic;
    "en_IE",        // 108 verIrelandEnglish; 0 langEnglish; # (no entry)
};

// Number of entries in regionCodeToLocaleString; valid region codes are 0..kNumRegionCodeToLocaleString-1.
enum {
    kNumRegionCodeToLocaleString = sizeof regionCodeToLocaleString / sizeof regionCodeToLocaleString[0]
};
230 | ||
// Canonical locale string for each LangCode; the array index IS the language code.
// NULL marks a language code that has no locale string (codes 95..127 are an unassigned gap).
//
// Per-entry comment layout:
//   index  language code name  [-script];  [remarks]  [# __CFBundleLanguageAbbreviationsArray string, if different]
static const char * const langCodeToLocaleString[] = {
    "en",           //   0 langEnglish;
    "fr",           //   1 langFrench;
    "de",           //   2 langGerman;
    "it",           //   3 langItalian;
    "nl",           //   4 langDutch;
    "sv",           //   5 langSwedish;
    "es",           //   6 langSpanish;
    "da",           //   7 langDanish;
    "pt",           //   8 langPortuguese;
    "nb",           //   9 langNorwegian (Bokmal); # "no"
    "he",           //  10 langHebrew -Hebr;
    "ja",           //  11 langJapanese -Jpan;
    "ar",           //  12 langArabic -Arab;
    "fi",           //  13 langFinnish;
    "el",           //  14 langGreek (modern)-Grek-mono;
    "is",           //  15 langIcelandic;
    "mt",           //  16 langMaltese -Latn;
    "tr",           //  17 langTurkish -Latn;
    "hr",           //  18 langCroatian;
    "zh-Hant",      //  19 langTradChinese; # "zh"
    "ur",           //  20 langUrdu -Arab;
    "hi",           //  21 langHindi -Deva;
    "th",           //  22 langThai -Thai;
    "ko",           //  23 langKorean -Hang;
    "lt",           //  24 langLithuanian;
    "pl",           //  25 langPolish;
    "hu",           //  26 langHungarian;
    "et",           //  27 langEstonian;
    "lv",           //  28 langLatvian;
    "se",           //  29 langSami;
    "fo",           //  30 langFaroese;
    "fa",           //  31 langFarsi/Persian -Arab;
    "ru",           //  32 langRussian -Cyrl;
    "zh-Hans",      //  33 langSimpChinese; # "zh"
    "nl-BE",        //  34 langFlemish (redundant, =Dutch); # "nl"
    "ga",           //  35 langIrishGaelic (no dots);
    "sq",           //  36 langAlbanian; no region codes
    "ro",           //  37 langRomanian;
    "cs",           //  38 langCzech;
    "sk",           //  39 langSlovak;
    "sl",           //  40 langSlovenian;
    "yi",           //  41 langYiddish -Hebr; no region codes
    "sr",           //  42 langSerbian -Cyrl;
    "mk",           //  43 langMacedonian -Cyrl;
    "bg",           //  44 langBulgarian -Cyrl;
    "uk",           //  45 langUkrainian -Cyrl;
    "be",           //  46 langBelorussian -Cyrl;
    "uz",           //  47 langUzbek -Cyrl; also -Latn, -Arab
    "kk",           //  48 langKazakh -Cyrl; no region codes; also -Latn, -Arab
    "az-Cyrl",      //  49 langAzerbaijani -Cyrl; no region codes # "az"
    "az-Arab",      //  50 langAzerbaijanAr -Arab; no region codes # "az"
    "hy",           //  51 langArmenian -Armn;
    "ka",           //  52 langGeorgian -Geor;
    "mo",           //  53 langMoldavian -Cyrl; no region codes
    "ky",           //  54 langKirghiz -Cyrl; no region codes; also -Latn, -Arab
    "tg",           //  55 langTajiki -Cyrl; no region codes; also -Latn, -Arab
    "tk-Cyrl",      //  56 langTurkmen -Cyrl; no region codes; also -Latn, -Arab
    "mn-Mong",      //  57 langMongolian -Mong; no region codes # "mn"
    "mn",           //  58 langMongolianCyr -Cyrl; no region codes # "mn"
    "ps",           //  59 langPashto -Arab; no region codes
    "ku",           //  60 langKurdish -Arab; no region codes
    "ks",           //  61 langKashmiri -Arab; no region codes
    "sd",           //  62 langSindhi -Arab; no region codes
    "bo",           //  63 langTibetan -Tibt;
    "ne",           //  64 langNepali -Deva;
    "sa",           //  65 langSanskrit -Deva; no region codes
    "mr",           //  66 langMarathi -Deva;
    "bn",           //  67 langBengali -Beng;
    "as",           //  68 langAssamese -Beng; no region codes
    "gu",           //  69 langGujarati -Gujr;
    "pa",           //  70 langPunjabi -Guru;
    "or",           //  71 langOriya -Orya; no region codes
    "ml",           //  72 langMalayalam -Mlym; no region codes
    "kn",           //  73 langKannada -Knda; no region codes
    "ta",           //  74 langTamil -Taml; no region codes
    "te",           //  75 langTelugu -Telu; no region codes
    "si",           //  76 langSinhalese -Sinh; no region codes
    "my",           //  77 langBurmese -Mymr; no region codes
    "km",           //  78 langKhmer -Khmr; no region codes
    "lo",           //  79 langLao -Laoo; no region codes
    "vi",           //  80 langVietnamese -Latn;
    "id",           //  81 langIndonesian -Latn; no region codes
    "fil",          //  82 langTagalog -Latn; no region codes
    "ms",           //  83 langMalayRoman -Latn; no region codes # "ms"
    "ms-Arab",      //  84 langMalayArabic -Arab; no region codes # "ms"
    "am",           //  85 langAmharic -Ethi; no region codes
    "ti",           //  86 langTigrinya -Ethi; no region codes
    "om",           //  87 langOromo -Ethi; no region codes
    "so",           //  88 langSomali -Latn; no region codes
    "sw",           //  89 langSwahili -Latn; no region codes
    "rw",           //  90 langKinyarwanda -Latn; no region codes
    "rn",           //  91 langRundi -Latn; no region codes
    "ny",           //  92 langNyanja/Chewa -Latn; no region codes # ""
    "mg",           //  93 langMalagasy -Latn; no region codes
    "eo",           //  94 langEsperanto -Latn;
    // Gap filler: 3 rows x 11 NULLs = 33 entries, so that "cy" lands on index 128.
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,   //  95 to 105 (gap)
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,   // 106 to 116 (gap)
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,   // 117 to 127 (gap)
    "cy",           // 128 langWelsh -Latn;
    "eu",           // 129 langBasque -Latn; no region codes
    "ca",           // 130 langCatalan -Latn;
    "la",           // 131 langLatin -Latn; no region codes
    "qu",           // 132 langQuechua -Latn; no region codes
    "gn",           // 133 langGuarani -Latn; no region codes
    "ay",           // 134 langAymara -Latn; no region codes
    "tt-Cyrl",      // 135 langTatar -Cyrl; no region codes
    "ug",           // 136 langUighur -Arab; no region codes
    "dz",           // 137 langDzongkha -Tibt;
    "jv",           // 138 langJavaneseRom -Latn; no region codes
    "su",           // 139 langSundaneseRom -Latn; no region codes
    "gl",           // 140 langGalician -Latn; no region codes
    "af",           // 141 langAfrikaans -Latn;
    "br",           // 142 langBreton -Latn;
    "iu",           // 143 langInuktitut -Cans;
    "gd",           // 144 langScottishGaelic;
    "gv",           // 145 langManxGaelic -Latn;
    "ga-Latg",      // 146 langIrishGaelicScript -Latn-dots; # "ga" // <xx>
    "to",           // 147 langTongan -Latn;
    "grc",          // 148 langGreekAncient -Grek-poly; # "el"
    "kl",           // 149 langGreenlandic -Latn;
    "az",           // 150 langAzerbaijanRoman -Latn; no region codes # "az"
    "nn",           // 151 langNynorsk -Latn; # (no entry)
};

// Number of entries in langCodeToLocaleString; valid language codes are 0..kNumLangCodeToLocaleString-1.
enum {
    kNumLangCodeToLocaleString = sizeof langCodeToLocaleString / sizeof langCodeToLocaleString[0]
};
362 | ||
363 | static const KeyStringToResultString oldAppleLocaleToCanonical[] = { | |
364 | // Map obsolete/old-style Apple strings to canonical | |
365 | // Must be sorted according to how strcmp compares the strings in the first column | |
366 | // | |
367 | // non-canonical canonical [ comment ] # source/reason for non-canonical string | |
368 | // string string | |
369 | // ------------- --------- | |
370 | { "Afrikaans", "af" }, // # __CFBundleLanguageNamesArray | |
371 | { "Albanian", "sq" }, // # __CFBundleLanguageNamesArray | |
372 | { "Amharic", "am" }, // # __CFBundleLanguageNamesArray | |
373 | { "Arabic", "ar" }, // # __CFBundleLanguageNamesArray | |
374 | { "Armenian", "hy" }, // # __CFBundleLanguageNamesArray | |
375 | { "Assamese", "as" }, // # __CFBundleLanguageNamesArray | |
376 | { "Aymara", "ay" }, // # __CFBundleLanguageNamesArray | |
377 | { "Azerbaijani", "az" }, // -Arab,-Cyrl,-Latn? # __CFBundleLanguageNamesArray (had 3 entries "Azerbaijani" for "az-Arab", "az-Cyrl", "az-Latn") | |
378 | { "Basque", "eu" }, // # __CFBundleLanguageNamesArray | |
379 | { "Belarusian", "be" }, // # handle other names | |
380 | { "Belorussian", "be" }, // # handle other names | |
381 | { "Bengali", "bn" }, // # __CFBundleLanguageNamesArray | |
382 | { "Brazilian Portugese", "pt-BR" }, // # from Installer.app Info.plist IFLanguages key, misspelled | |
383 | { "Brazilian Portuguese", "pt-BR" }, // # correct spelling for above | |
384 | { "Breton", "br" }, // # __CFBundleLanguageNamesArray | |
385 | { "Bulgarian", "bg" }, // # __CFBundleLanguageNamesArray | |
386 | { "Burmese", "my" }, // # __CFBundleLanguageNamesArray | |
387 | { "Byelorussian", "be" }, // # __CFBundleLanguageNamesArray | |
388 | { "Catalan", "ca" }, // # __CFBundleLanguageNamesArray | |
389 | { "Chewa", "ny" }, // # handle other names | |
390 | { "Chichewa", "ny" }, // # handle other names | |
391 | { "Chinese", "zh" }, // -Hans,-Hant? # __CFBundleLanguageNamesArray (had 2 entries "Chinese" for "zh-Hant", "zh-Hans") | |
392 | { "Chinese, Simplified", "zh-Hans" }, // # from Installer.app Info.plist IFLanguages key | |
393 | { "Chinese, Traditional", "zh-Hant" }, // # correct spelling for below | |
394 | { "Chinese, Tradtional", "zh-Hant" }, // # from Installer.app Info.plist IFLanguages key, misspelled | |
395 | { "Croatian", "hr" }, // # __CFBundleLanguageNamesArray | |
396 | { "Czech", "cs" }, // # __CFBundleLanguageNamesArray | |
397 | { "Danish", "da" }, // # __CFBundleLanguageNamesArray | |
398 | { "Dutch", "nl" }, // # __CFBundleLanguageNamesArray (had 2 entries "Dutch" for "nl", "nl-BE") | |
399 | { "Dzongkha", "dz" }, // # __CFBundleLanguageNamesArray | |
400 | { "English", "en" }, // # __CFBundleLanguageNamesArray | |
401 | { "Esperanto", "eo" }, // # __CFBundleLanguageNamesArray | |
402 | { "Estonian", "et" }, // # __CFBundleLanguageNamesArray | |
403 | { "Faroese", "fo" }, // # __CFBundleLanguageNamesArray | |
404 | { "Farsi", "fa" }, // # __CFBundleLanguageNamesArray | |
405 | { "Finnish", "fi" }, // # __CFBundleLanguageNamesArray | |
406 | { "Flemish", "nl-BE" }, // # handle other names | |
407 | { "French", "fr" }, // # __CFBundleLanguageNamesArray | |
408 | { "Galician", "gl" }, // # __CFBundleLanguageNamesArray | |
409 | { "Gallegan", "gl" }, // # handle other names | |
410 | { "Georgian", "ka" }, // # __CFBundleLanguageNamesArray | |
411 | { "German", "de" }, // # __CFBundleLanguageNamesArray | |
412 | { "Greek", "el" }, // # __CFBundleLanguageNamesArray (had 2 entries "Greek" for "el", "grc") | |
413 | { "Greenlandic", "kl" }, // # __CFBundleLanguageNamesArray | |
414 | { "Guarani", "gn" }, // # __CFBundleLanguageNamesArray | |
415 | { "Gujarati", "gu" }, // # __CFBundleLanguageNamesArray | |
416 | { "Hawaiian", "haw" }, // # handle new languages | |
417 | { "Hebrew", "he" }, // # __CFBundleLanguageNamesArray | |
418 | { "Hindi", "hi" }, // # __CFBundleLanguageNamesArray | |
419 | { "Hungarian", "hu" }, // # __CFBundleLanguageNamesArray | |
420 | { "Icelandic", "is" }, // # __CFBundleLanguageNamesArray | |
421 | { "Indonesian", "id" }, // # __CFBundleLanguageNamesArray | |
422 | { "Inuktitut", "iu" }, // # __CFBundleLanguageNamesArray | |
423 | { "Irish", "ga" }, // # __CFBundleLanguageNamesArray (had 2 entries "Irish" for "ga", "ga-dots") | |
424 | { "Italian", "it" }, // # __CFBundleLanguageNamesArray | |
425 | { "Japanese", "ja" }, // # __CFBundleLanguageNamesArray | |
426 | { "Javanese", "jv" }, // # __CFBundleLanguageNamesArray | |
427 | { "Kalaallisut", "kl" }, // # handle other names | |
428 | { "Kannada", "kn" }, // # __CFBundleLanguageNamesArray | |
429 | { "Kashmiri", "ks" }, // # __CFBundleLanguageNamesArray | |
430 | { "Kazakh", "kk" }, // # __CFBundleLanguageNamesArray | |
431 | { "Khmer", "km" }, // # __CFBundleLanguageNamesArray | |
432 | { "Kinyarwanda", "rw" }, // # __CFBundleLanguageNamesArray | |
433 | { "Kirghiz", "ky" }, // # __CFBundleLanguageNamesArray | |
434 | { "Korean", "ko" }, // # __CFBundleLanguageNamesArray | |
435 | { "Kurdish", "ku" }, // # __CFBundleLanguageNamesArray | |
436 | { "Lao", "lo" }, // # __CFBundleLanguageNamesArray | |
437 | { "Latin", "la" }, // # __CFBundleLanguageNamesArray | |
438 | { "Latvian", "lv" }, // # __CFBundleLanguageNamesArray | |
439 | { "Lithuanian", "lt" }, // # __CFBundleLanguageNamesArray | |
440 | { "Macedonian", "mk" }, // # __CFBundleLanguageNamesArray | |
441 | { "Malagasy", "mg" }, // # __CFBundleLanguageNamesArray | |
442 | { "Malay", "ms" }, // -Latn,-Arab? # __CFBundleLanguageNamesArray (had 2 entries "Malay" for "ms-Latn", "ms-Arab") | |
443 | { "Malayalam", "ml" }, // # __CFBundleLanguageNamesArray | |
444 | { "Maltese", "mt" }, // # __CFBundleLanguageNamesArray | |
445 | { "Manx", "gv" }, // # __CFBundleLanguageNamesArray | |
446 | { "Marathi", "mr" }, // # __CFBundleLanguageNamesArray | |
447 | { "Moldavian", "mo" }, // # __CFBundleLanguageNamesArray | |
448 | { "Mongolian", "mn" }, // -Mong,-Cyrl? # __CFBundleLanguageNamesArray (had 2 entries "Mongolian" for "mn-Mong", "mn-Cyrl") | |
449 | { "Nepali", "ne" }, // # __CFBundleLanguageNamesArray | |
450 | { "Norwegian", "nb" }, // # __CFBundleLanguageNamesArray (had "Norwegian" mapping to "no") | |
451 | { "Nyanja", "ny" }, // # __CFBundleLanguageNamesArray | |
452 | { "Nynorsk", "nn" }, // # handle other names (no entry in __CFBundleLanguageNamesArray) | |
453 | { "Oriya", "or" }, // # __CFBundleLanguageNamesArray | |
454 | { "Oromo", "om" }, // # __CFBundleLanguageNamesArray | |
455 | { "Panjabi", "pa" }, // # handle other names | |
456 | { "Pashto", "ps" }, // # __CFBundleLanguageNamesArray | |
457 | { "Persian", "fa" }, // # handle other names | |
458 | { "Polish", "pl" }, // # __CFBundleLanguageNamesArray | |
459 | { "Portuguese", "pt" }, // # __CFBundleLanguageNamesArray | |
460 | { "Portuguese, Brazilian", "pt-BR" }, // # handle other names | |
461 | { "Punjabi", "pa" }, // # __CFBundleLanguageNamesArray | |
462 | { "Pushto", "ps" }, // # handle other names | |
463 | { "Quechua", "qu" }, // # __CFBundleLanguageNamesArray | |
464 | { "Romanian", "ro" }, // # __CFBundleLanguageNamesArray | |
465 | { "Ruanda", "rw" }, // # handle other names | |
466 | { "Rundi", "rn" }, // # __CFBundleLanguageNamesArray | |
467 | { "Russian", "ru" }, // # __CFBundleLanguageNamesArray | |
468 | { "Sami", "se" }, // # __CFBundleLanguageNamesArray | |
469 | { "Sanskrit", "sa" }, // # __CFBundleLanguageNamesArray | |
470 | { "Scottish", "gd" }, // # __CFBundleLanguageNamesArray | |
471 | { "Serbian", "sr" }, // # __CFBundleLanguageNamesArray | |
472 | { "Simplified Chinese", "zh-Hans" }, // # handle other names | |
473 | { "Sindhi", "sd" }, // # __CFBundleLanguageNamesArray | |
474 | { "Sinhalese", "si" }, // # __CFBundleLanguageNamesArray | |
475 | { "Slovak", "sk" }, // # __CFBundleLanguageNamesArray | |
476 | { "Slovenian", "sl" }, // # __CFBundleLanguageNamesArray | |
477 | { "Somali", "so" }, // # __CFBundleLanguageNamesArray | |
478 | { "Spanish", "es" }, // # __CFBundleLanguageNamesArray | |
479 | { "Sundanese", "su" }, // # __CFBundleLanguageNamesArray | |
480 | { "Swahili", "sw" }, // # __CFBundleLanguageNamesArray | |
481 | { "Swedish", "sv" }, // # __CFBundleLanguageNamesArray | |
9f29f3f8 | 482 | { "Tagalog", "fil" }, // # __CFBundleLanguageNamesArray |
bd5b749c A |
483 | { "Tajik", "tg" }, // # handle other names |
484 | { "Tajiki", "tg" }, // # __CFBundleLanguageNamesArray | |
485 | { "Tamil", "ta" }, // # __CFBundleLanguageNamesArray | |
486 | { "Tatar", "tt" }, // # __CFBundleLanguageNamesArray | |
487 | { "Telugu", "te" }, // # __CFBundleLanguageNamesArray | |
488 | { "Thai", "th" }, // # __CFBundleLanguageNamesArray | |
489 | { "Tibetan", "bo" }, // # __CFBundleLanguageNamesArray | |
490 | { "Tigrinya", "ti" }, // # __CFBundleLanguageNamesArray | |
491 | { "Tongan", "to" }, // # __CFBundleLanguageNamesArray | |
492 | { "Traditional Chinese", "zh-Hant" }, // # handle other names | |
493 | { "Turkish", "tr" }, // # __CFBundleLanguageNamesArray | |
494 | { "Turkmen", "tk" }, // # __CFBundleLanguageNamesArray | |
495 | { "Uighur", "ug" }, // # __CFBundleLanguageNamesArray | |
496 | { "Ukrainian", "uk" }, // # __CFBundleLanguageNamesArray | |
497 | { "Urdu", "ur" }, // # __CFBundleLanguageNamesArray | |
498 | { "Uzbek", "uz" }, // # __CFBundleLanguageNamesArray | |
499 | { "Vietnamese", "vi" }, // # __CFBundleLanguageNamesArray | |
500 | { "Welsh", "cy" }, // # __CFBundleLanguageNamesArray | |
501 | { "Yiddish", "yi" }, // # __CFBundleLanguageNamesArray | |
502 | { "ar_??", "ar" }, // # from old MapScriptInfoAndISOCodes | |
503 | { "az.Ar", "az-Arab" }, // # from old LocaleRefGetPartString | |
504 | { "az.Cy", "az-Cyrl" }, // # from old LocaleRefGetPartString | |
9f29f3f8 | 505 | { "az.La", "az" }, // # from old LocaleRefGetPartString |
bd5b749c A |
506 | { "be_??", "be_BY" }, // # from old MapScriptInfoAndISOCodes |
507 | { "bn_??", "bn" }, // # from old LocaleRefGetPartString | |
508 | { "bo_??", "bo" }, // # from old MapScriptInfoAndISOCodes | |
509 | { "br_??", "br" }, // # from old MapScriptInfoAndISOCodes | |
510 | { "cy_??", "cy" }, // # from old MapScriptInfoAndISOCodes | |
511 | { "de-96", "de-1996" }, // # from old MapScriptInfoAndISOCodes // <1.9> | |
512 | { "de_96", "de-1996" }, // # from old MapScriptInfoAndISOCodes // <1.9> | |
513 | { "de_??", "de-1996" }, // # from old MapScriptInfoAndISOCodes | |
514 | { "el.El-P", "grc" }, // # from old LocaleRefGetPartString | |
515 | { "en-ascii", "en_001" }, // # from earlier version of tables in this file! | |
516 | { "en_??", "en_001" }, // # from old MapScriptInfoAndISOCodes | |
517 | { "eo_??", "eo" }, // # from old MapScriptInfoAndISOCodes | |
518 | { "es_??", "es_419" }, // # from old MapScriptInfoAndISOCodes | |
519 | { "es_XL", "es_419" }, // # from earlier version of tables in this file! | |
520 | { "fr_??", "fr_001" }, // # from old MapScriptInfoAndISOCodes | |
521 | { "ga-dots", "ga-Latg" }, // # from earlier version of tables in this file! // <1.8> | |
522 | { "ga-dots_IE", "ga-Latg_IE" }, // # from earlier version of tables in this file! // <1.8> | |
523 | { "ga.Lg", "ga-Latg" }, // # from old LocaleRefGetPartString // <1.8> | |
524 | { "ga.Lg_IE", "ga-Latg_IE" }, // # from old LocaleRefGetPartString // <1.8> | |
525 | { "gd_??", "gd" }, // # from old MapScriptInfoAndISOCodes | |
526 | { "gv_??", "gv" }, // # from old MapScriptInfoAndISOCodes | |
527 | { "jv.La", "jv" }, // # logical extension // <1.9> | |
528 | { "jw.La", "jv" }, // # from old LocaleRefGetPartString | |
529 | { "kk.Cy", "kk" }, // # from old LocaleRefGetPartString | |
530 | { "kl.La", "kl" }, // # from old LocaleRefGetPartString | |
531 | { "kl.La_GL", "kl_GL" }, // # from old LocaleRefGetPartString // <1.9> | |
532 | { "lp_??", "se" }, // # from old MapScriptInfoAndISOCodes | |
533 | { "mk_??", "mk_MK" }, // # from old MapScriptInfoAndISOCodes | |
9f29f3f8 | 534 | { "mn.Cy", "mn" }, // # from old LocaleRefGetPartString |
bd5b749c A |
535 | { "mn.Mn", "mn-Mong" }, // # from old LocaleRefGetPartString |
536 | { "ms.Ar", "ms-Arab" }, // # from old LocaleRefGetPartString | |
537 | { "ms.La", "ms" }, // # from old LocaleRefGetPartString | |
538 | { "nl-be", "nl-BE" }, // # from old LocaleRefGetPartString | |
539 | { "nl-be_BE", "nl_BE" }, // # from old LocaleRefGetPartString | |
cf7d2af9 A |
540 | { "no-NO", "nb-NO" }, // # not handled by localeStringPrefixToCanonical |
541 | { "no-NO_NO", "nb-NO_NO" }, // # not handled by localeStringPrefixToCanonical | |
bd5b749c A |
542 | // { "no-bok_NO", "nb_NO" }, // # from old LocaleRefGetPartString - handled by localeStringPrefixToCanonical |
543 | // { "no-nyn_NO", "nn_NO" }, // # from old LocaleRefGetPartString - handled by localeStringPrefixToCanonical | |
544 | // { "nya", "ny" }, // # from old LocaleRefGetPartString - handled by localeStringPrefixToCanonical | |
545 | { "pa_??", "pa" }, // # from old LocaleRefGetPartString | |
546 | { "sa.Dv", "sa" }, // # from old LocaleRefGetPartString | |
547 | { "sl_??", "sl_SI" }, // # from old MapScriptInfoAndISOCodes | |
cf7d2af9 | 548 | { "sr_??", "sr_RS" }, // # from old MapScriptInfoAndISOCodes // <1.18> |
bd5b749c A |
549 | { "su.La", "su" }, // # from old LocaleRefGetPartString |
550 | { "yi.He", "yi" }, // # from old LocaleRefGetPartString | |
551 | { "zh-simp", "zh-Hans" }, // # from earlier version of tables in this file! | |
552 | { "zh-trad", "zh-Hant" }, // # from earlier version of tables in this file! | |
553 | { "zh.Ha-S", "zh-Hans" }, // # from old LocaleRefGetPartString | |
554 | { "zh.Ha-S_CN", "zh_CN" }, // # from old LocaleRefGetPartString | |
555 | { "zh.Ha-T", "zh-Hant" }, // # from old LocaleRefGetPartString | |
556 | { "zh.Ha-T_TW", "zh_TW" }, // # from old LocaleRefGetPartString | |
557 | }; | |
558 | enum { | |
559 | kNumOldAppleLocaleToCanonical = sizeof(oldAppleLocaleToCanonical)/sizeof(KeyStringToResultString) | |
560 | }; | |
561 | ||
562 | static const KeyStringToResultString localeStringPrefixToCanonical[] = { | |
563 | // Map 3-letter & obsolete ISO 639 codes, plus obsolete RFC 3066 codes, to 2-letter ISO 639 code. | |
564 | // (special cases for 'sh' handled separately) | |
565 | // First column must be all lowercase; must be sorted according to how strcmp compares the strings in the first column. | |
566 | // | |
567 | // non-canonical canonical [ comment ] # source/reason for non-canonical string | |
568 | // prefix prefix | |
569 | // ------------- --------- | |
570 | ||
9f29f3f8 A |
571 | { "aar", "aa" }, // Afar |
572 | // { "aa_SAAHO", "ssy" }, // Saho # deprecated/grandfathered, handled as a special case | |
573 | { "abk", "ab" }, // Abkhazian | |
bd5b749c | 574 | { "afr", "af" }, // Afrikaans |
9f29f3f8 A |
575 | { "aju", "jrb" }, // Moroccan Judeo-Arabic -> Judeo-Arabic (macrolang.) |
576 | { "aka", "ak" }, // Akan | |
bd5b749c | 577 | { "alb", "sq" }, // Albanian |
9f29f3f8 | 578 | { "als", "sq" }, // Tosk Albanian -> Albanian (macrolang.) |
bd5b749c A |
579 | { "amh", "am" }, // Amharic |
580 | { "ara", "ar" }, // Arabic | |
9f29f3f8 A |
581 | { "arb", "ar" }, // Std Arabic -> Arabic (macrolang.) |
582 | { "arg", "an" }, // Aragonese | |
bd5b749c | 583 | { "arm", "hy" }, // Armenian |
9f29f3f8 | 584 | { "art-lojban", "jbo" }, // Lojban # deprecated/grandfathered |
bd5b749c | 585 | { "asm", "as" }, // Assamese |
9f29f3f8 A |
586 | { "ava", "av" }, // Avaric |
587 | { "ave", "ae" }, // Avestan | |
bd5b749c | 588 | { "aym", "ay" }, // Aymara |
9f29f3f8 | 589 | { "ayr", "ay" }, // Central Aymara -> Aymara (macrolang.) |
bd5b749c | 590 | { "aze", "az" }, // Azerbaijani |
9f29f3f8 A |
591 | { "azj", "az" }, // N.Azerbaijani -> Azerbaijani (macrolang.) |
592 | { "bak", "ba" }, // Bashkir | |
593 | { "bam", "bm" }, // Bambara | |
bd5b749c | 594 | { "baq", "eu" }, // Basque |
9f29f3f8 A |
595 | { "bcc", "bal" }, // Balochi, Southern -> Baluchi (macrolang.) |
596 | { "bcl", "bik" }, // Bicolano, Central -> Bikol (macrolang.) | |
bd5b749c A |
597 | { "bel", "be" }, // Belarusian |
598 | { "ben", "bn" }, // Bengali | |
599 | { "bih", "bh" }, // Bihari | |
9f29f3f8 | 600 | { "bis", "bi" }, // Bislama |
bd5b749c A |
601 | { "bod", "bo" }, // Tibetan |
602 | { "bos", "bs" }, // Bosnian | |
603 | { "bre", "br" }, // Breton | |
604 | { "bul", "bg" }, // Bulgarian | |
605 | { "bur", "my" }, // Burmese | |
9f29f3f8 A |
606 | { "bxk", "luy" }, // Lubukusu -> Luyia (macrolang.) |
607 | { "bxr", "bua" }, // Buriat, Russia -> Buriat (macrolang.) | |
bd5b749c A |
608 | { "cat", "ca" }, // Catalan |
609 | { "ces", "cs" }, // Czech | |
9f29f3f8 | 610 | { "cha", "ch" }, // Chamorro |
bd5b749c A |
611 | { "che", "ce" }, // Chechen |
612 | { "chi", "zh" }, // Chinese | |
9f29f3f8 A |
613 | { "chu", "cu" }, // Church Slavic, Church Slavonic, Old Bulgarian, Old Church Slavonic, Old Slavonic |
614 | { "chv", "cv" }, // Chuvash | |
615 | { "cld", "syr" }, // Chaldean Neo-Aramaic -> Syriac (macrolang.) | |
616 | { "cmn", "zh" }, // Mandarin -> Chinese (macrolang.) | |
bd5b749c A |
617 | { "cor", "kw" }, // Cornish |
618 | { "cos", "co" }, // Corsican | |
9f29f3f8 A |
619 | { "cre", "cr" }, // Cree |
620 | { "cwd", "cr" }, // Cree, Woods -> Cree (macrolang.) | |
bd5b749c A |
621 | { "cym", "cy" }, // Welsh |
622 | { "cze", "cs" }, // Czech | |
623 | { "dan", "da" }, // Danish | |
624 | { "deu", "de" }, // German | |
9f29f3f8 A |
625 | { "dgo", "doi" }, // Dogri -> Dogri (macrolang.) |
626 | { "dhd", "mwr" }, // Dhundari -> Marwari (macrolang.) | |
627 | { "dik", "din" }, // Southwestern Dinka -> Dinka (macrolang.) | |
628 | { "diq", "zza" }, // Dimli -> Zaza (macrolang.) | |
629 | { "div", "dv" }, // Dhivehi, Divehi, Maldivian | |
bd5b749c A |
630 | { "dut", "nl" }, // Dutch |
631 | { "dzo", "dz" }, // Dzongkha | |
9f29f3f8 | 632 | { "ekk", "et" }, // Std Estonian -> Estonian (macrolang.) |
bd5b749c | 633 | { "ell", "el" }, // Greek, Modern (1453-) |
9f29f3f8 | 634 | { "emk", "man" }, // Maninkakan, Eastern -> Mandingo (macrolang.) |
bd5b749c A |
635 | { "eng", "en" }, // English |
636 | { "epo", "eo" }, // Esperanto | |
9f29f3f8 | 637 | { "esk", "ik" }, // Northwest Alaska Inupiatun -> Inupiaq (macrolang.) |
bd5b749c A |
638 | { "est", "et" }, // Estonian |
639 | { "eus", "eu" }, // Basque | |
9f29f3f8 | 640 | { "ewe", "ee" }, // Ewe |
bd5b749c A |
641 | { "fao", "fo" }, // Faroese |
642 | { "fas", "fa" }, // Persian | |
9f29f3f8 A |
643 | { "fat", "ak" }, // Fanti -> Akan (macrolang.) |
644 | { "fij", "fj" }, // Fijian | |
bd5b749c A |
645 | { "fin", "fi" }, // Finnish |
646 | { "fra", "fr" }, // French | |
647 | { "fre", "fr" }, // French | |
9f29f3f8 A |
648 | { "fry", "fy" }, // Western Frisian |
649 | { "fuc", "ff" }, // Pular -> Fulah (macrolang.) | |
650 | { "ful", "ff" }, // Fulah | |
651 | { "gaz", "om" }, // W.Central Oromo -> Oromo (macrolang.) | |
652 | { "gbo", "grb" }, // Northern Grebo -> Grebo (macrolang.) | |
bd5b749c A |
653 | { "geo", "ka" }, // Georgian |
654 | { "ger", "de" }, // German | |
655 | { "gla", "gd" }, // Gaelic,Scottish | |
656 | { "gle", "ga" }, // Irish | |
657 | { "glg", "gl" }, // Gallegan | |
658 | { "glv", "gv" }, // Manx | |
9f29f3f8 | 659 | { "gno", "gon" }, // Northern Gondi -> Gondi (macrolang.) |
bd5b749c A |
660 | { "gre", "el" }, // Greek, Modern (1453-) |
661 | { "grn", "gn" }, // Guarani | |
9f29f3f8 | 662 | { "gug", "gn" }, // Paraguayan Guarani -> Guarani (macrolang.) |
bd5b749c | 663 | { "guj", "gu" }, // Gujarati |
9f29f3f8 A |
664 | { "gya", "gba" }, // Northwest Gbaya -> Gbaya (Cent. Afr. Rep.) (macrolang.) |
665 | { "hat", "ht" }, // Haitian, Haitian Creole | |
666 | { "hau", "ha" }, // Hausa | |
667 | { "hbs", "sr_Latn" }, // Serbo-Croatian | |
668 | { "hdn", "hai" }, // Northern Haida -> Haida (macrolang.) | |
669 | { "hea", "hmn" }, // Northern Qiandong Miao -> Hmong (macrolang.) | |
bd5b749c | 670 | { "heb", "he" }, // Hebrew |
9f29f3f8 A |
671 | { "her", "hz" }, // Herero |
672 | { "him", "srx" }, // Himachali -> Sirmauri (= Pahari, Himachali) (macrolang.) | |
bd5b749c | 673 | { "hin", "hi" }, // Hindi |
9f29f3f8 | 674 | { "hmo", "ho" }, // Hiri Motu |
bd5b749c A |
675 | { "hrv", "hr" }, // Croatian |
676 | { "hun", "hu" }, // Hungarian | |
677 | { "hye", "hy" }, // Armenian | |
9f29f3f8 A |
678 | { "i-ami", "ami" }, // Amis # deprecated/grandfathered |
679 | { "i-bnn", "bnn" }, // Bunun # deprecated/grandfathered | |
680 | { "i-hak", "hak" }, // Hakka # deprecated RFC 3066 | |
681 | { "i-klingon", "tlh" }, // Klingon # deprecated/grandfathered | |
bd5b749c A |
682 | { "i-lux", "lb" }, // Luxembourgish # deprecated RFC 3066 |
683 | { "i-navajo", "nv" }, // Navajo # deprecated RFC 3066 | |
9f29f3f8 A |
684 | { "i-pwn", "pwn" }, // Paiwan # deprecated/grandfathered |
685 | { "i-tao", "tao" }, // Tao # deprecated/grandfathered | |
686 | { "i-tay", "tay" }, // Tayal # deprecated/grandfathered | |
687 | { "i-tsu", "tsu" }, // Tsou # deprecated/grandfathered | |
688 | { "ibo", "ig" }, // Igbo | |
bd5b749c | 689 | { "ice", "is" }, // Icelandic |
9f29f3f8 A |
690 | { "ido", "io" }, // Ido |
691 | { "iii", "ii" }, // Sichuan Yi, Nuosu | |
692 | { "ike", "iu" }, // E.Canada Inuktitut -> Inuktitut (macrolang.) | |
bd5b749c A |
693 | { "iku", "iu" }, // Inuktitut |
694 | { "ile", "ie" }, // Interlingue | |
695 | { "in", "id" }, // Indonesian # deprecated 639 code in -> id (1989) | |
696 | { "ina", "ia" }, // Interlingua | |
697 | { "ind", "id" }, // Indonesian | |
9f29f3f8 | 698 | { "ipk", "ik" }, // Inupiaq |
bd5b749c A |
699 | { "isl", "is" }, // Icelandic |
700 | { "ita", "it" }, // Italian | |
701 | { "iw", "he" }, // Hebrew # deprecated 639 code iw -> he (1989) | |
702 | { "jav", "jv" }, // Javanese | |
703 | { "jaw", "jv" }, // Javanese # deprecated 639 code jaw -> jv (2001) | |
704 | { "ji", "yi" }, // Yiddish # deprecated 639 code ji -> yi (1989) | |
705 | { "jpn", "ja" }, // Japanese | |
9f29f3f8 | 706 | { "jw", "jv" }, // Javanese # deprecated |
bd5b749c A |
707 | { "kal", "kl" }, // Kalaallisut |
708 | { "kan", "kn" }, // Kannada | |
709 | { "kas", "ks" }, // Kashmiri | |
710 | { "kat", "ka" }, // Georgian | |
9f29f3f8 | 711 | { "kau", "kr" }, // Kanuri |
bd5b749c | 712 | { "kaz", "kk" }, // Kazakh |
9f29f3f8 | 713 | { "khk", "mn" }, // Halh Mongolian [mainly Cyrl] -> Mongolian (macrolang.) |
bd5b749c | 714 | { "khm", "km" }, // Khmer |
9f29f3f8 | 715 | { "kik", "ki" }, // Kikuyu, Gikuyu |
bd5b749c A |
716 | { "kin", "rw" }, // Kinyarwanda |
717 | { "kir", "ky" }, // Kirghiz | |
9f29f3f8 A |
718 | { "kmr", "ku" }, // Northern Kurdish -> Kurdish (macrolang.) |
719 | { "knc", "kr" }, // Central Kanuri -> Kanuri (macrolang.) | |
720 | { "kng", "kg" }, // Koongo -> Kongo (macrolang.) | |
721 | { "knn", "kok" }, // Konkani (individ.lang) -> Konkani (macrolang.) | |
722 | { "kom", "kv" }, // Komi | |
723 | { "kon", "kg" }, // Kongo | |
bd5b749c | 724 | { "kor", "ko" }, // Korean |
9f29f3f8 A |
725 | { "kpv", "kv" }, // Komi-Zyrian -> Komi (macrolang.) |
726 | { "kua", "kj" }, // Kuanyama, Kwanyama | |
bd5b749c A |
727 | { "kur", "ku" }, // Kurdish |
728 | { "lao", "lo" }, // Lao | |
729 | { "lat", "la" }, // Latin | |
730 | { "lav", "lv" }, // Latvian | |
9f29f3f8 A |
731 | { "lbk", "bnc" }, // Central Bontok -> Bontok (macrolang.) |
732 | { "lim", "li" }, // Limburgan, Limburger, Limburgish | |
733 | { "lin", "ln" }, // Lingala | |
bd5b749c A |
734 | { "lit", "lt" }, // Lithuanian |
735 | { "ltz", "lb" }, // Letzeburgesch | |
9f29f3f8 A |
736 | { "lub", "lu" }, // Luba-Katanga |
737 | { "lug", "lg" }, // Ganda | |
738 | { "lvs", "lv" }, // Std Latvian -> Latvian (macrolang.) | |
bd5b749c A |
739 | { "mac", "mk" }, // Macedonian |
740 | { "mal", "ml" }, // Malayalam | |
741 | { "mar", "mr" }, // Marathi | |
742 | { "may", "ms" }, // Malay | |
9f29f3f8 | 743 | { "mhr", "chm" }, // Mari, Eastern -> Mari (Russia) (macrolang.) |
bd5b749c A |
744 | { "mkd", "mk" }, // Macedonian |
745 | { "mlg", "mg" }, // Malagasy | |
746 | { "mlt", "mt" }, // Maltese | |
747 | { "mol", "mo" }, // Moldavian | |
748 | { "mon", "mn" }, // Mongolian | |
749 | { "msa", "ms" }, // Malay | |
9f29f3f8 | 750 | { "mup", "raj" }, // Malvi -> Rajasthani (macrolang.) |
bd5b749c | 751 | { "mya", "my" }, // Burmese |
9f29f3f8 A |
752 | { "nau", "na" }, // Nauru |
753 | { "nav", "nv" }, // Navajo, Navaho | |
754 | { "nbl", "nr" }, // South Ndebele | |
755 | { "nde", "nd" }, // North Ndebele | |
756 | { "ndo", "ng" }, // Ndonga | |
bd5b749c A |
757 | { "nep", "ne" }, // Nepali |
758 | { "nld", "nl" }, // Dutch | |
759 | { "nno", "nn" }, // Norwegian Nynorsk | |
760 | { "no", "nb" }, // Norwegian generic # ambiguous 639 code no -> nb | |
761 | { "no-bok", "nb" }, // Norwegian Bokmal # deprecated RFC 3066 tag - used in old LocaleRefGetPartString | |
762 | { "no-nyn", "nn" }, // Norwegian Nynorsk # deprecated RFC 3066 tag - used in old LocaleRefGetPartString | |
763 | { "nob", "nb" }, // Norwegian Bokmal | |
764 | { "nor", "nb" }, // Norwegian generic # ambiguous 639 code nor -> nb | |
9f29f3f8 A |
765 | // { "no_BOKMAL", "nb" }, // Norwegian Bokmal # deprecated/grandfathered, handled as a special case |
766 | // { "no_NYNORSK", "nn" }, // Norwegian Nynorsk # deprecated/grandfathered, handled as a special case | |
bd5b749c A |
767 | { "nya", "ny" }, // Nyanja/Chewa/Chichewa # 3-letter code used in old LocaleRefGetPartString |
768 | { "oci", "oc" }, // Occitan/Provencal | |
9f29f3f8 A |
769 | { "ojg", "oj" }, // Ojibwa, Eastern -> Ojibwa (macrolang.) |
770 | { "oji", "oj" }, // Ojibwa | |
bd5b749c A |
771 | { "ori", "or" }, // Oriya |
772 | { "orm", "om" }, // Oromo,Galla | |
9f29f3f8 | 773 | { "oss", "os" }, // Ossetian, Ossetic |
bd5b749c | 774 | { "pan", "pa" }, // Panjabi |
9f29f3f8 | 775 | { "pbu", "ps" }, // N.Pashto, -> Pushto (macrolang.) |
bd5b749c | 776 | { "per", "fa" }, // Persian |
9f29f3f8 A |
777 | { "pes", "fa" }, // W.Farsi -> Persian (macrolang.) |
778 | { "pli", "pi" }, // Pali | |
779 | { "plt", "mg" }, // Plateau Malagasy -> Malagasy (macrolang.) | |
780 | { "pnb", "lah" }, // W.Panjabi -> Lahnda (macrolang.) | |
bd5b749c A |
781 | { "pol", "pl" }, // Polish |
782 | { "por", "pt" }, // Portuguese | |
783 | { "pus", "ps" }, // Pushto | |
784 | { "que", "qu" }, // Quechua | |
9f29f3f8 A |
785 | { "qxp", "qu" }, // Puno Quechua -> Quechua (macrolang.) |
786 | { "rmy", "rom" }, // Vlax Romani -> Romany (macrolang.) | |
bd5b749c A |
787 | { "roh", "rm" }, // Raeto-Romance |
788 | { "ron", "ro" }, // Romanian | |
789 | { "rum", "ro" }, // Romanian | |
790 | { "run", "rn" }, // Rundi | |
791 | { "rus", "ru" }, // Russian | |
9f29f3f8 | 792 | { "sag", "sg" }, // Sango |
bd5b749c A |
793 | { "san", "sa" }, // Sanskrit |
794 | { "scc", "sr" }, // Serbian | |
795 | { "scr", "hr" }, // Croatian | |
9f29f3f8 A |
796 | { "sgn-be-fr", "sfb" }, // Belgian-French Sign Lang. # deprecated/grandfathered |
797 | { "sgn-be-nl", "vgt" }, // Belgian-Flemish Sign Lang. # deprecated/grandfathered | |
798 | { "sgn-ch-de", "sgg" }, // Swiss German Sign Lang. # deprecated/grandfathered | |
bd5b749c A |
799 | { "sin", "si" }, // Sinhalese |
800 | { "slk", "sk" }, // Slovak | |
801 | { "slo", "sk" }, // Slovak | |
802 | { "slv", "sl" }, // Slovenian | |
803 | { "sme", "se" }, // Sami,Northern | |
9f29f3f8 A |
804 | { "smo", "sm" }, // Samoan |
805 | { "sna", "sn" }, // Shona | |
bd5b749c A |
806 | { "snd", "sd" }, // Sindhi |
807 | { "som", "so" }, // Somali | |
9f29f3f8 | 808 | { "sot", "st" }, // Southern Sotho |
bd5b749c | 809 | { "spa", "es" }, // Spanish |
9f29f3f8 | 810 | { "spy", "kln" }, // Sabaot -> Kalenjin (macrolang.) |
bd5b749c | 811 | { "sqi", "sq" }, // Albanian |
9f29f3f8 A |
812 | { "src", "sc" }, // Sardinian, Logudorese -> Sardinian (macrolang.) |
813 | { "srd", "sc" }, // Sardinian | |
bd5b749c | 814 | { "srp", "sr" }, // Serbian |
9f29f3f8 | 815 | { "ssw", "ss" }, // Swati |
bd5b749c A |
816 | { "sun", "su" }, // Sundanese |
817 | { "swa", "sw" }, // Swahili | |
818 | { "swe", "sv" }, // Swedish | |
9f29f3f8 A |
819 | { "swh", "sw" }, // Swahili (individ.lang) -> Swahili (macrolang.) |
820 | { "tah", "ty" }, // Tahitian | |
bd5b749c A |
821 | { "tam", "ta" }, // Tamil |
822 | { "tat", "tt" }, // Tatar | |
823 | { "tel", "te" }, // Telugu | |
824 | { "tgk", "tg" }, // Tajik | |
d7384798 | 825 | { "tgl", "fil" }, // Tagalog |
bd5b749c A |
826 | { "tha", "th" }, // Thai |
827 | { "tib", "bo" }, // Tibetan | |
828 | { "tir", "ti" }, // Tigrinya | |
9f29f3f8 | 829 | { "tl", "fil" }, // Tagalog # legacy |
bd5b749c | 830 | { "ton", "to" }, // Tongan |
9f29f3f8 A |
831 | { "tsn", "tn" }, // Tswana |
832 | { "tso", "ts" }, // Tsonga | |
833 | { "ttq", "tmh" }, // Tamajaq, Tawallammat -> Tamashek (macrolang.) | |
bd5b749c A |
834 | { "tuk", "tk" }, // Turkmen |
835 | { "tur", "tr" }, // Turkish | |
9f29f3f8 A |
836 | { "tw", "ak" }, // Twi -> Akan (macrolang.) |
837 | { "twi", "ak" }, // Twi | |
bd5b749c A |
838 | { "uig", "ug" }, // Uighur |
839 | { "ukr", "uk" }, // Ukrainian | |
9f29f3f8 | 840 | { "umu", "del" }, // Munsee -> Delaware (macrolang.) |
bd5b749c A |
841 | { "urd", "ur" }, // Urdu |
842 | { "uzb", "uz" }, // Uzbek | |
9f29f3f8 A |
843 | { "uzn", "uz" }, // N. Uzbek -> Uzbek (macrolang.) |
844 | { "ven", "ve" }, // Venda | |
bd5b749c | 845 | { "vie", "vi" }, // Vietnamese |
9f29f3f8 | 846 | { "vol", "vo" }, // Volapük |
bd5b749c | 847 | { "wel", "cy" }, // Welsh |
9f29f3f8 A |
848 | { "wln", "wa" }, // Walloon |
849 | { "wol", "wo" }, // Wolof | |
850 | { "xho", "xh" }, // Xhosa | |
851 | { "xpe", "kpe" }, // Kpelle, Liberia -> Kpelle (macrolang.) | |
852 | { "xsl", "den" }, // Slavey, South -> Slave (Athapascan) (macrolang.) | |
853 | { "ydd", "yi" }, // Yiddish,E. -> Yiddish (macrolang.) | |
bd5b749c | 854 | { "yid", "yi" }, // Yiddish |
9f29f3f8 A |
855 | { "yor", "yo" }, // Yoruba |
856 | { "zai", "zap" }, // Zapotec, Isthmus -> Zapotec (macrolang.) | |
857 | { "zh-cdo", "cdo" }, // Chinese, Min Dong # extlang | |
858 | { "zh-cjy", "cjy" }, // Chinese, Jinyu # extlang | |
859 | { "zh-cmn", "zh" }, // Chinese, Mandarin # extlang | |
860 | { "zh-cpx", "cpx" }, // Chinese, Pu-Xian # extlang | |
861 | { "zh-czh", "czh" }, // Chinese, Huizhou # extlang | |
862 | { "zh-czo", "czo" }, // Chinese, Min Zhong # extlang | |
863 | { "zh-gan", "gan" }, // Chinese, Gan # extlang | |
864 | { "zh-guoyu", "zh" }, // Mandarin/Std Chinese # deprecated | |
865 | { "zh-hak", "hak" }, // Chinese, Hakka # extlang | |
866 | { "zh-hakka", "hak" }, // Hakka # deprecated | |
867 | { "zh-hsn", "hsn" }, // Chinese, Xiang # extlang | |
868 | { "zh-min-nan", "nan" }, // Minnan,Hokkien,Taiwanese,So. Fujian # deprecated | |
869 | { "zh-mnp", "mnp" }, // Chinese, Min Bei # extlang | |
870 | { "zh-nan", "nan" }, // Chinese, Min Nan # extlang | |
871 | { "zh-wuu", "wuu" }, // Chinese, Wu # extlang | |
872 | { "zh-xiang", "hsn" }, // Xiang/Hunanese # deprecated | |
873 | { "zh-yue", "yue" }, // Chinese, Yue # extlang | |
874 | { "zha", "za" }, // Zhuang, Chuang | |
bd5b749c | 875 | { "zho", "zh" }, // Chinese |
9f29f3f8 A |
876 | { "zsm", "ms" }, // Std Malay -> Malay (macrolang.) |
877 | { "zul", "zu" }, // Zulu | |
878 | { "zyb", "za" }, // Yongbei Zhuang -> Zhuang (macrolang.) | |
bd5b749c A |
879 | }; |
880 | enum { | |
881 | kNumLocaleStringPrefixToCanonical = sizeof(localeStringPrefixToCanonical)/sizeof(KeyStringToResultString) | |
882 | }; | |
883 | ||
884 | ||
885 | static const SpecialCaseUpdates specialCases[] = { | |
886 | // Data for special cases | |
887 | // a) The 3166 code CS was used for Czechoslovakia until 1993, when that country split and the code was | |
888 | // replaced by CZ and SK. Then in 2003-07, the code YU (formerly designating all of Yugoslavia, then after | |
cf7d2af9 A |
889 | // the 1990s breakup just designating what is now Serbia and Montenegro) was changed to CS! Then after |
890 | // Serbia and Montenegro split, the code CS was replaced in 2006-09 with separate codes RS and ME. If we | |
891 | // see CS but a language of cs or sk, we change CS to CZ or SK. Otherwise, we change CS (and old YU) to RS. | |
bd5b749c A |
892 | // b) The 639 code sh for Serbo-Croatian was also replaced in the 1990s by separate codes hr and sr, and |
893 | // deprecated in 2000. We guess which one to map it to as follows: If there is a region tag of HR we use | |
cf7d2af9 | 894 | // hr; if there is a region tag of (now) RS we use sr; else we do not change it (not enough info). |
bd5b749c A |
895 | // c) There are other codes that have been updated without these issues (eg. TP to TL), plus among the |
896 | // "exceptionally reserved" codes some are just alternates for standard codes (eg. UK for GB). | |
897 | { NULL, "-UK", "GB", NULL, NULL }, // always change UK to GB (UK is "exceptionally reserved" to mean GB) | |
898 | { NULL, "-TP", "TL", NULL, NULL }, // always change TP to TL (East Timor, code changed 2002-05) | |
899 | { "cs", "-CS", "CZ", NULL, NULL }, // if language is cs, change CS (pre-1993 Czechoslovakia) to CZ (Czech Republic) | |
900 | { "sk", "-CS", "SK", NULL, NULL }, // if language is sk, change CS (pre-1993 Czechoslovakia) to SK (Slovakia) | |
cf7d2af9 A |
901 | { NULL, "-CS", "RS", NULL, NULL }, // otherwise map CS (assume Serbia+Montenegro) to RS (Serbia) |
902 | { NULL, "-YU", "RS", NULL, NULL }, // also map old YU (assume Serbia+Montenegro) to RS (Serbia) | |
903 | { "sh", "-HR", "hr", "-RS", "sr" }, // then if language is old 'sh' (SerboCroatian), change it to 'hr' (Croatian) | |
904 | // if we find HR (Croatia) or to 'sr' (Serbian) if we find RS (Serbia). | |
905 | // Note: Do this after changing YU/CS toRS as above. | |
bd5b749c A |
906 | { NULL, NULL, NULL, NULL, NULL } // terminator |
907 | }; | |
908 | ||
909 | ||
910 | static const KeyStringToResultString localeStringRegionToDefaults[] = { | |
911 | // For some region-code suffixes, there are default substrings to strip off for canonical string. | |
912 | // Must be sorted according to how strcmp compares the strings in the first column | |
913 | // | |
914 | // region default writing | |
915 | // suffix system tags, strip comment | |
916 | // -------- ------------- --------- | |
917 | { "_CN", "-Hans" }, // mainland China, default is simplified | |
918 | { "_HK", "-Hant" }, // Hong Kong, default is traditional | |
919 | { "_MO", "-Hant" }, // Macao, default is traditional | |
920 | { "_SG", "-Hans" }, // Singapore, default is simplified | |
921 | { "_TW", "-Hant" }, // Taiwan, default is traditional | |
922 | }; | |
923 | enum { | |
924 | kNumLocaleStringRegionToDefaults = sizeof(localeStringRegionToDefaults)/sizeof(KeyStringToResultString) | |
925 | }; | |
926 | ||
927 | static const KeyStringToResultString localeStringPrefixToDefaults[] = { | |
928 | // For some initial portions of language tag, there are default substrings to strip off for canonical string. | |
929 | // Must be sorted according to how strcmp compares the strings in the first column | |
930 | // | |
931 | // language default writing | |
932 | // tag prefix system tags, strip comment | |
933 | // -------- ------------- --------- | |
934 | { "ab-", "-Cyrl" }, // Abkhazian | |
935 | { "af-", "-Latn" }, // Afrikaans | |
9f29f3f8 A |
936 | { "agq-", "-Latn" }, // Aghem |
937 | { "ak-", "-Latn" }, // Akan | |
bd5b749c A |
938 | { "am-", "-Ethi" }, // Amharic |
939 | { "ar-", "-Arab" }, // Arabic | |
940 | { "as-", "-Beng" }, // Assamese | |
9f29f3f8 | 941 | { "asa-", "-Latn" }, // Asu |
bd5b749c | 942 | { "ay-", "-Latn" }, // Aymara |
9f29f3f8 A |
943 | { "az-", "-Latn" }, // Azerbaijani |
944 | { "bas-", "-Latn" }, // Basaa | |
bd5b749c | 945 | { "be-", "-Cyrl" }, // Belarusian |
9f29f3f8 A |
946 | { "bem-", "-Latn" }, // Bemba |
947 | { "bez-", "-Latn" }, // Bena | |
bd5b749c | 948 | { "bg-", "-Cyrl" }, // Bulgarian |
9f29f3f8 | 949 | { "bm-", "-Latn" }, // Bambara |
bd5b749c A |
950 | { "bn-", "-Beng" }, // Bengali |
951 | { "bo-", "-Tibt" }, // Tibetan (? not Suppress-Script) | |
952 | { "br-", "-Latn" }, // Breton (? not Suppress-Script) | |
9f29f3f8 | 953 | { "brx-", "-Deva" }, // Bodo |
bd5b749c A |
954 | { "bs-", "-Latn" }, // Bosnian |
955 | { "ca-", "-Latn" }, // Catalan | |
9f29f3f8 A |
956 | { "cgg-", "-Latn" }, // Chiga |
957 | { "chr-", "-Cher" }, // Cherokee | |
bd5b749c A |
958 | { "cs-", "-Latn" }, // Czech |
959 | { "cy-", "-Latn" }, // Welsh | |
960 | { "da-", "-Latn" }, // Danish | |
9f29f3f8 | 961 | { "dav-", "-Latn" }, // Taita |
bd5b749c | 962 | { "de-", "-Latn -1901" }, // German, traditional orthography |
9f29f3f8 A |
963 | { "dje-", "-Latn" }, // Zarma |
964 | { "dua-", "-Latn" }, // Duala | |
bd5b749c | 965 | { "dv-", "-Thaa" }, // Divehi/Maldivian |
9f29f3f8 | 966 | { "dyo-", "-Latn" }, // Jola-Fonyi |
bd5b749c | 967 | { "dz-", "-Tibt" }, // Dzongkha |
9f29f3f8 A |
968 | { "ebu-", "-Latn" }, // Embu |
969 | { "ee-", "-Latn" }, // Ewe | |
bd5b749c A |
970 | { "el-", "-Grek" }, // Greek (modern, monotonic) |
971 | { "en-", "-Latn" }, // English | |
972 | { "eo-", "-Latn" }, // Esperanto | |
973 | { "es-", "-Latn" }, // Spanish | |
974 | { "et-", "-Latn" }, // Estonian | |
975 | { "eu-", "-Latn" }, // Basque | |
9f29f3f8 | 976 | { "ewo-", "-Latn" }, // Ewondo |
bd5b749c | 977 | { "fa-", "-Arab" }, // Farsi |
9f29f3f8 | 978 | { "ff-", "-Latn" }, // Fulah |
bd5b749c | 979 | { "fi-", "-Latn" }, // Finnish |
9f29f3f8 | 980 | { "fil-", "-Latn" }, // Tagalog |
bd5b749c A |
981 | { "fo-", "-Latn" }, // Faroese |
982 | { "fr-", "-Latn" }, // French | |
983 | { "ga-", "-Latn" }, // Irish | |
984 | { "gd-", "-Latn" }, // Scottish Gaelic (? not Suppress-Script) | |
985 | { "gl-", "-Latn" }, // Galician | |
986 | { "gn-", "-Latn" }, // Guarani | |
9f29f3f8 | 987 | { "gsw-", "-Latn" }, // Swiss German |
bd5b749c | 988 | { "gu-", "-Gujr" }, // Gujarati |
9f29f3f8 | 989 | { "guz-", "-Latn" }, // Gusii |
bd5b749c | 990 | { "gv-", "-Latn" }, // Manx |
9f29f3f8 | 991 | { "ha-", "-Latn" }, // Hausa |
bd5b749c A |
992 | { "haw-", "-Latn" }, // Hawaiian (? not Suppress-Script) |
993 | { "he-", "-Hebr" }, // Hebrew | |
994 | { "hi-", "-Deva" }, // Hindi | |
995 | { "hr-", "-Latn" }, // Croatian | |
996 | { "hu-", "-Latn" }, // Hungarian | |
997 | { "hy-", "-Armn" }, // Armenian | |
998 | { "id-", "-Latn" }, // Indonesian | |
9f29f3f8 A |
999 | { "ig-", "-Latn" }, // Igbo |
1000 | { "ii-", "-Yiii" }, // Sichuan Yi | |
bd5b749c A |
1001 | { "is-", "-Latn" }, // Icelandic |
1002 | { "it-", "-Latn" }, // Italian | |
1003 | { "ja-", "-Jpan" }, // Japanese | |
9f29f3f8 | 1004 | { "jmc-", "-Latn" }, // Machame |
bd5b749c | 1005 | { "ka-", "-Geor" }, // Georgian |
9f29f3f8 A |
1006 | { "kab-", "-Latn" }, // Kabyle |
1007 | { "kam-", "-Latn" }, // Kamba | |
1008 | { "kde-", "-Latn" }, // Makonde | |
1009 | { "kea-", "-Latn" }, // Kabuverdianu | |
1010 | { "khq-", "-Latn" }, // Koyra Chiini | |
1011 | { "ki-", "-Latn" }, // Kikuyu | |
bd5b749c A |
1012 | { "kk-", "-Cyrl" }, // Kazakh |
1013 | { "kl-", "-Latn" }, // Kalaallisut/Greenlandic | |
1014 | { "km-", "-Khmr" }, // Central Khmer | |
1015 | { "kn-", "-Knda" }, // Kannada | |
1016 | { "ko-", "-Hang" }, // Korean (? not Suppress-Script) | |
1017 | { "kok-", "-Deva" }, // Konkani | |
9f29f3f8 A |
1018 | { "ksb-", "-Latn" }, // Shambala |
1019 | { "ksf-", "-Latn" }, // Bafia | |
1020 | { "kw-", "-Latn" }, // Cornish | |
1021 | { "ky-", "-Cyrl" }, // Kirghiz | |
bd5b749c | 1022 | { "la-", "-Latn" }, // Latin |
9f29f3f8 | 1023 | { "lag-", "-Latn" }, // Langi |
bd5b749c | 1024 | { "lb-", "-Latn" }, // Luxembourgish |
9f29f3f8 A |
1025 | { "lg-", "-Latn" }, // Ganda |
1026 | { "ln-", "-Latn" }, // Lingala | |
bd5b749c A |
1027 | { "lo-", "-Laoo" }, // Lao |
1028 | { "lt-", "-Latn" }, // Lithuanian | |
9f29f3f8 A |
1029 | { "lu-", "-Latn" }, // Luba-Katanga |
1030 | { "luo-", "-Latn" }, // Luo | |
1031 | { "luy-", "-Latn" }, // Luyia | |
bd5b749c | 1032 | { "lv-", "-Latn" }, // Latvian |
9f29f3f8 A |
1033 | { "mas-", "-Latn" }, // Masai |
1034 | { "mer-", "-Latn" }, // Meru | |
1035 | { "mfe-", "-Latn" }, // Morisyen | |
bd5b749c | 1036 | { "mg-", "-Latn" }, // Malagasy |
9f29f3f8 | 1037 | { "mgh-", "-Latn" }, // Makhuwa-Meetto |
bd5b749c A |
1038 | { "mk-", "-Cyrl" }, // Macedonian |
1039 | { "ml-", "-Mlym" }, // Malayalam | |
9f29f3f8 | 1040 | { "mn-", "-Cyrl" }, // Mongolian |
bd5b749c A |
1041 | { "mo-", "-Latn" }, // Moldavian |
1042 | { "mr-", "-Deva" }, // Marathi | |
1043 | { "ms-", "-Latn" }, // Malay | |
1044 | { "mt-", "-Latn" }, // Maltese | |
9f29f3f8 | 1045 | { "mua-", "-Latn" }, // Mundang |
bd5b749c | 1046 | { "my-", "-Mymr" }, // Burmese/Myanmar |
9f29f3f8 | 1047 | { "naq-", "-Latn" }, // Nama |
bd5b749c | 1048 | { "nb-", "-Latn" }, // Norwegian Bokmal |
9f29f3f8 | 1049 | { "nd-", "-Latn" }, // North Ndebele |
bd5b749c A |
1050 | { "ne-", "-Deva" }, // Nepali |
1051 | { "nl-", "-Latn" }, // Dutch | |
9f29f3f8 | 1052 | { "nmg-", "-Latn" }, // Kwasio |
bd5b749c | 1053 | { "nn-", "-Latn" }, // Norwegian Nynorsk |
9f29f3f8 | 1054 | { "nus-", "-Latn" }, // Nuer |
bd5b749c | 1055 | { "ny-", "-Latn" }, // Chichewa/Nyanja |
9f29f3f8 | 1056 | { "nyn-", "-Latn" }, // Nyankole |
bd5b749c A |
1057 | { "om-", "-Latn" }, // Oromo |
1058 | { "or-", "-Orya" }, // Oriya | |
1059 | { "pa-", "-Guru" }, // Punjabi | |
1060 | { "pl-", "-Latn" }, // Polish | |
1061 | { "ps-", "-Arab" }, // Pushto | |
1062 | { "pt-", "-Latn" }, // Portuguese | |
1063 | { "qu-", "-Latn" }, // Quechua | |
9f29f3f8 | 1064 | { "rm-", "-Latn" }, // Romansh |
bd5b749c A |
1065 | { "rn-", "-Latn" }, // Rundi |
1066 | { "ro-", "-Latn" }, // Romanian | |
9f29f3f8 | 1067 | { "rof-", "-Latn" }, // Rombo |
bd5b749c A |
1068 | { "ru-", "-Cyrl" }, // Russian |
1069 | { "rw-", "-Latn" }, // Kinyarwanda | |
9f29f3f8 | 1070 | { "rwk-", "-Latn" }, // Rwa |
bd5b749c | 1071 | { "sa-", "-Deva" }, // Sanskrit (? not Suppress-Script) |
9f29f3f8 A |
1072 | { "saq-", "-Latn" }, // Samburu |
1073 | { "sbp-", "-Latn" }, // Sangu | |
bd5b749c | 1074 | { "se-", "-Latn" }, // Sami (? not Suppress-Script) |
9f29f3f8 A |
1075 | { "seh-", "-Latn" }, // Sena |
1076 | { "ses-", "-Latn" }, // Koyraboro Senni | |
1077 | { "sg-", "-Latn" }, // Sango | |
1078 | { "shi-", "-Latn" }, // Tachelhit | |
bd5b749c A |
1079 | { "si-", "-Sinh" }, // Sinhala |
1080 | { "sk-", "-Latn" }, // Slovak | |
1081 | { "sl-", "-Latn" }, // Slovenian | |
9f29f3f8 | 1082 | { "sn-", "-Latn" }, // Shona |
bd5b749c A |
1083 | { "so-", "-Latn" }, // Somali |
1084 | { "sq-", "-Latn" }, // Albanian | |
9f29f3f8 | 1085 | { "sr-", "-Cyrl" }, // Serbian |
bd5b749c A |
1086 | { "sv-", "-Latn" }, // Swedish |
1087 | { "sw-", "-Latn" }, // Swahili | |
9f29f3f8 | 1088 | { "swc-", "-Latn" }, // Congo Swahili |
bd5b749c A |
1089 | { "ta-", "-Taml" }, // Tamil |
1090 | { "te-", "-Telu" }, // Telugu | |
9f29f3f8 A |
1091 | { "teo-", "-Latn" }, // Teso |
1092 | { "tg-", "-Cyrl" }, // Tajik | |
bd5b749c A |
1093 | { "th-", "-Thai" }, // Thai |
1094 | { "ti-", "-Ethi" }, // Tigrinya | |
9f29f3f8 | 1095 | { "tk-", "-Latn" }, // Turkmen |
bd5b749c A |
1096 | { "tn-", "-Latn" }, // Tswana |
1097 | { "to-", "-Latn" }, // Tonga of Tonga Islands | |
1098 | { "tr-", "-Latn" }, // Turkish | |
9f29f3f8 A |
1099 | { "twq-", "-Latn" }, // Tasawaq |
1100 | { "tzm-", "-Latn" }, // Central Morocco Tamazight | |
bd5b749c A |
1101 | { "uk-", "-Cyrl" }, // Ukrainian |
1102 | { "ur-", "-Arab" }, // Urdu | |
9f29f3f8 A |
1103 | { "uz-", "-Cyrl" }, // Uzbek |
1104 | { "vai-", "-Vaii" }, // Vai | |
bd5b749c | 1105 | { "vi-", "-Latn" }, // Vietnamese |
9f29f3f8 | 1106 | { "vun-", "-Latn" }, // Vunjo |
bd5b749c A |
1107 | { "wo-", "-Latn" }, // Wolof |
1108 | { "xh-", "-Latn" }, // Xhosa | |
9f29f3f8 A |
1109 | { "xog-", "-Latn" }, // Soga |
1110 | { "yav-", "-Latn" }, // Yangben | |
bd5b749c | 1111 | { "yi-", "-Hebr" }, // Yiddish |
9f29f3f8 | 1112 | { "yo-", "-Latn" }, // Yoruba |
bd5b749c A |
1113 | { "zh-", "-Hani" }, // Chinese (? not Suppress-Script) |
1114 | { "zu-", "-Latn" }, // Zulu | |
1115 | }; | |
1116 | enum { | |
1117 | kNumLocaleStringPrefixToDefaults = sizeof(localeStringPrefixToDefaults)/sizeof(KeyStringToResultString) | |
1118 | }; | |
1119 | ||
// appleLocaleToLanguageString
// Key/result pairs: the first column is an Apple-style locale string, the second is the
// language string it is replaced with. Only the exceptions are listed here; per the note
// below, plain ll_RR -> ll-RR conversions are done in code instead of in this table.
1120 | static const KeyStringToResultString appleLocaleToLanguageString[] = { |
1121 | // Map locale strings that Apple uses as language IDs to real language strings. |
1122 | // Must be sorted according to how strcmp compares the strings in the first column. |
1123 | // Note: Now we remove all transforms of the form ll_RR -> ll-RR, they are now |
1124 | // handled in the code. <1.19> |
1125 | // |
1126 | // locale lang [ comment ] |
1127 | // string string |
1128 | // ------- ------- |
1129 | { "en_US_POSIX", "en-US-POSIX" }, // POSIX locale, need as language string // <1.17> [3840752] |
1130 | { "zh_CN", "zh-Hans" }, // mainland China => simplified |
d7384798 A |
// NOTE: the zh_HK / zh_MO / zh_SG rows keep the region code in the result ("zh-HK" etc.)
// rather than mapping to a script tag the way zh_CN and zh_TW do.
1131 | { "zh_HK", "zh-HK" }, // Hong Kong => traditional, not currently used |
1132 | { "zh_MO", "zh-MO" }, // Macao => traditional, not currently used |
1133 | { "zh_SG", "zh-SG" }, // Singapore => simplified, not currently used |
bd5b749c A |
1134 | { "zh_TW", "zh-Hant" }, // Taiwan => traditional |
1135 | }; |
// Entry count of appleLocaleToLanguageString, derived at compile time from the array size.
1136 | enum { |
1137 | kNumAppleLocaleToLanguageString = sizeof(appleLocaleToLanguageString)/sizeof(KeyStringToResultString) |
1138 | }; |
1139 | ||
d7384798 | 1140 | /* |
bd5b749c A |
1141 | static const KeyStringToResultString appleLocaleToLanguageStringForCFBundle[] = { |
1142 | // Map locale strings that Apple uses as language IDs to real language strings. | |
1143 | // Must be sorted according to how strcmp compares the strings in the first column. | |
1144 | // | |
1145 | // locale lang [ comment ] | |
1146 | // string string | |
1147 | // ------- ------- | |
1148 | { "de_AT", "de-AT" }, // Austrian German | |
1149 | { "de_CH", "de-CH" }, // Swiss German | |
1150 | // { "de_DE", "de-DE" }, // German for Germany (default), not currently used | |
1151 | { "en_AU", "en-AU" }, // Australian English | |
1152 | { "en_CA", "en-CA" }, // Canadian English | |
1153 | { "en_GB", "en-GB" }, // British English | |
1154 | // { "en_IE", "en-IE" }, // Irish English, not currently used | |
1155 | { "en_US", "en-US" }, // U.S. English | |
1156 | { "en_US_POSIX", "en-US-POSIX" }, // POSIX locale, need as language string // <1.17> [3840752] | |
1157 | // { "fr_BE", "fr-BE" }, // Belgian French, not currently used | |
1158 | { "fr_CA", "fr-CA" }, // Canadian French | |
1159 | { "fr_CH", "fr-CH" }, // Swiss French | |
1160 | // { "fr_FR", "fr-FR" }, // French for France (default), not currently used | |
1161 | { "nl_BE", "nl-BE" }, // Flemish = Vlaams, Dutch for Belgium | |
1162 | // { "nl_NL", "nl-NL" }, // Dutch for Netherlands (default), not currently used | |
1163 | { "pt_BR", "pt-BR" }, // Brazilian Portuguese | |
1164 | { "pt_PT", "pt-PT" }, // Portuguese for Portugal | |
1165 | { "zh_CN", "zh-Hans" }, // mainland China => simplified | |
1166 | { "zh_HK", "zh-Hant" }, // Hong Kong => traditional, not currently used | |
1167 | { "zh_MO", "zh-Hant" }, // Macao => traditional, not currently used | |
1168 | { "zh_SG", "zh-Hans" }, // Singapore => simplified, not currently used | |
1169 | { "zh_TW", "zh-Hant" }, // Taiwan => traditional | |
1170 | }; | |
1171 | enum { | |
1172 | kNumAppleLocaleToLanguageStringForCFBundle = sizeof(appleLocaleToLanguageStringForCFBundle)/sizeof(KeyStringToResultString) | |
1173 | }; | |
d7384798 | 1174 | */ |
bd5b749c | 1175 | |
d7384798 | 1176 | #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX |
bd5b749c A |
1177 | |
// LocaleToLegacyCodes
// One row of the localeToLegacyCodes table: a locale key plus the legacy region code,
// language code and string encoding associated with it.
// Table rows use -1 in regCode or langCode when no code is listed for that field.
1178 | struct LocaleToLegacyCodes { |
1179 | const char * locale; // reduced to language plus one other component (script, region, variant), separators normalized to '_' |
1180 | RegionCode regCode; |
1181 | LangCode langCode; |
1182 | CFStringEncoding encoding; |
1183 | }; |
1184 | typedef struct LocaleToLegacyCodes LocaleToLegacyCodes; |
1185 | ||
// localeToLegacyCodes
// Maps a locale key to its legacy RegionCode, LangCode and CFStringEncoding values.
// The inline /*...*/ annotations name the constant each number stands for; -1 marks a
// field for which no legacy code is listed.
// Keys appear in strcmp order (e.g. "zh_HK" precedes "zh_Hans" because 'K' < 'a');
// presumably the table is binary-searched, so keep new rows in that order -- TODO confirm
// against the lookup code.
1186 | static const LocaleToLegacyCodes localeToLegacyCodes[] = { |
1187 | // locale RegionCode LangCode CFStringEncoding |
1188 | { "af"/*ZA*/, 102/*verAfrikaans*/, 141/*langAfrikaans*/, 0/*Roman*/ }, // Latn |
1189 | { "am", -1, 85/*langAmharic*/, 28/*Ethiopic*/ }, // Ethi |
1190 | { "ar", 16/*verArabic*/, 12/*langArabic*/, 4/*Arabic*/ }, // Arab; |
1191 | { "as", -1, 68/*langAssamese*/, 13/*Bengali*/ }, // Beng; |
1192 | { "ay", -1, 134/*langAymara*/, 0/*Roman*/ }, // Latn; |
9f29f3f8 | 1193 | { "az", -1, 150/*langAzerbaijanRoman*/, 0/*Roman*/ }, // "az" defaults to -Latn |
bd5b749c A |
1194 | { "az_Arab", -1, 50/*langAzerbaijanAr*/, 4/*Arabic*/ }, // Arab; |
1195 | { "az_Cyrl", -1, 49/*langAzerbaijani*/, 7/*Cyrillic*/ }, // Cyrl; |
1196 | { "az_Latn", -1, 150/*langAzerbaijanRoman*/, 0/*Roman*/ }, // Latn; |
1197 | { "be"/*BY*/, 61/*verBelarus*/, 46/*langBelorussian*/, 7/*Cyrillic*/ }, // Cyrl; |
1198 | { "bg"/*BG*/, 72/*verBulgaria*/, 44/*langBulgarian*/, 7/*Cyrillic*/ }, // Cyrl; |
1199 | { "bn", 60/*verBengali*/, 67/*langBengali*/, 13/*Bengali*/ }, // Beng; |
1200 | { "bo", 105/*verTibetan*/, 63/*langTibetan*/, 26/*Tibetan*/ }, // Tibt; |
1201 | { "br", 77/*verBreton*/, 142/*langBreton*/, 39/*Celtic*/ }, // Latn; |
1202 | { "ca"/*ES*/, 73/*verCatalonia*/, 130/*langCatalan*/, 0/*Roman*/ }, // Latn; |
1203 | { "cs"/*CZ*/, 56/*verCzech*/, 38/*langCzech*/, 29/*CentralEurRoman*/ }, // Latn; |
1204 | { "cy", 79/*verWelsh*/, 128/*langWelsh*/, 39/*Celtic*/ }, // Latn; |
1205 | { "da"/*DK*/, 9/*verDenmark*/, 7/*langDanish*/, 0/*Roman*/ }, // Latn; |
1206 | { "de", 3/*verGermany*/, 2/*langGerman*/, 0/*Roman*/ }, // assume "de" defaults to verGermany |
1207 | { "de_1996", 70/*verGermanReformed*/, 2/*langGerman*/, 0/*Roman*/ }, |
1208 | { "de_AT", 92/*verAustria*/, 2/*langGerman*/, 0/*Roman*/ }, |
1209 | { "de_CH", 19/*verGrSwiss*/, 2/*langGerman*/, 0/*Roman*/ }, |
1210 | { "de_DE", 3/*verGermany*/, 2/*langGerman*/, 0/*Roman*/ }, |
1211 | { "dz"/*BT*/, 83/*verBhutan*/, 137/*langDzongkha*/, 26/*Tibetan*/ }, // Tibt; |
1212 | { "el", 20/*verGreece*/, 14/*langGreek*/, 6/*Greek*/ }, // assume "el" defaults to verGreece |
1213 | { "el_CY", 23/*verCyprus*/, 14/*langGreek*/, 6/*Greek*/ }, |
1214 | { "el_GR", 20/*verGreece*/, 14/*langGreek*/, 6/*Greek*/ }, // modern monotonic |
1215 | { "en", 0/*verUS*/, 0/*langEnglish*/, 0/*Roman*/ }, // "en" defaults to verUS (per Chris Hansten) |
1216 | { "en_001", 37/*verInternational*/, 0/*langEnglish*/, 0/*Roman*/ }, |
1217 | { "en_AU", 15/*verAustralia*/, 0/*langEnglish*/, 0/*Roman*/ }, |
1218 | { "en_CA", 82/*verEngCanada*/, 0/*langEnglish*/, 0/*Roman*/ }, |
1219 | { "en_GB", 2/*verBritain*/, 0/*langEnglish*/, 0/*Roman*/ }, |
1220 | { "en_IE", 108/*verIrelandEnglish*/, 0/*langEnglish*/, 0/*Roman*/ }, |
1221 | { "en_SG", 100/*verSingapore*/, 0/*langEnglish*/, 0/*Roman*/ }, |
1222 | { "en_US", 0/*verUS*/, 0/*langEnglish*/, 0/*Roman*/ }, |
1223 | { "eo", 103/*verEsperanto*/, 94/*langEsperanto*/, 0/*Roman*/ }, // Latn; |
1224 | { "es", 8/*verSpain*/, 6/*langSpanish*/, 0/*Roman*/ }, // "es" defaults to verSpain (per Chris Hansten) |
1225 | { "es_419", 86/*verSpLatinAmerica*/, 6/*langSpanish*/, 0/*Roman*/ }, // new BCP 47 tag |
1226 | { "es_ES", 8/*verSpain*/, 6/*langSpanish*/, 0/*Roman*/ }, |
1227 | { "es_MX", 86/*verSpLatinAmerica*/, 6/*langSpanish*/, 0/*Roman*/ }, |
1228 | { "es_US", 86/*verSpLatinAmerica*/, 6/*langSpanish*/, 0/*Roman*/ }, |
1229 | { "et"/*EE*/, 44/*verEstonia*/, 27/*langEstonian*/, 29/*CentralEurRoman*/ }, |
1230 | { "eu", -1, 129/*langBasque*/, 0/*Roman*/ }, // Latn; |
1231 | { "fa"/*IR*/, 48/*verIran*/, 31/*langFarsi/Persian*/, 0x8C/*Farsi*/ }, // Arab; |
1232 | { "fi"/*FI*/, 17/*verFinland*/, 13/*langFinnish*/, 0/*Roman*/ }, |
9f29f3f8 | 1233 | { "fil", -1, 82/*langTagalog*/, 0/*Roman*/ }, // Latn; |
bd5b749c A |
1234 | { "fo"/*FO*/, 47/*verFaroeIsl*/, 30/*langFaroese*/, 37/*Icelandic*/ }, |
1235 | { "fr", 1/*verFrance*/, 1/*langFrench*/, 0/*Roman*/ }, // "fr" defaults to verFrance (per Chris Hansten) |
1236 | { "fr_001", 91/*verFrenchUniversal*/, 1/*langFrench*/, 0/*Roman*/ }, |
1237 | { "fr_BE", 98/*verFrBelgium*/, 1/*langFrench*/, 0/*Roman*/ }, |
1238 | { "fr_CA", 11/*verFrCanada*/, 1/*langFrench*/, 0/*Roman*/ }, |
1239 | { "fr_CH", 18/*verFrSwiss*/, 1/*langFrench*/, 0/*Roman*/ }, |
1240 | { "fr_FR", 1/*verFrance*/, 1/*langFrench*/, 0/*Roman*/ }, |
1241 | { "ga"/*IE*/, 50/*verIreland*/, 35/*langIrishGaelic*/, 0/*Roman*/ }, // no dots (h after) |
1242 | { "ga_Latg"/*IE*/, 81/*verIrishGaelicScrip*/, 146/*langIrishGaelicScript*/, 40/*Gaelic*/ }, // using dots |
1243 | { "gd", 75/*verScottishGaelic*/, 144/*langScottishGaelic*/, 39/*Celtic*/ }, |
1244 | { "gl", -1, 140/*langGalician*/, 0/*Roman*/ }, // Latn; |
1245 | { "gn", -1, 133/*langGuarani*/, 0/*Roman*/ }, // Latn; |
1246 | { "grc", 40/*verGreekAncient*/, 148/*langGreekAncient*/, 6/*Greek*/ }, // polytonic (MacGreek doesn't actually support it) |
1247 | { "gu"/*IN*/, 94/*verGujarati*/, 69/*langGujarati*/, 11/*Gujarati*/ }, // Gujr; |
1248 | { "gv", 76/*verManxGaelic*/, 145/*langManxGaelic*/, 39/*Celtic*/ }, // Latn; |
1249 | { "he"/*IL*/, 13/*verIsrael*/, 10/*langHebrew*/, 5/*Hebrew*/ }, // Hebr; |
1250 | { "hi"/*IN*/, 33/*verIndiaHindi*/, 21/*langHindi*/, 9/*Devanagari*/ }, // Deva; |
1251 | { "hr"/*HR*/, 68/*verCroatia*/, 18/*langCroatian*/, 36/*Croatian*/ }, |
1252 | { "hu"/*HU*/, 43/*verHungary*/, 26/*langHungarian*/, 29/*CentralEurRoman*/ }, |
1253 | { "hy"/*AM*/, 84/*verArmenian*/, 51/*langArmenian*/, 24/*Armenian*/ }, // Armn; |
1254 | { "id", -1, 81/*langIndonesian*/, 0/*Roman*/ }, // Latn; |
1255 | { "is"/*IS*/, 21/*verIceland*/, 15/*langIcelandic*/, 37/*Icelandic*/ }, |
1256 | { "it", 4/*verItaly*/, 3/*langItalian*/, 0/*Roman*/ }, // "it" defaults to verItaly |
1257 | { "it_CH", 36/*verItalianSwiss*/, 3/*langItalian*/, 0/*Roman*/ }, |
1258 | { "it_IT", 4/*verItaly*/, 3/*langItalian*/, 0/*Roman*/ }, |
1259 | { "iu"/*CA*/, 78/*verNunavut*/, 143/*langInuktitut*/, 0xEC/*Inuit*/ }, // Cans; |
1260 | { "ja"/*JP*/, 14/*verJapan*/, 11/*langJapanese*/, 1/*Japanese*/ }, // Jpan; |
1261 | { "jv", -1, 138/*langJavaneseRom*/, 0/*Roman*/ }, // Latn; |
1262 | { "ka"/*GE*/, 85/*verGeorgian*/, 52/*langGeorgian*/, 23/*Georgian*/ }, // Geor; |
1263 | { "kk", -1, 48/*langKazakh*/, 7/*Cyrillic*/ }, // "kk" defaults to -Cyrl; also have -Latn, -Arab |
1264 | { "kl", 107/*verGreenland*/, 149/*langGreenlandic*/, 0/*Roman*/ }, // Latn; |
1265 | { "km", -1, 78/*langKhmer*/, 20/*Khmer*/ }, // Khmr; |
1266 | { "kn", -1, 73/*langKannada*/, 16/*Kannada*/ }, // Knda; |
1267 | { "ko"/*KR*/, 51/*verKorea*/, 23/*langKorean*/, 3/*Korean*/ }, // Hang; |
1268 | { "ks", -1, 61/*langKashmiri*/, 4/*Arabic*/ }, // Arab; |
1269 | { "ku", -1, 60/*langKurdish*/, 4/*Arabic*/ }, // Arab; |
1270 | { "ky", -1, 54/*langKirghiz*/, 7/*Cyrillic*/ }, // Cyrl; also -Latn, -Arab |
1271 | { "la", -1, 131/*langLatin*/, 0/*Roman*/ }, // Latn; |
1272 | { "lo", -1, 79/*langLao*/, 22/*Laotian*/ }, // Laoo; |
1273 | { "lt"/*LT*/, 41/*verLithuania*/, 24/*langLithuanian*/, 29/*CentralEurRoman*/ }, |
1274 | { "lv"/*LV*/, 45/*verLatvia*/, 28/*langLatvian*/, 29/*CentralEurRoman*/ }, |
1275 | { "mg", -1, 93/*langMalagasy*/, 0/*Roman*/ }, // Latn; |
1276 | { "mk"/*MK*/, 67/*verMacedonian*/, 43/*langMacedonian*/, 7/*Cyrillic*/ }, // Cyrl; |
1277 | { "ml", -1, 72/*langMalayalam*/, 17/*Malayalam*/ }, // Mlym; |
9f29f3f8 | 1278 | { "mn", -1, 58/*langMongolianCyr*/, 7/*Cyrillic*/ }, // "mn" defaults to -Cyrl |
bd5b749c A |
1279 | { "mn_Cyrl", -1, 58/*langMongolianCyr*/, 7/*Cyrillic*/ }, // Cyrl; |
1280 | { "mn_Mong", -1, 57/*langMongolian*/, 27/*Mongolian*/ }, // Mong; |
1281 | { "mo", -1, 53/*langMoldavian*/, 7/*Cyrillic*/ }, // Cyrl; |
1282 | { "mr"/*IN*/, 104/*verMarathi*/, 66/*langMarathi*/, 9/*Devanagari*/ }, // Deva; |
1283 | { "ms", -1, 83/*langMalayRoman*/, 0/*Roman*/ }, // "ms" defaults to -Latn; |
1284 | { "ms_Arab", -1, 84/*langMalayArabic*/, 4/*Arabic*/ }, // Arab; |
1285 | { "mt"/*MT*/, 22/*verMalta*/, 16/*langMaltese*/, 0/*Roman*/ }, // Latn; |
1286 | { "mul", 74/*verMultilingual*/, -1, 0 }, |
1287 | { "my", -1, 77/*langBurmese*/, 19/*Burmese*/ }, // Mymr; |
1288 | { "nb"/*NO*/, 12/*verNorway*/, 9/*langNorwegian*/, 0/*Roman*/ }, |
1289 | { "ne"/*NP*/, 106/*verNepal*/, 64/*langNepali*/, 9/*Devanagari*/ }, // Deva; |
1290 | { "nl", 5/*verNetherlands*/, 4/*langDutch*/, 0/*Roman*/ }, // "nl" defaults to verNetherlands |
1291 | { "nl_BE", 6/*verFlemish*/, 34/*langFlemish*/, 0/*Roman*/ }, |
1292 | { "nl_NL", 5/*verNetherlands*/, 4/*langDutch*/, 0/*Roman*/ }, |
1293 | { "nn"/*NO*/, 101/*verNynorsk*/, 151/*langNynorsk*/, 0/*Roman*/ }, |
1294 | { "ny", -1, 92/*langNyanja/Chewa*/, 0/*Roman*/ }, // Latn; |
1295 | { "om", -1, 87/*langOromo*/, 28/*Ethiopic*/ }, // Ethi; |
1296 | { "or", -1, 71/*langOriya*/, 12/*Oriya*/ }, // Orya; |
1297 | { "pa", 95/*verPunjabi*/, 70/*langPunjabi*/, 10/*Gurmukhi*/ }, // Guru; |
1298 | { "pl"/*PL*/, 42/*verPoland*/, 25/*langPolish*/, 29/*CentralEurRoman*/ }, |
1299 | { "ps", -1, 59/*langPashto*/, 0x8C/*Farsi*/ }, // Arab; |
1300 | { "pt", 71/*verBrazil*/, 8/*langPortuguese*/, 0/*Roman*/ }, // "pt" defaults to verBrazil (per Chris Hansten) |
1301 | { "pt_BR", 71/*verBrazil*/, 8/*langPortuguese*/, 0/*Roman*/ }, |
1302 | { "pt_PT", 10/*verPortugal*/, 8/*langPortuguese*/, 0/*Roman*/ }, |
1303 | { "qu", -1, 132/*langQuechua*/, 0/*Roman*/ }, // Latn; |
1304 | { "rn", -1, 91/*langRundi*/, 0/*Roman*/ }, // Latn; |
1305 | { "ro"/*RO*/, 39/*verRomania*/, 37/*langRomanian*/, 38/*Romanian*/ }, |
1306 | { "ru"/*RU*/, 49/*verRussia*/, 32/*langRussian*/, 7/*Cyrillic*/ }, // Cyrl; |
1307 | { "rw", -1, 90/*langKinyarwanda*/, 0/*Roman*/ }, // Latn; |
1308 | { "sa", -1, 65/*langSanskrit*/, 9/*Devanagari*/ }, // Deva; |
1309 | { "sd", -1, 62/*langSindhi*/, 0x8C/*Farsi*/ }, // Arab; |
1310 | { "se", 46/*verSami*/, 29/*langSami*/, 0/*Roman*/ }, |
1311 | { "si", -1, 76/*langSinhalese*/, 18/*Sinhalese*/ }, // Sinh; |
1312 | { "sk"/*SK*/, 57/*verSlovak*/, 39/*langSlovak*/, 29/*CentralEurRoman*/ }, |
1313 | { "sl"/*SI*/, 66/*verSlovenian*/, 40/*langSlovenian*/, 36/*Croatian*/ }, |
1314 | { "so", -1, 88/*langSomali*/, 0/*Roman*/ }, // Latn; |
1315 | { "sq", -1, 36/*langAlbanian*/, 0/*Roman*/ }, |
1316 | { "sr"/*CS,RS*/, 65/*verSerbian*/, 42/*langSerbian*/, 7/*Cyrillic*/ }, // Cyrl; |
1317 | { "su", -1, 139/*langSundaneseRom*/, 0/*Roman*/ }, // Latn; |
1318 | { "sv"/*SE*/, 7/*verSweden*/, 5/*langSwedish*/, 0/*Roman*/ }, |
1319 | { "sw", -1, 89/*langSwahili*/, 0/*Roman*/ }, // Latn; |
1320 | { "ta", -1, 74/*langTamil*/, 14/*Tamil*/ }, // Taml; |
1321 | { "te", -1, 75/*langTelugu*/, 15/*Telugu*/ }, // Telu |
1322 | { "tg", -1, 55/*langTajiki*/, 7/*Cyrillic*/ }, // "tg" defaults to "Cyrl" |
1323 | { "tg_Cyrl", -1, 55/*langTajiki*/, 7/*Cyrillic*/ }, // Cyrl; also -Latn, -Arab |
1324 | { "th"/*TH*/, 54/*verThailand*/, 22/*langThai*/, 21/*Thai*/ }, // Thai; |
1325 | { "ti", -1, 86/*langTigrinya*/, 28/*Ethiopic*/ }, // Ethi; |
1326 | { "tk", -1, 56/*langTurkmen*/, 7/*Cyrillic*/ }, // "tk" defaults to Cyrl |
1327 | { "tk_Cyrl", -1, 56/*langTurkmen*/, 7/*Cyrillic*/ }, // Cyrl; also -Latn, -Arab |
1328 | { "tl", -1, 82/*langTagalog*/, 0/*Roman*/ }, // Latn; |
1329 | { "to"/*TO*/, 88/*verTonga*/, 147/*langTongan*/, 0/*Roman*/ }, // Latn; |
1330 | { "tr"/*TR*/, 24/*verTurkey*/, 17/*langTurkish*/, 35/*Turkish*/ }, // Latn; |
1331 | { "tt", -1, 135/*langTatar*/, 7/*Cyrillic*/ }, // Cyrl; |
1332 | { "tt_Cyrl", -1, 135/*langTatar*/, 7/*Cyrillic*/ }, // Cyrl; |
1333 | { "ug", -1, 136/*langUighur*/, 4/*Arabic*/ }, // Arab; |
1334 | { "uk"/*UA*/, 62/*verUkraine*/, 45/*langUkrainian*/, 7/*Cyrillic*/ }, // Cyrl; |
1335 | { "und", 55/*verScriptGeneric*/, -1, 0 }, |
1336 | { "ur", 34/*verPakistanUrdu*/, 20/*langUrdu*/, 0x8C/*Farsi*/ }, // "ur" defaults to verPakistanUrdu |
1337 | { "ur_IN", 96/*verIndiaUrdu*/, 20/*langUrdu*/, 0x8C/*Farsi*/ }, // Arab |
1338 | { "ur_PK", 34/*verPakistanUrdu*/, 20/*langUrdu*/, 0x8C/*Farsi*/ }, // Arab |
1339 | { "uz"/*UZ*/, 99/*verUzbek*/, 47/*langUzbek*/, 7/*Cyrillic*/ }, // Cyrl; also -Latn, -Arab |
1340 | { "uz_Cyrl", 99/*verUzbek*/, 47/*langUzbek*/, 7/*Cyrillic*/ }, |
1341 | { "vi"/*VN*/, 97/*verVietnam*/, 80/*langVietnamese*/, 30/*Vietnamese*/ }, // Latn |
1342 | { "yi", -1, 41/*langYiddish*/, 5/*Hebrew*/ }, // Hebr; |
1343 | { "zh", 52/*verChina*/, 33/*langSimpChinese*/, 25/*ChineseSimp*/ }, // "zh" defaults to verChina, langSimpChinese |
1344 | { "zh_CN", 52/*verChina*/, 33/*langSimpChinese*/, 25/*ChineseSimp*/ }, |
1345 | { "zh_HK", 53/*verTaiwan*/, 19/*langTradChinese*/, 2/*ChineseTrad*/ }, |
1346 | { "zh_Hans", 52/*verChina*/, 33/*langSimpChinese*/, 25/*ChineseSimp*/ }, |
1347 | { "zh_Hant", 53/*verTaiwan*/, 19/*langTradChinese*/, 2/*ChineseTrad*/ }, |
1348 | { "zh_MO", 53/*verTaiwan*/, 19/*langTradChinese*/, 2/*ChineseTrad*/ }, |
1349 | { "zh_SG", 52/*verChina*/, 33/*langSimpChinese*/, 25/*ChineseSimp*/ }, |
1350 | { "zh_TW", 53/*verTaiwan*/, 19/*langTradChinese*/, 2/*ChineseTrad*/ }, |
1351 | }; |
// Row count of localeToLegacyCodes, derived at compile time from the array size.
1352 | enum { |
1353 | kNumLocaleToLegacyCodes = sizeof(localeToLegacyCodes)/sizeof(localeToLegacyCodes[0]) |
1354 | }; |
1355 | ||
d7384798 A |
1356 | #endif |
1357 | ||
bd5b749c A |
1358 | /* |
1359 | For reference here is a list of ICU locales with variants and how some | |
1360 | of them are canonicalized with the ICU function uloc_canonicalize: | |
1361 | ||
1362 | ICU 3.0 has: | |
1363 | en_US_POSIX x no change | |
1364 | hy_AM_REVISED x no change | |
1365 | ja_JP_TRADITIONAL -> ja_JP@calendar=japanese | |
1366 | th_TH_TRADITIONAL -> th_TH@calendar=buddhist | |
1367 | ||
1368 | ICU 2.8 also had the following (now obsolete): | |
1369 | ca_ES_PREEURO | |
1370 | de__PHONEBOOK -> de@collation=phonebook | |
1371 | de_AT_PREEURO | |
1372 | de_DE_PREEURO | |
1373 | de_LU_PREEURO | |
1374 | el_GR_PREEURO | |
1375 | en_BE_PREEURO | |
1376 | en_GB_EURO -> en_GB@currency=EUR | |
1377 | en_IE_PREEURO -> en_IE@currency=IEP | |
1378 | es__TRADITIONAL -> es@collation=traditional | |
1379 | es_ES_PREEURO | |
1380 | eu_ES_PREEURO | |
1381 | fi_FI_PREEURO | |
1382 | fr_BE_PREEURO | |
1383 | fr_FR_PREEURO -> fr_FR@currency=FRF | |
1384 | fr_LU_PREEURO | |
1385 | ga_IE_PREEURO | |
1386 | gl_ES_PREEURO | |
1387 | hi__DIRECT -> hi@collation=direct | |
1388 | it_IT_PREEURO | |
1389 | nl_BE_PREEURO | |
1390 | nl_NL_PREEURO | |
1391 | pt_PT_PREEURO | |
1392 | zh__PINYIN -> zh@collation=pinyin | |
1393 | zh_TW_STROKE -> zh_TW@collation=stroke | |
1394 | ||
1395 | */ | |
1396 | ||
1397 | // _CompareTestEntryToTableEntryKey | |
1398 | // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString) | |
1399 | // comparison function for bsearch | |
1400 | static int _CompareTestEntryToTableEntryKey(const void *testEntryPtr, const void *tableEntryKeyPtr) { | |
1401 | return strcmp( ((const KeyStringToResultString *)testEntryPtr)->key, ((const KeyStringToResultString *)tableEntryKeyPtr)->key ); | |
1402 | } | |
1403 | ||
1404 | // _CompareTestEntryPrefixToTableEntryKey | |
1405 | // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString) | |
1406 | // Comparison function for bsearch. Assumes prefix IS terminated with '-' or '_'. | |
1407 | // Do the following instead of strlen & strncmp so we don't walk tableEntry key twice. | |
1408 | static int _CompareTestEntryPrefixToTableEntryKey(const void *testEntryPtr, const void *tableEntryKeyPtr) { | |
1409 | const char * testPtr = ((const KeyStringToResultString *)testEntryPtr)->key; | |
1410 | const char * tablePtr = ((const KeyStringToResultString *)tableEntryKeyPtr)->key; | |
1411 | ||
1412 | while ( *testPtr == *tablePtr && *tablePtr != 0 ) { | |
1413 | testPtr++; tablePtr++; | |
1414 | } | |
1415 | if ( *tablePtr != 0 ) { | |
1416 | // strings are different, and the string in the table has not run out; | |
1417 | // i.e. the table entry is not a prefix of the text string. | |
1418 | return ( *testPtr < *tablePtr )? -1: 1; | |
1419 | } | |
1420 | return 0; | |
1421 | } | |
1422 | ||
1423 | // _CompareLowerTestEntryPrefixToTableEntryKey | |
1424 | // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString) | |
1425 | // Comparison function for bsearch. Assumes prefix NOT terminated with '-' or '_'. | |
1426 | // Lowercases the test string before comparison (the table should already have lowercased entries). | |
1427 | static int _CompareLowerTestEntryPrefixToTableEntryKey(const void *testEntryPtr, const void *tableEntryKeyPtr) { | |
1428 | const char * testPtr = ((const KeyStringToResultString *)testEntryPtr)->key; | |
1429 | const char * tablePtr = ((const KeyStringToResultString *)tableEntryKeyPtr)->key; | |
1430 | char lowerTestChar; | |
1431 | ||
1432 | while ( (lowerTestChar = tolower(*testPtr)) == *tablePtr && *tablePtr != 0 && lowerTestChar != '_' ) { // <1.9> | |
1433 | testPtr++; tablePtr++; | |
1434 | } | |
1435 | if ( *tablePtr != 0 ) { | |
1436 | // strings are different, and the string in the table has not run out; | |
1437 | // i.e. the table entry is not a prefix of the text string. | |
1438 | if (lowerTestChar == '_') // <1.9> | |
1439 | return -1; // <1.9> | |
1440 | return ( lowerTestChar < *tablePtr )? -1: 1; | |
1441 | } | |
1442 | // The string in the table has run out. If the test string char is not alnum, | |
1443 | // then the string matches, else the test string sorts after. | |
1444 | return ( !isalnum(lowerTestChar) )? 0: 1; | |
1445 | } | |
1446 | ||
// _DeleteCharsAtPointer
// (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
// Drops the first _length_ characters of the string at _stringPtr_ by sliding the
// remainder (terminating NUL included) down over them, in place.
// The caller guarantees the string holds at least _length_ characters.
static void _DeleteCharsAtPointer(char *stringPtr, int length) {
    const char *    tailPtr = stringPtr + length;   // first character that survives
    char            movedChar;

    for (;;) {
        movedChar = *tailPtr++;
        *stringPtr++ = movedChar;
        if (movedChar == 0) {
            break;      // terminator copied, done
        }
    }
}
1456 | ||
// _CopyReplacementAtPointer
// (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
// Overwrites the characters starting at stringPtr with the replacement string.
// The replacement's terminating NUL is NOT copied, so whatever follows the
// overwritten span in the destination is left intact.
static void _CopyReplacementAtPointer(char *stringPtr, const char *replacementPtr) {
    int offset;

    for (offset = 0; replacementPtr[offset] != 0; offset++) {
        stringPtr[offset] = replacementPtr[offset];
    }
}
1465 | ||
1466 | // _CheckForTag | |
1467 | // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString) | |
1468 | static Boolean _CheckForTag(const char *localeStringPtr, const char *tagPtr, int tagLen) { | |
1469 | return ( strncmp(localeStringPtr, tagPtr, tagLen) == 0 && !isalnum(localeStringPtr[tagLen]) ); | |
1470 | } | |
1471 | ||
// _ReplacePrefix
// Replaces the first oldPrefixLen characters of locString with newPrefix, in place.
//  - shorter replacement: the tail is pulled left to close the gap
//  - longer replacement: the tail (including the NUL) is pushed right, but only if the
//    grown string still fits in locStringMaxLen bytes; otherwise locString is left untouched
//  - equal length: the prefix is simply overwritten
// (Code moved out of _UpdateFullLocaleString into a separate function)        // <1.10>
static void _ReplacePrefix(char locString[], int locStringMaxLen, int oldPrefixLen, const char *newPrefix) {
    int replacementLen = strlen(newPrefix);
    int growth = replacementLen - oldPrefixLen;

    if (growth < 0) {
        // replacement is shorter: drop the surplus characters right after where it will end
        _DeleteCharsAtPointer(locString + replacementLen, -growth);
    } else if (growth > 0) {
        // replacement is longer: need room for 'growth' extra characters
        int currentLen = strlen(locString);
        int idx;

        if (currentLen + growth >= locStringMaxLen) {
            // no room, can't do substitution
            return;
        }
        // shift the tail right, starting from the terminating NUL and working backwards
        for (idx = currentLen; idx >= oldPrefixLen; idx--) {
            locString[idx + growth] = locString[idx];
        }
    }

    // the string now has exactly replacementLen characters reserved at the front
    _CopyReplacementAtPointer(locString, newPrefix);
}
1505 | ||
1506 | // _UpdateFullLocaleString | |
1507 | // Given a locale string that uses standard codes (not a special old-style Apple string), | |
1508 | // update all the language codes and region codes to latest versions, map 3-letter | |
1509 | // language codes to 2-letter codes if possible, and normalize casing. If requested, return | |
1510 | // pointers to a language-region variant subtag (if present) and a region tag (if present). | |
1511 | // (add locStringMaxLen parameter) // <1.10> | |
1512 | static void _UpdateFullLocaleString(char inLocaleString[], int locStringMaxLen, | |
1513 | char **langRegSubtagRef, char **regionTagRef, | |
1514 | char varKeyValueString[]) // <1.17> | |
1515 | { | |
1516 | KeyStringToResultString testEntry; | |
1517 | KeyStringToResultString * foundEntry; | |
1518 | const SpecialCaseUpdates * specialCasePtr; | |
1519 | char * inLocalePtr; | |
1520 | char * subtagPtr; | |
1521 | char * langRegSubtag = NULL; | |
1522 | char * regionTag = NULL; | |
1523 | char * variantTag = NULL; | |
1524 | Boolean subtagHasDigits, pastPrimarySubtag, hadRegion; | |
1525 | ||
1526 | // 1. First replace any non-canonical prefix (case insensitive) with canonical | |
1527 | // (change 3-letter ISO 639 code to 2-letter, update obsolete ISO 639 codes & RFC 3066 tags, etc.) | |
1528 | ||
1529 | testEntry.key = inLocaleString; | |
1530 | foundEntry = (KeyStringToResultString *)bsearch( &testEntry, localeStringPrefixToCanonical, kNumLocaleStringPrefixToCanonical, | |
1531 | sizeof(KeyStringToResultString), _CompareLowerTestEntryPrefixToTableEntryKey ); | |
1532 | if (foundEntry) { | |
1533 | // replace key (at beginning of string) with result | |
1534 | _ReplacePrefix(inLocaleString, locStringMaxLen, strlen(foundEntry->key), foundEntry->result); // <1.10> | |
1535 | } | |
1536 | ||
1537 | // 2. Walk through input string, normalizing case & marking use of ISO 3166 codes | |
1538 | ||
1539 | inLocalePtr = inLocaleString; | |
1540 | subtagPtr = inLocaleString; | |
1541 | subtagHasDigits = false; | |
1542 | pastPrimarySubtag = false; | |
1543 | hadRegion = false; | |
1544 | ||
1545 | while ( true ) { | |
1546 | if ( isalpha(*inLocalePtr) ) { | |
1547 | // if not past a region tag, then lowercase, else uppercase | |
1548 | *inLocalePtr = (!hadRegion)? tolower(*inLocalePtr): toupper(*inLocalePtr); | |
1549 | } else if ( isdigit(*inLocalePtr) ) { | |
1550 | subtagHasDigits = true; | |
1551 | } else { | |
1552 | ||
1553 | if (!pastPrimarySubtag) { | |
1554 | // may have a NULL primary subtag | |
1555 | if (subtagHasDigits) { | |
1556 | break; | |
1557 | } | |
1558 | pastPrimarySubtag = true; | |
1559 | } else if (!hadRegion) { | |
1560 | // We are after any primary language subtag, but not past any region tag. | |
1561 | // This subtag is preceded by '-' or '_'. | |
1562 | int subtagLength = inLocalePtr - subtagPtr; // includes leading '-' or '_' | |
1563 | ||
1564 | if (subtagLength == 3 && !subtagHasDigits) { | |
1565 | // potential ISO 3166 code for region or language variant; if so, needs uppercasing | |
1566 | if (*subtagPtr == '_') { | |
1567 | regionTag = subtagPtr; | |
1568 | hadRegion = true; | |
1569 | subtagPtr[1] = toupper(subtagPtr[1]); | |
1570 | subtagPtr[2] = toupper(subtagPtr[2]); | |
1571 | } else if (langRegSubtag == NULL) { | |
1572 | langRegSubtag = subtagPtr; | |
1573 | subtagPtr[1] = toupper(subtagPtr[1]); | |
1574 | subtagPtr[2] = toupper(subtagPtr[2]); | |
1575 | } | |
1576 | } else if (subtagLength == 4 && subtagHasDigits) { | |
1577 | // potential UN M.49 region code | |
1578 | if (*subtagPtr == '_') { | |
1579 | regionTag = subtagPtr; | |
1580 | hadRegion = true; | |
1581 | } else if (langRegSubtag == NULL) { | |
1582 | langRegSubtag = subtagPtr; | |
1583 | } | |
1584 | } else if (subtagLength == 5 && !subtagHasDigits) { | |
1585 | // ISO 15924 script code, uppercase just the first letter | |
1586 | subtagPtr[1] = toupper(subtagPtr[1]); | |
1587 | } else if (subtagLength == 1 && *subtagPtr == '_') { // <1.17> | |
1588 | hadRegion = true; | |
1589 | } | |
1590 | ||
1591 | if (!hadRegion) { | |
1592 | // convert improper '_' to '-' | |
1593 | *subtagPtr = '-'; | |
1594 | } | |
1595 | } else { | |
1596 | variantTag = subtagPtr; // <1.17> | |
1597 | } | |
1598 | ||
1599 | if (*inLocalePtr == '-' || *inLocalePtr == '_') { | |
1600 | subtagPtr = inLocalePtr; | |
1601 | subtagHasDigits = false; | |
1602 | } else { | |
1603 | break; | |
1604 | } | |
1605 | } | |
1606 | ||
1607 | inLocalePtr++; | |
1608 | } | |
1609 | ||
1610 | // 3 If there is a variant tag, see if ICU canonicalizes it to keywords. // <1.17> [3577669] | |
1611 | // If so, copy the keywords to varKeyValueString and delete the variant tag | |
1612 | // from the original string (but don't otherwise use the ICU canonicalization). | |
1613 | varKeyValueString[0] = 0; | |
856091c5 | 1614 | #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX |
bd5b749c A |
1615 | if (variantTag) { |
1616 | UErrorCode icuStatus; | |
1617 | int icuCanonStringLen; | |
1618 | char * varKeyValueStringPtr = varKeyValueString; | |
1619 | ||
1620 | icuStatus = U_ZERO_ERROR; | |
1621 | icuCanonStringLen = uloc_canonicalize( inLocaleString, varKeyValueString, locStringMaxLen, &icuStatus ); | |
1622 | if ( U_SUCCESS(icuStatus) ) { | |
1623 | char * icuCanonStringPtr = varKeyValueString; | |
1624 | ||
1625 | if (icuCanonStringLen >= locStringMaxLen) | |
1626 | icuCanonStringLen = locStringMaxLen - 1; | |
1627 | varKeyValueString[icuCanonStringLen] = 0; | |
1628 | while (*icuCanonStringPtr != 0 && *icuCanonStringPtr != ULOC_KEYWORD_SEPARATOR) | |
1629 | ++icuCanonStringPtr; | |
1630 | if (*icuCanonStringPtr != 0) { | |
1631 | // the canonicalized string has keywords | |
1632 | // delete the variant tag in the original string (and other trailing '_' or '-') | |
1633 | *variantTag-- = 0; | |
1634 | while (*variantTag == '_') | |
1635 | *variantTag-- = 0; | |
1636 | // delete all of the canonicalized string except the keywords | |
1637 | while (*icuCanonStringPtr != 0) | |
1638 | *varKeyValueStringPtr++ = *icuCanonStringPtr++; | |
1639 | } | |
1640 | *varKeyValueStringPtr = 0; | |
1641 | } | |
1642 | } | |
856091c5 | 1643 | #endif |
bd5b749c A |
1644 | |
1645 | // 4. Handle special cases of updating region codes, or updating language codes based on | |
1646 | // region code. | |
1647 | for (specialCasePtr = specialCases; specialCasePtr->reg1 != NULL; specialCasePtr++) { | |
1648 | if ( specialCasePtr->lang == NULL || _CheckForTag(inLocaleString, specialCasePtr->lang, 2) ) { | |
1649 | // OK, we matched any language specified. Now what needs updating? | |
1650 | char * foundTag; | |
1651 | ||
1652 | if ( isupper(specialCasePtr->update1[0]) ) { | |
1653 | // updating a region code | |
1654 | if ( ( foundTag = strstr(inLocaleString, specialCasePtr->reg1) ) && !isalnum(foundTag[3]) ) { | |
1655 | _CopyReplacementAtPointer(foundTag+1, specialCasePtr->update1); | |
1656 | } | |
1657 | if ( regionTag && _CheckForTag(regionTag+1, specialCasePtr->reg1 + 1, 2) ) { | |
1658 | _CopyReplacementAtPointer(regionTag+1, specialCasePtr->update1); | |
1659 | } | |
1660 | ||
1661 | } else { | |
1662 | // updating the language, there will be two choices based on region | |
1663 | if ( ( regionTag && _CheckForTag(regionTag+1, specialCasePtr->reg1 + 1, 2) ) || | |
1664 | ( ( foundTag = strstr(inLocaleString, specialCasePtr->reg1) ) && !isalnum(foundTag[3]) ) ) { | |
1665 | _CopyReplacementAtPointer(inLocaleString, specialCasePtr->update1); | |
1666 | } else if ( ( regionTag && _CheckForTag(regionTag+1, specialCasePtr->reg2 + 1, 2) ) || | |
1667 | ( ( foundTag = strstr(inLocaleString, specialCasePtr->reg2) ) && !isalnum(foundTag[3]) ) ) { | |
1668 | _CopyReplacementAtPointer(inLocaleString, specialCasePtr->update2); | |
1669 | } | |
1670 | } | |
1671 | } | |
1672 | } | |
1673 | ||
1674 | // 5. return pointers if requested. | |
1675 | if (langRegSubtagRef != NULL) { | |
1676 | *langRegSubtagRef = langRegSubtag; | |
1677 | } | |
1678 | if (regionTagRef != NULL) { | |
1679 | *regionTagRef = regionTag; | |
1680 | } | |
1681 | } | |
1682 | ||
1683 | ||
1684 | // _RemoveSubstringsIfPresent | |
1685 | // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString) | |
1686 | // substringList is a list of space-separated substrings to strip if found in localeString | |
1687 | static void _RemoveSubstringsIfPresent(char *localeString, const char *substringList) { | |
1688 | while (*substringList != 0) { | |
1689 | char currentSubstring[kLocaleIdentifierCStringMax]; | |
1690 | int substringLength = 0; | |
1691 | char * foundSubstring; | |
1692 | ||
1693 | // copy current substring & get its length | |
1694 | while ( isgraph(*substringList) ) { | |
1695 | currentSubstring[substringLength++] = *substringList++; | |
1696 | } | |
1697 | // move to next substring | |
1698 | while ( isspace(*substringList) ) { | |
1699 | substringList++; | |
1700 | } | |
1701 | ||
1702 | // search for current substring in locale string | |
1703 | if (substringLength == 0) | |
1704 | continue; | |
1705 | currentSubstring[substringLength] = 0; | |
1706 | foundSubstring = strstr(localeString, currentSubstring); | |
1707 | ||
1708 | // if substring is found, delete it | |
1709 | if (foundSubstring) { | |
1710 | _DeleteCharsAtPointer(foundSubstring, substringLength); | |
1711 | } | |
1712 | } | |
1713 | } | |
1714 | ||
1715 | ||
1716 | // _GetKeyValueString // <1.10> | |
1717 | // Removes any key-value string from inLocaleString, puts canonized version in keyValueString | |
1718 | ||
1719 | static void _GetKeyValueString(char inLocaleString[], char keyValueString[]) { | |
1720 | char * inLocalePtr = inLocaleString; | |
1721 | ||
1722 | while (*inLocalePtr != 0 && *inLocalePtr != ULOC_KEYWORD_SEPARATOR) { | |
1723 | inLocalePtr++; | |
1724 | } | |
1725 | if (*inLocalePtr != 0) { // we found a key-value section | |
1726 | char * keyValuePtr = keyValueString; | |
1727 | ||
1728 | *keyValuePtr = *inLocalePtr; | |
1729 | *inLocalePtr = 0; | |
1730 | do { | |
1731 | if ( *(++inLocalePtr) != ' ' ) { | |
1732 | *(++keyValuePtr) = *inLocalePtr; // remove "tolower() for *inLocalePtr" // <1.11> | |
1733 | } | |
1734 | } while (*inLocalePtr != 0); | |
1735 | } else { | |
1736 | keyValueString[0] = 0; | |
1737 | } | |
1738 | } | |
1739 | ||
// _AppendKeyValueString
// For every keyword that ICU enumerates in keyValueString, fetches its value and sets that
// keyword/value pair on inLocaleString (capacity locStringMaxLen) with uloc_setKeywordValue.
// Does nothing if keyValueString is empty, if no keyword enumeration can be opened, or on
// targets where the ICU-backed section is compiled out.
static void _AppendKeyValueString(char inLocaleString[], int locStringMaxLen, char keyValueString[]) {
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
    if (keyValueString[0] == 0) {
        return;
    }

    UErrorCode status = U_ZERO_ERROR;
    UEnumeration *keywords = uloc_openKeywords(keyValueString, &status);
    if (keywords == NULL) {
        return;
    }

    char valueBuffer[ULOC_KEYWORDS_CAPACITY];	// use as max for keyword value
    const char *name;
    int32_t nameLength;

    // stop at the first ICU failure or when the enumeration is exhausted
    while ( U_SUCCESS(status) && (name = uenum_next(keywords, &nameLength, &status)) != NULL ) {
        uloc_getKeywordValue(keyValueString, name, valueBuffer, sizeof(valueBuffer), &status);
        uloc_setKeywordValue(name, valueBuffer, inLocaleString, locStringMaxLen, &status);
    }
    uenum_close(keywords);
#endif
}
1762 | ||
cf7d2af9 | 1763 | // __private_extern__ CFStringRef _CFLocaleCreateCanonicalLanguageIdentifierForCFBundle(CFAllocatorRef allocator, CFStringRef localeIdentifier) {} |
bd5b749c A |
1764 | |
1765 | CFStringRef CFLocaleCreateCanonicalLanguageIdentifierFromString(CFAllocatorRef allocator, CFStringRef localeIdentifier) { | |
1766 | char inLocaleString[kLocaleIdentifierCStringMax]; | |
1767 | CFStringRef outStringRef = NULL; | |
1768 | ||
1769 | if ( localeIdentifier && CFStringGetCString(localeIdentifier, inLocaleString, sizeof(inLocaleString), kCFStringEncodingASCII) ) { | |
1770 | KeyStringToResultString testEntry; | |
1771 | KeyStringToResultString * foundEntry; | |
1772 | char keyValueString[sizeof(inLocaleString)]; // <1.10> | |
1773 | char varKeyValueString[sizeof(inLocaleString)]; // <1.17> | |
1774 | ||
1775 | _GetKeyValueString(inLocaleString, keyValueString); // <1.10> | |
1776 | testEntry.result = NULL; | |
1777 | ||
9f29f3f8 A |
1778 | // A. Special case aa_SAAHO, no_BOKMAL, and no_NYNORSK since they are legacy identifiers that don't follow the normal rules (http://unicode.org/cldr/trac/browser/trunk/common/supplemental/supplementalMetadata.xml) |
1779 | ||
bd5b749c | 1780 | testEntry.key = inLocaleString; |
9f29f3f8 A |
1781 | KeyStringToResultString specialCase = testEntry; |
1782 | foundEntry = &specialCase; | |
1783 | ||
1784 | if (strncmp("aa_SAAHO", testEntry.key, strlen("aa_SAAHO")) == 0) { | |
1785 | foundEntry->result = "ssy"; | |
1786 | } else if (strncmp("no_BOKMAL", testEntry.key, strlen("no_BOKMAL")) == 0) { | |
1787 | foundEntry->result = "nb"; | |
1788 | } else if (strncmp("no_NYNORSK", testEntry.key, strlen("no_NYNORSK")) == 0) { | |
1789 | foundEntry->result = "nn"; | |
1790 | } else { | |
1791 | // B. First check if input string matches an old-style string that has a replacement | |
1792 | // (do this before case normalization) | |
1793 | foundEntry = (KeyStringToResultString *)bsearch( &testEntry, oldAppleLocaleToCanonical, kNumOldAppleLocaleToCanonical, | |
1794 | sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey ); | |
1795 | } | |
bd5b749c A |
1796 | if (foundEntry) { |
1797 | // It does match, so replace old string with new | |
1798 | strlcpy(inLocaleString, foundEntry->result, sizeof(inLocaleString)); | |
1799 | varKeyValueString[0] = 0; | |
1800 | } else { | |
1801 | char * langRegSubtag = NULL; | |
1802 | char * regionTag = NULL; | |
1803 | ||
9f29f3f8 | 1804 | // C. No match with an old-style string, use input string but update codes, normalize case, etc. |
bd5b749c A |
1805 | _UpdateFullLocaleString(inLocaleString, sizeof(inLocaleString), &langRegSubtag, ®ionTag, varKeyValueString); // <1.10><1.17><1.19> |
1806 | ||
1807 | // if the language part already includes a regional variant, then delete any region tag. <1.19> | |
1808 | if (langRegSubtag && regionTag) | |
1809 | *regionTag = 0; | |
1810 | } | |
1811 | ||
9f29f3f8 | 1812 | // D. Now we have an up-to-date locale string, but we need to strip defaults and turn it into a language string |
bd5b749c A |
1813 | |
1814 | // 1. Strip defaults in input string based on initial part of locale string | |
1815 | // (mainly to strip default script tag for a language) | |
1816 | testEntry.key = inLocaleString; | |
1817 | foundEntry = (KeyStringToResultString *)bsearch( &testEntry, localeStringPrefixToDefaults, kNumLocaleStringPrefixToDefaults, | |
1818 | sizeof(KeyStringToResultString), _CompareTestEntryPrefixToTableEntryKey ); | |
1819 | if (foundEntry) { | |
1820 | // The input string begins with a character sequence for which | |
1821 | // there are default substrings which should be stripped if present | |
1822 | _RemoveSubstringsIfPresent(inLocaleString, foundEntry->result); | |
1823 | } | |
1824 | ||
1825 | // 2. If the string matches a locale string used by Apple as a language string, turn it into a language string | |
1826 | testEntry.key = inLocaleString; | |
1827 | foundEntry = (KeyStringToResultString *)bsearch( &testEntry, appleLocaleToLanguageString, kNumAppleLocaleToLanguageString, | |
1828 | sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey ); | |
1829 | if (foundEntry) { | |
1830 | // it does match | |
1831 | strlcpy(inLocaleString, foundEntry->result, sizeof(inLocaleString)); | |
1832 | } else { | |
1833 | // skip to any region tag or java-type variant | |
1834 | char * inLocalePtr = inLocaleString; | |
1835 | while (*inLocalePtr != 0 && *inLocalePtr != '_') { | |
1836 | inLocalePtr++; | |
1837 | } | |
1838 | // if there is still a region tag, turn it into a language variant <1.19> | |
1839 | if (*inLocalePtr == '_') { | |
1840 | // handle 3-digit regions in addition to 2-letter ones | |
1841 | char * regionTag = inLocalePtr++; | |
1842 | long expectedLength = 0; | |
1843 | if ( isalpha(*inLocalePtr) ) { | |
1844 | while ( isalpha(*(++inLocalePtr)) ) | |
1845 | ; | |
1846 | expectedLength = 3; | |
1847 | } else if ( isdigit(*inLocalePtr) ) { | |
1848 | while ( isdigit(*(++inLocalePtr)) ) | |
1849 | ; | |
1850 | expectedLength = 4; | |
1851 | } | |
1852 | *regionTag = (inLocalePtr - regionTag == expectedLength)? '-': 0; | |
1853 | } | |
1854 | // anything else at/after '_' just gets deleted | |
1855 | *inLocalePtr = 0; | |
1856 | } | |
1857 | ||
9f29f3f8 | 1858 | // E. Re-append any key-value strings, now canonical // <1.10><1.17> |
bd5b749c A |
1859 | _AppendKeyValueString( inLocaleString, sizeof(inLocaleString), varKeyValueString ); |
1860 | _AppendKeyValueString( inLocaleString, sizeof(inLocaleString), keyValueString ); | |
1861 | ||
1862 | // All done, return what we came up with. | |
1863 | outStringRef = CFStringCreateWithCString(allocator, inLocaleString, kCFStringEncodingASCII); | |
1864 | } | |
1865 | ||
1866 | return outStringRef; | |
1867 | } | |
1868 | ||
1869 | ||
1870 | CFStringRef CFLocaleCreateCanonicalLocaleIdentifierFromString(CFAllocatorRef allocator, CFStringRef localeIdentifier) { | |
1871 | char inLocaleString[kLocaleIdentifierCStringMax]; | |
1872 | CFStringRef outStringRef = NULL; | |
1873 | ||
1874 | if ( localeIdentifier && CFStringGetCString(localeIdentifier, inLocaleString, sizeof(inLocaleString), kCFStringEncodingASCII) ) { | |
1875 | KeyStringToResultString testEntry; | |
1876 | KeyStringToResultString * foundEntry; | |
1877 | char keyValueString[sizeof(inLocaleString)]; // <1.10> | |
1878 | char varKeyValueString[sizeof(inLocaleString)]; // <1.17> | |
1879 | ||
1880 | _GetKeyValueString(inLocaleString, keyValueString); // <1.10> | |
1881 | testEntry.result = NULL; | |
1882 | ||
1883 | // A. First check if input string matches an old-style Apple string that has a replacement | |
1884 | // (do this before case normalization) | |
1885 | testEntry.key = inLocaleString; | |
1886 | foundEntry = (KeyStringToResultString *)bsearch( &testEntry, oldAppleLocaleToCanonical, kNumOldAppleLocaleToCanonical, | |
1887 | sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey ); | |
1888 | if (foundEntry) { | |
1889 | // It does match, so replace old string with new // <1.10> | |
1890 | strlcpy(inLocaleString, foundEntry->result, sizeof(inLocaleString)); | |
1891 | varKeyValueString[0] = 0; | |
1892 | } else { | |
1893 | char * langRegSubtag = NULL; | |
1894 | char * regionTag = NULL; | |
1895 | ||
1896 | // B. No match with an old-style string, use input string but update codes, normalize case, etc. | |
1897 | _UpdateFullLocaleString(inLocaleString, sizeof(inLocaleString), &langRegSubtag, ®ionTag, varKeyValueString); // <1.10><1.17> | |
1898 | ||
1899 | ||
1900 | // C. Now strip defaults that are implied by other fields. | |
1901 | ||
1902 | // 1. If an ISO 3166 region tag matches an ISO 3166 regional language variant subtag, strip the latter. | |
1903 | if ( langRegSubtag && regionTag && strncmp(langRegSubtag+1, regionTag+1, 2) == 0 ) { | |
1904 | _DeleteCharsAtPointer(langRegSubtag, 3); | |
1905 | } | |
1906 | ||
1907 | // 2. Strip defaults in input string based on final region tag in locale string | |
1908 | // (mainly for Chinese, to strip -Hans for _CN/_SG, -Hant for _TW/_HK/_MO) | |
1909 | if ( regionTag ) { | |
1910 | testEntry.key = regionTag; | |
1911 | foundEntry = (KeyStringToResultString *)bsearch( &testEntry, localeStringRegionToDefaults, kNumLocaleStringRegionToDefaults, | |
1912 | sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey ); | |
1913 | if (foundEntry) { | |
1914 | _RemoveSubstringsIfPresent(inLocaleString, foundEntry->result); | |
1915 | } | |
1916 | } | |
1917 | ||
1918 | // 3. Strip defaults in input string based on initial part of locale string | |
1919 | // (mainly to strip default script tag for a language) | |
1920 | testEntry.key = inLocaleString; | |
1921 | foundEntry = (KeyStringToResultString *)bsearch( &testEntry, localeStringPrefixToDefaults, kNumLocaleStringPrefixToDefaults, | |
1922 | sizeof(KeyStringToResultString), _CompareTestEntryPrefixToTableEntryKey ); | |
1923 | if (foundEntry) { | |
1924 | // The input string begins with a character sequence for which | |
1925 | // there are default substrings which should be stripped if present | |
1926 | _RemoveSubstringsIfPresent(inLocaleString, foundEntry->result); | |
1927 | } | |
1928 | } | |
1929 | ||
1930 | // D. Re-append any key-value strings, now canonical // <1.10><1.17> | |
1931 | _AppendKeyValueString( inLocaleString, sizeof(inLocaleString), varKeyValueString ); | |
1932 | _AppendKeyValueString( inLocaleString, sizeof(inLocaleString), keyValueString ); | |
1933 | ||
1934 | // Now create the CFString (even if empty!) | |
1935 | outStringRef = CFStringCreateWithCString(allocator, inLocaleString, kCFStringEncodingASCII); | |
1936 | } | |
1937 | ||
1938 | return outStringRef; | |
1939 | } | |
1940 | ||
1941 | // CFLocaleCreateCanonicalLocaleIdentifierFromScriptManagerCodes, based on | |
1942 | // the first part of the SPI CFBundleCopyLocalizationForLocalizationInfo in CFBundle_Resources.c | |
1943 | CFStringRef CFLocaleCreateCanonicalLocaleIdentifierFromScriptManagerCodes(CFAllocatorRef allocator, LangCode lcode, RegionCode rcode) { | |
1944 | CFStringRef result = NULL; | |
1945 | if (0 <= rcode && rcode < kNumRegionCodeToLocaleString) { | |
1946 | const char *localeString = regionCodeToLocaleString[rcode]; | |
1947 | if (localeString != NULL && *localeString != '\0') { | |
1948 | result = CFStringCreateWithCStringNoCopy(allocator, localeString, kCFStringEncodingASCII, kCFAllocatorNull); | |
1949 | } | |
1950 | } | |
1951 | if (result) return result; | |
1952 | if (0 <= lcode && lcode < kNumLangCodeToLocaleString) { | |
1953 | const char *localeString = langCodeToLocaleString[lcode]; | |
1954 | if (localeString != NULL && *localeString != '\0') { | |
1955 | result = CFStringCreateWithCStringNoCopy(allocator, localeString, kCFStringEncodingASCII, kCFAllocatorNull); | |
1956 | } | |
1957 | } | |
1958 | return result; | |
1959 | } | |
1960 | ||
1961 | ||
cf7d2af9 A |
1962 | /* |
1963 | SPI: CFLocaleGetLanguageRegionEncodingForLocaleIdentifier gets the appropriate language and region codes, | |
1964 | and the default legacy script code and encoding, for the specified locale (or language) string. | |
1965 | Returns false if CFLocale has no information about the given locale (in which case none of the by-reference return values are set); | |
1966 | otherwise may set *langCode and/or *regCode to -1 if there is no appropriate legacy value for the locale. | |
1967 | This is a replacement for the CFBundle SPI CFBundleGetLocalizationInfoForLocalization (which was intended to be temporary and transitional); | |
1968 | this function is more up-to-date in its handling of locale strings, and is in CFLocale where this functionality should belong. Compared | |
1969 | to CFBundleGetLocalizationInfoForLocalization, this function does not specially interpret a NULL localeIdentifier to mean use the single most | |
1970 | preferred localization in the current context (this function returns NO for a NULL localeIdentifier); and in this function | |
1971 | langCode, regCode, and scriptCode are all SInt16* (not SInt32* like the equivalent parameters in CFBundleGetLocalizationInfoForLocalization). | |
1972 | */ | |
856091c5 | 1973 | #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX |
cf7d2af9 | 1974 | static int CompareLocaleToLegacyCodesEntries( const void *entry1, const void *entry2 ); |
856091c5 | 1975 | #endif |
cf7d2af9 A |
1976 | |
1977 | Boolean CFLocaleGetLanguageRegionEncodingForLocaleIdentifier(CFStringRef localeIdentifier, LangCode *langCode, RegionCode *regCode, ScriptCode *scriptCode, CFStringEncoding *stringEncoding) { | |
856091c5 | 1978 | #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX |
cf7d2af9 A |
1979 | Boolean returnValue = false; |
1980 | CFStringRef canonicalIdentifier = CFLocaleCreateCanonicalLocaleIdentifierFromString(NULL, localeIdentifier); | |
1981 | if (canonicalIdentifier) { | |
1982 | char localeCString[kLocaleIdentifierCStringMax]; | |
1983 | if ( CFStringGetCString(canonicalIdentifier, localeCString, sizeof(localeCString), kCFStringEncodingASCII) ) { | |
1984 | UErrorCode icuStatus = U_ZERO_ERROR; | |
1985 | int32_t languagelength; | |
1986 | char searchString[ULOC_LANG_CAPACITY + ULOC_FULLNAME_CAPACITY]; | |
1987 | ||
1988 | languagelength = uloc_getLanguage( localeCString, searchString, ULOC_LANG_CAPACITY, &icuStatus ); | |
1989 | if ( U_SUCCESS(icuStatus) && languagelength > 0 ) { | |
1990 | // OK, here we have at least a language code, check for other components in order | |
1991 | LocaleToLegacyCodes searchEntry = { (const char *)searchString, 0, 0, 0 }; | |
1992 | const LocaleToLegacyCodes * foundEntryPtr; | |
1993 | int32_t componentLength; | |
1994 | char componentString[ULOC_FULLNAME_CAPACITY]; | |
1995 | ||
1996 | languagelength = strlen(searchString); // in case it got truncated | |
1997 | icuStatus = U_ZERO_ERROR; | |
1998 | componentLength = uloc_getScript( localeCString, componentString, sizeof(componentString), &icuStatus ); | |
1999 | if ( U_FAILURE(icuStatus) || componentLength == 0 ) { | |
2000 | icuStatus = U_ZERO_ERROR; | |
2001 | componentLength = uloc_getCountry( localeCString, componentString, sizeof(componentString), &icuStatus ); | |
2002 | if ( U_FAILURE(icuStatus) || componentLength == 0 ) { | |
2003 | icuStatus = U_ZERO_ERROR; | |
2004 | componentLength = uloc_getVariant( localeCString, componentString, sizeof(componentString), &icuStatus ); | |
2005 | if ( U_FAILURE(icuStatus) ) { | |
2006 | componentLength = 0; | |
2007 | } | |
2008 | } | |
2009 | } | |
2010 | ||
2011 | // Append whichever other component we first found | |
2012 | if (componentLength > 0) { | |
2013 | strlcat(searchString, "_", sizeof(searchString)); | |
2014 | strlcat(searchString, componentString, sizeof(searchString)); | |
2015 | } | |
2016 | ||
2017 | // Search | |
2018 | foundEntryPtr = (const LocaleToLegacyCodes *)bsearch( &searchEntry, localeToLegacyCodes, kNumLocaleToLegacyCodes, sizeof(LocaleToLegacyCodes), CompareLocaleToLegacyCodesEntries ); | |
2019 | if (foundEntryPtr == NULL && (int32_t) strlen(searchString) > languagelength) { | |
2020 | // truncate to language al;one and try again | |
2021 | searchString[languagelength] = 0; | |
2022 | foundEntryPtr = (const LocaleToLegacyCodes *)bsearch( &searchEntry, localeToLegacyCodes, kNumLocaleToLegacyCodes, sizeof(LocaleToLegacyCodes), CompareLocaleToLegacyCodesEntries ); | |
2023 | } | |
2024 | ||
2025 | // If found a matching entry, return requested values | |
2026 | if (foundEntryPtr) { | |
2027 | returnValue = true; | |
2028 | if (langCode) *langCode = foundEntryPtr->langCode; | |
2029 | if (regCode) *regCode = foundEntryPtr->regCode; | |
2030 | if (stringEncoding) *stringEncoding = foundEntryPtr->encoding; | |
2031 | if (scriptCode) { | |
2032 | // map CFStringEncoding to ScriptCode | |
2033 | if (foundEntryPtr->encoding < 33/*kCFStringEncodingMacSymbol*/) { | |
2034 | *scriptCode = foundEntryPtr->encoding; | |
2035 | } else { | |
2036 | switch (foundEntryPtr->encoding) { | |
2037 | case 0x8C/*kCFStringEncodingMacFarsi*/: *scriptCode = 4/*smArabic*/; break; | |
2038 | case 0x98/*kCFStringEncodingMacUkrainian*/: *scriptCode = 7/*smCyrillic*/; break; | |
2039 | case 0xEC/*kCFStringEncodingMacInuit*/: *scriptCode = 28/*smEthiopic*/; break; | |
2040 | case 0xFC/*kCFStringEncodingMacVT100*/: *scriptCode = 32/*smUninterp*/; break; | |
2041 | default: *scriptCode = 0/*smRoman*/; break; | |
2042 | } | |
2043 | } | |
2044 | } | |
2045 | } | |
2046 | } | |
2047 | } | |
2048 | CFRelease(canonicalIdentifier); | |
2049 | } | |
2050 | return returnValue; | |
856091c5 A |
2051 | #else |
2052 | return false; | |
2053 | #endif | |
cf7d2af9 A |
2054 | } |
2055 | ||
856091c5 | 2056 | #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX |
cf7d2af9 A |
2057 | static int CompareLocaleToLegacyCodesEntries( const void *entry1, const void *entry2 ) { |
2058 | const char * localeString1 = ((const LocaleToLegacyCodes *)entry1)->locale; | |
2059 | const char * localeString2 = ((const LocaleToLegacyCodes *)entry2)->locale; | |
2060 | return strcmp(localeString1, localeString2); | |
2061 | } | |
856091c5 | 2062 | #endif |
cf7d2af9 | 2063 | |
bd5b749c | 2064 | CFDictionaryRef CFLocaleCreateComponentsFromLocaleIdentifier(CFAllocatorRef allocator, CFStringRef localeID) { |
856091c5 A |
2065 | CFMutableDictionaryRef working = CFDictionaryCreateMutable(allocator, 10, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks); |
2066 | #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX | |
bd5b749c A |
2067 | char cLocaleID[ULOC_FULLNAME_CAPACITY+ULOC_KEYWORD_AND_VALUES_CAPACITY]; |
2068 | char buffer[ULOC_FULLNAME_CAPACITY+ULOC_KEYWORD_AND_VALUES_CAPACITY]; | |
bd5b749c A |
2069 | |
2070 | UErrorCode icuStatus = U_ZERO_ERROR; | |
2071 | int32_t length = 0; | |
856091c5 A |
2072 | |
2073 | if (!localeID) goto out; | |
2074 | ||
bd5b749c A |
2075 | // Extract the C string locale ID, for ICU |
2076 | CFIndex outBytes = 0; | |
2077 | CFStringGetBytes(localeID, CFRangeMake(0, CFStringGetLength(localeID)), kCFStringEncodingASCII, (UInt8) '?', true, (unsigned char *)cLocaleID, sizeof(cLocaleID)/sizeof(char) - 1, &outBytes); | |
2078 | cLocaleID[outBytes] = '\0'; | |
2079 | ||
2080 | // Get the components | |
2081 | length = uloc_getLanguage(cLocaleID, buffer, sizeof(buffer)/sizeof(char), &icuStatus); | |
2082 | if (U_SUCCESS(icuStatus) && length > 0) | |
2083 | { | |
2084 | CFStringRef string = CFStringCreateWithBytes(allocator, (UInt8 *)buffer, length, kCFStringEncodingASCII, true); | |
cf7d2af9 | 2085 | CFDictionaryAddValue(working, kCFLocaleLanguageCodeKey, string); |
bd5b749c A |
2086 | CFRelease(string); |
2087 | } | |
2088 | icuStatus = U_ZERO_ERROR; | |
2089 | ||
2090 | length = uloc_getScript(cLocaleID, buffer, sizeof(buffer)/sizeof(char), &icuStatus); | |
2091 | if (U_SUCCESS(icuStatus) && length > 0) | |
2092 | { | |
2093 | CFStringRef string = CFStringCreateWithBytes(allocator, (UInt8 *)buffer, length, kCFStringEncodingASCII, true); | |
cf7d2af9 | 2094 | CFDictionaryAddValue(working, kCFLocaleScriptCodeKey, string); |
bd5b749c A |
2095 | CFRelease(string); |
2096 | } | |
2097 | icuStatus = U_ZERO_ERROR; | |
2098 | ||
2099 | length = uloc_getCountry(cLocaleID, buffer, sizeof(buffer)/sizeof(char), &icuStatus); | |
2100 | if (U_SUCCESS(icuStatus) && length > 0) | |
2101 | { | |
2102 | CFStringRef string = CFStringCreateWithBytes(allocator, (UInt8 *)buffer, length, kCFStringEncodingASCII, true); | |
cf7d2af9 | 2103 | CFDictionaryAddValue(working, kCFLocaleCountryCodeKey, string); |
bd5b749c A |
2104 | CFRelease(string); |
2105 | } | |
2106 | icuStatus = U_ZERO_ERROR; | |
2107 | ||
2108 | length = uloc_getVariant(cLocaleID, buffer, sizeof(buffer)/sizeof(char), &icuStatus); | |
2109 | if (U_SUCCESS(icuStatus) && length > 0) | |
2110 | { | |
2111 | CFStringRef string = CFStringCreateWithBytes(allocator, (UInt8 *)buffer, length, kCFStringEncodingASCII, true); | |
cf7d2af9 | 2112 | CFDictionaryAddValue(working, kCFLocaleVariantCodeKey, string); |
bd5b749c A |
2113 | CFRelease(string); |
2114 | } | |
2115 | icuStatus = U_ZERO_ERROR; | |
2116 | ||
2117 | // Now get the keywords; open an enumerator on them | |
2118 | UEnumeration *iter = uloc_openKeywords(cLocaleID, &icuStatus); | |
2119 | const char *locKey = NULL; | |
2120 | int32_t locKeyLen = 0; | |
2121 | while ((locKey = uenum_next(iter, &locKeyLen, &icuStatus)) && U_SUCCESS(icuStatus)) | |
2122 | { | |
2123 | char locValue[ULOC_KEYWORD_AND_VALUES_CAPACITY]; | |
2124 | ||
2125 | // Get the value for this keyword | |
2126 | if (uloc_getKeywordValue(cLocaleID, locKey, locValue, sizeof(locValue)/sizeof(char), &icuStatus) > 0 | |
2127 | && U_SUCCESS(icuStatus)) | |
2128 | { | |
2129 | CFStringRef key = CFStringCreateWithBytes(allocator, (UInt8 *)locKey, strlen(locKey), kCFStringEncodingASCII, true); | |
2130 | CFStringRef value = CFStringCreateWithBytes(allocator, (UInt8 *)locValue, strlen(locValue), kCFStringEncodingASCII, true); | |
2131 | if (key && value) | |
2132 | CFDictionaryAddValue(working, key, value); | |
2133 | if (key) | |
2134 | CFRelease(key); | |
2135 | if (value) | |
2136 | CFRelease(value); | |
2137 | } | |
2138 | } | |
2139 | uenum_close(iter); | |
2140 | ||
856091c5 A |
2141 | out:; |
2142 | #endif | |
bd5b749c A |
2143 | // Convert to an immutable dictionary and return |
2144 | CFDictionaryRef result = CFDictionaryCreateCopy(allocator, working); | |
2145 | CFRelease(working); | |
2146 | return result; | |
2147 | } | |
2148 | ||
cf7d2af9 A |
2149 | static char *__CStringFromString(CFStringRef str) { |
2150 | if (!str) return NULL; | |
2151 | CFRange rg = CFRangeMake(0, CFStringGetLength(str)); | |
2152 | CFIndex neededLength = 0; | |
2153 | CFStringGetBytes(str, rg, kCFStringEncodingASCII, (UInt8)'?', false, NULL, 0, &neededLength); | |
2154 | char *buf = (char *)malloc(neededLength + 1); | |
2155 | CFStringGetBytes(str, rg, kCFStringEncodingASCII, (UInt8)'?', false, (uint8_t *)buf, neededLength, &neededLength); | |
2156 | buf[neededLength] = '\0'; | |
2157 | return buf; | |
bd5b749c A |
2158 | } |
2159 | ||
2160 | CFStringRef CFLocaleCreateLocaleIdentifierFromComponents(CFAllocatorRef allocator, CFDictionaryRef dictionary) { | |
856091c5 A |
2161 | if (!dictionary) return NULL; |
2162 | ||
cf7d2af9 A |
2163 | CFIndex cnt = CFDictionaryGetCount(dictionary); |
2164 | STACK_BUFFER_DECL(CFStringRef, values, cnt); | |
2165 | STACK_BUFFER_DECL(CFStringRef, keys, cnt); | |
2166 | CFDictionaryGetKeysAndValues(dictionary, (const void **)keys, (const void **)values); | |
2167 | ||
2168 | char *language = NULL, *script = NULL, *country = NULL, *variant = NULL; | |
2169 | for (CFIndex idx = 0; idx < cnt; idx++) { | |
2170 | if (CFEqual(kCFLocaleLanguageCodeKey, keys[idx])) { | |
2171 | language = __CStringFromString(values[idx]); | |
2172 | keys[idx] = NULL; | |
2173 | } else if (CFEqual(kCFLocaleScriptCodeKey, keys[idx])) { | |
2174 | script = __CStringFromString(values[idx]); | |
2175 | keys[idx] = NULL; | |
2176 | } else if (CFEqual(kCFLocaleCountryCodeKey, keys[idx])) { | |
2177 | country = __CStringFromString(values[idx]); | |
2178 | keys[idx] = NULL; | |
2179 | } else if (CFEqual(kCFLocaleVariantCodeKey, keys[idx])) { | |
2180 | variant = __CStringFromString(values[idx]); | |
2181 | keys[idx] = NULL; | |
2182 | } | |
bd5b749c A |
2183 | } |
2184 | ||
cf7d2af9 A |
2185 | char *buf1 = NULL; // (|L)(|_S)(|_C|_C_V|__V) |
2186 | asprintf(&buf1, "%s%s%s%s%s%s%s", language ? language : "", script ? "_" : "", script ? script : "", (country || variant ? "_" : ""), country ? country : "", variant ? "_" : "", variant ? variant : ""); | |
2187 | ||
2188 | char cLocaleID[2 * ULOC_FULLNAME_CAPACITY + 2 * ULOC_KEYWORD_AND_VALUES_CAPACITY]; | |
2189 | strlcpy(cLocaleID, buf1, sizeof(cLocaleID)); | |
2190 | free(language); | |
2191 | free(script); | |
2192 | free(country); | |
2193 | free(variant); | |
2194 | free(buf1); | |
2195 | ||
856091c5 | 2196 | #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX |
cf7d2af9 A |
2197 | for (CFIndex idx = 0; idx < cnt; idx++) { |
2198 | if (keys[idx]) { | |
2199 | char *key = __CStringFromString(keys[idx]); | |
8ca704e1 A |
2200 | char *value; |
2201 | if (0 == strcmp(key, "kCFLocaleCalendarKey")) { | |
2202 | // For interchangeability convenience, we alternatively allow a | |
2203 | // calendar object to be passed in, with the alternate key, and | |
2204 | // we'll extract the identifier. | |
2205 | CFCalendarRef cal = (CFCalendarRef)values[idx]; | |
2206 | CFStringRef ident = CFCalendarGetIdentifier(cal); | |
2207 | value = __CStringFromString(ident); | |
2208 | char *oldkey = key; | |
2209 | key = strdup("calendar"); | |
2210 | free(oldkey); | |
2211 | } else { | |
2212 | value = __CStringFromString(values[idx]); | |
2213 | } | |
cf7d2af9 A |
2214 | UErrorCode status = U_ZERO_ERROR; |
2215 | uloc_setKeywordValue(key, value, cLocaleID, sizeof(cLocaleID), &status); | |
2216 | free(key); | |
2217 | free(value); | |
2218 | } | |
bd5b749c | 2219 | } |
856091c5 A |
2220 | #endif |
2221 | ||
cf7d2af9 | 2222 | return CFStringCreateWithCString(allocator, cLocaleID, kCFStringEncodingASCII); |
bd5b749c A |
2223 | } |
2224 |