]>
Commit | Line | Data |
---|---|---|
bd5b749c | 1 | /* |
8ca704e1 | 2 | * Copyright (c) 2011 Apple Inc. All rights reserved. |
bd5b749c A |
3 | * |
4 | * @APPLE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. Please obtain a copy of the License at | |
10 | * http://www.opensource.apple.com/apsl/ and read it before using this | |
11 | * file. | |
12 | * | |
13 | * The Original Code and all software distributed under the License are | |
14 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
15 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
16 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
17 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
18 | * Please see the License for the specific language governing rights and | |
19 | * limitations under the License. | |
20 | * | |
21 | * @APPLE_LICENSE_HEADER_END@ | |
22 | */ | |
f64f9b69 | 23 | |
bd5b749c A |
24 | /* |
25 | CFLocaleIdentifier.c | |
8ca704e1 A |
26 | Copyright (c) 2002-2011, Apple Inc. All rights reserved. |
27 | Responsibility: David Smith | |
bd5b749c A |
28 | |
29 | CFLocaleIdentifier.c defines | |
30 | - enum value kLocaleIdentifierCStringMax | |
31 | - structs KeyStringToResultString, SpecialCaseUpdates | |
32 | and provides the following data for the functions | |
33 | CFLocaleCreateCanonicalLocaleIdentifierFromScriptManagerCodes, | |
34 | CFLocaleCreateCanonicalLocaleIdentifierFromString | |
35 | CFLocaleCreateCanonicalLanguageIdentifierFromString | |
36 | ||
37 | 1. static const char * regionCodeToLocaleString[]; enum kNumRegionCodeToLocaleString; | |
38 | map RegionCode 0..kNumRegionCodeToLocaleString-1 to canonical locale string | |
39 | ||
40 | 2. static const char * langCodeToLocaleString[]; enum kNumLangCodeToLocaleString; | |
41 | map LangCode 0..kNumLangCodeToLocaleString-1 to canonical locale string | |
42 | ||
43 | 3. static const KeyStringToResultString oldAppleLocaleToCanonical[]; enum kNumOldAppleLocaleToCanonical; | |
44 | map old Apple string oldAppleLocaleToCanonical[n].key | |
45 | to canonical locale string oldAppleLocaleToCanonical[n].result | |
46 | for n = 0..kNumOldAppleLocaleToCanonical-1 | |
47 | ||
48 | 4. static const KeyStringToResultString localeStringPrefixToCanonical[]; enum kNumLocaleStringPrefixToCanonical; | |
49 | map non-canonical language prefix (3-letter, obsolete) localeStringPrefixToCanonical[n].key | |
50 | to updated replacement localeStringPrefixToCanonical[n].result | |
51 | for n = 0..kNumLocaleStringPrefixToCanonical-1 | |
52 | ||
53 | 5. static const SpecialCaseUpdates specialCases[]; | |
54 | various special cases for updating region codes, or for updating language codes based on region codes | |
55 | ||
56 | 6. static const KeyStringToResultString localeStringRegionToDefaults[]; enum kNumLocaleStringRegionToDefaults; | |
57 | map locale string region tag localeStringRegionToDefaults[n].key | |
58 | to default substrings to delete localeStringRegionToDefaults[n].result | |
59 | for n = 0..kNumLocaleStringRegionToDefaults-1 | |
60 | ||
61 | 7. static const KeyStringToResultString localeStringPrefixToDefaults[]; enum kNumLocaleStringPrefixToDefaults; | |
62 | map locale string initial part localeStringPrefixToDefaults[n].key | |
63 | to default substrings to delete localeStringPrefixToDefaults[n].result | |
64 | for n = 0..kNumLocaleStringPrefixToDefaults-1 | |
65 | ||
66 | 8. static const KeyStringToResultString appleLocaleToLanguageString[]; enum kNumAppleLocaleToLanguageString; | |
67 | map Apple locale string appleLocaleToLanguageString[n].key | |
68 | to equivalent language string appleLocaleToLanguageString[n].result | |
69 | for n = 0..kNumAppleLocaleToLanguageString-1 | |
70 | ||
71 | */ | |
72 | ||
73 | #include <CoreFoundation/CFString.h> | |
8ca704e1 | 74 | #include <CoreFoundation/CFCalendar.h> |
bd5b749c A |
75 | #include <ctype.h> |
76 | #include <string.h> | |
77 | #include <stdlib.h> | |
8ca704e1 | 78 | #include <stdio.h> |
bd5b749c | 79 | #include <unicode/uloc.h> |
cf7d2af9 A |
80 | #include "CFInternal.h" |
81 | #include "CFLocaleInternal.h" | |
bd5b749c A |
82 | |
83 | // Max byte length of a locale identifier (ASCII) as a C string, including the terminating null byte; computed as the sum of the two ICU capacity constants below | |
84 | enum { | |
85 | kLocaleIdentifierCStringMax = ULOC_FULLNAME_CAPACITY + ULOC_KEYWORD_AND_VALUES_CAPACITY // 56 + 100 when this was written; NOTE(review): the actual values come from <unicode/uloc.h> - confirm for the ICU version in use | |
86 | }; | |
87 | ||
88 | // KeyStringToResultString: a (key, result) pair of C strings; element type of the key-to-result data tables used by CFLocaleCreateCanonicalLocaleIdentifierFromString | |
89 | struct KeyStringToResultString { | |
90 | const char * key; | |
91 | const char * result; | |
92 | }; | |
93 | typedef struct KeyStringToResultString KeyStringToResultString; | |
94 | ||
95 | // SpecialCaseUpdates: five C-string fields (lang, reg1, update1, reg2, update2); per the file header this is the element type of specialCases[], which holds special cases for updating region codes, or for updating language codes based on region codes, for CFLocaleCreateCanonicalLocaleIdentifierFromString | |
96 | struct SpecialCaseUpdates { | |
97 | const char * lang; | |
98 | const char * reg1; | |
99 | const char * update1; | |
100 | const char * reg2; | |
101 | const char * update2; | |
102 | }; | |
103 | typedef struct SpecialCaseUpdates SpecialCaseUpdates; | |
104 | ||
105 | ||
106 | static const char * const regionCodeToLocaleString[] = { | |
107 | // map RegionCode (array index) to canonical locale string; the index of each entry is repeated in its trailing comment | |
108 | // NULL marks an unassigned/unused RegionCode; entries such as _CA give only a region, with no language part | |
109 | // canon. string region code; language code; [comment] [ # __CFBundleLocaleAbbreviationsArray | |
110 | // -------- ------------ ------------------ ------------ -------- string, if different ] | |
111 | "en_US", // 0 verUS; 0 langEnglish; | |
112 | "fr_FR", // 1 verFrance; 1 langFrench; | |
113 | "en_GB", // 2 verBritain; 0 langEnglish; | |
114 | "de_DE", // 3 verGermany; 2 langGerman; | |
115 | "it_IT", // 4 verItaly; 3 langItalian; | |
116 | "nl_NL", // 5 verNetherlands; 4 langDutch; | |
117 | "nl_BE", // 6 verFlemish; 34 langFlemish (redundant, =Dutch); | |
118 | "sv_SE", // 7 verSweden; 5 langSwedish; | |
119 | "es_ES", // 8 verSpain; 6 langSpanish; | |
120 | "da_DK", // 9 verDenmark; 7 langDanish; | |
121 | "pt_PT", // 10 verPortugal; 8 langPortuguese; | |
122 | "fr_CA", // 11 verFrCanada; 1 langFrench; | |
123 | "nb_NO", // 12 verNorway; 9 langNorwegian (Bokmal); # "no_NO" | |
124 | "he_IL", // 13 verIsrael; 10 langHebrew; | |
125 | "ja_JP", // 14 verJapan; 11 langJapanese; | |
126 | "en_AU", // 15 verAustralia; 0 langEnglish; | |
127 | "ar", // 16 verArabic; 12 langArabic; | |
128 | "fi_FI", // 17 verFinland; 13 langFinnish; | |
129 | "fr_CH", // 18 verFrSwiss; 1 langFrench; | |
130 | "de_CH", // 19 verGrSwiss; 2 langGerman; | |
131 | "el_GR", // 20 verGreece; 14 langGreek (modern)-Grek-mono; | |
132 | "is_IS", // 21 verIceland; 15 langIcelandic; | |
133 | "mt_MT", // 22 verMalta; 16 langMaltese; | |
134 | "el_CY", // 23 verCyprus; 14 langGreek?; el or tr? guess el # "" | |
135 | "tr_TR", // 24 verTurkey; 17 langTurkish; | |
136 | "hr_HR", // 25 verYugoCroatian; 18 langCroatian; * one-way mapping -> verCroatia | |
137 | "nl_NL", // 26 KCHR, Netherlands; 4 langDutch; * one-way mapping | |
138 | "nl_BE", // 27 KCHR, verFlemish; 34 langFlemish; * one-way mapping | |
139 | "_CA", // 28 KCHR, Canada-en/fr?; -1 none; * one-way mapping # "en_CA" | |
140 | "_CA", // 29 KCHR, Canada-en/fr?; -1 none; * one-way mapping # "en_CA" | |
141 | "pt_PT", // 30 KCHR, Portugal; 8 langPortuguese; * one-way mapping | |
142 | "nb_NO", // 31 KCHR, Norway; 9 langNorwegian (Bokmal); * one-way mapping # "no_NO" | |
143 | "da_DK", // 32 KCHR, Denmark; 7 langDanish; * one-way mapping | |
144 | "hi_IN", // 33 verIndiaHindi; 21 langHindi; | |
145 | "ur_PK", // 34 verPakistanUrdu; 20 langUrdu; | |
146 | "tr_TR", // 35 verTurkishModified; 17 langTurkish; * one-way mapping | |
147 | "it_CH", // 36 verItalianSwiss; 3 langItalian; | |
148 | "en_001", // 37 verInternational; 0 langEnglish; ASCII only # "en" | |
149 | NULL, // 38 *unassigned; -1 none; * one-way mapping # "" | |
150 | "ro_RO", // 39 verRomania; 37 langRomanian; | |
151 | "grc", // 40 verGreekAncient; 148 langGreekAncient -Grek-poly; # "el_GR" | |
152 | "lt_LT", // 41 verLithuania; 24 langLithuanian; | |
153 | "pl_PL", // 42 verPoland; 25 langPolish; | |
154 | "hu_HU", // 43 verHungary; 26 langHungarian; | |
155 | "et_EE", // 44 verEstonia; 27 langEstonian; | |
156 | "lv_LV", // 45 verLatvia; 28 langLatvian; | |
157 | "se", // 46 verSami; 29 langSami; | |
158 | "fo_FO", // 47 verFaroeIsl; 30 langFaroese; | |
159 | "fa_IR", // 48 verIran; 31 langFarsi/Persian; | |
160 | "ru_RU", // 49 verRussia; 32 langRussian; | |
161 | "ga_IE", // 50 verIreland; 35 langIrishGaelic (no dots); | |
162 | "ko_KR", // 51 verKorea; 23 langKorean; | |
163 | "zh_CN", // 52 verChina; 33 langSimpChinese; | |
164 | "zh_TW", // 53 verTaiwan; 19 langTradChinese; | |
165 | "th_TH", // 54 verThailand; 22 langThai; | |
166 | "und", // 55 verScriptGeneric; -1 none; # "" // <1.9> | |
167 | "cs_CZ", // 56 verCzech; 38 langCzech; | |
168 | "sk_SK", // 57 verSlovak; 39 langSlovak; | |
169 | "und", // 58 verEastAsiaGeneric; -1 none; * one-way mapping # "" // <1.9> | |
170 | "hu_HU", // 59 verMagyar; 26 langHungarian; * one-way mapping -> verHungary | |
171 | "bn", // 60 verBengali; 67 langBengali; _IN or _BD? guess generic | |
172 | "be_BY", // 61 verBelarus; 46 langBelorussian; | |
173 | "uk_UA", // 62 verUkraine; 45 langUkrainian; | |
174 | NULL, // 63 *unused; -1 none; * one-way mapping # "" | |
175 | "el_GR", // 64 verGreeceAlt; 14 langGreek (modern)-Grek-mono; * one-way mapping | |
cf7d2af9 | 176 | "sr_RS", // 65 verSerbian; 42 langSerbian -Cyrl; // <1.18> |
bd5b749c A |
177 | "sl_SI", // 66 verSlovenian; 40 langSlovenian; |
178 | "mk_MK", // 67 verMacedonian; 43 langMacedonian; | |
179 | "hr_HR", // 68 verCroatia; 18 langCroatian; | |
180 | NULL, // 69 *unused; -1 none; * one-way mapping # "" | |
181 | "de-1996", // 70 verGermanReformed; 2 langGerman; 1996 orthogr. # "de_DE" | |
182 | "pt_BR", // 71 verBrazil; 8 langPortuguese; | |
183 | "bg_BG", // 72 verBulgaria; 44 langBulgarian; | |
184 | "ca_ES", // 73 verCatalonia; 130 langCatalan; | |
185 | "mul", // 74 verMultilingual; -1 none; # "" | |
186 | "gd", // 75 verScottishGaelic; 144 langScottishGaelic; | |
187 | "gv", // 76 verManxGaelic; 145 langManxGaelic; | |
188 | "br", // 77 verBreton; 142 langBreton; | |
189 | "iu_CA", // 78 verNunavut; 143 langInuktitut -Cans; | |
190 | "cy", // 79 verWelsh; 128 langWelsh; | |
191 | "_CA", // 80 KCHR, Canada-en/fr?; -1 none; * one-way mapping # "en_CA" | |
192 | "ga-Latg_IE", // 81 verIrishGaelicScrip; 146 langIrishGaelicScript -dots; # "ga_IE" // <xx> | |
193 | "en_CA", // 82 verEngCanada; 0 langEnglish; | |
194 | "dz_BT", // 83 verBhutan; 137 langDzongkha; | |
195 | "hy_AM", // 84 verArmenian; 51 langArmenian; | |
196 | "ka_GE", // 85 verGeorgian; 52 langGeorgian; | |
197 | "es_419", // 86 verSpLatinAmerica; 6 langSpanish; # "es" | |
198 | "es_ES", // 87 KCHR, Spain; 6 langSpanish; * one-way mapping | |
199 | "to_TO", // 88 verTonga; 147 langTongan; | |
200 | "pl_PL", // 89 KCHR, Poland; 25 langPolish; * one-way mapping | |
201 | "ca_ES", // 90 KCHR, Catalonia; 130 langCatalan; * one-way mapping | |
202 | "fr_001", // 91 verFrenchUniversal; 1 langFrench; | |
203 | "de_AT", // 92 verAustria; 2 langGerman; | |
204 | "es_419", // 93 > verSpLatinAmerica; 6 langSpanish; * one-way mapping # "es" | |
205 | "gu_IN", // 94 verGujarati; 69 langGujarati; | |
206 | "pa", // 95 verPunjabi; 70 langPunjabi; _IN or _PK? guess generic | |
207 | "ur_IN", // 96 verIndiaUrdu; 20 langUrdu; | |
208 | "vi_VN", // 97 verVietnam; 80 langVietnamese; | |
209 | "fr_BE", // 98 verFrBelgium; 1 langFrench; | |
210 | "uz_UZ", // 99 verUzbek; 47 langUzbek; | |
211 | "en_SG", // 100 verSingapore; 0 langEnglish?; en, zh, or ms? guess en # "" | |
212 | "nn_NO", // 101 verNynorsk; 151 langNynorsk; # "" | |
213 | "af_ZA", // 102 verAfrikaans; 141 langAfrikaans; | |
214 | "eo", // 103 verEsperanto; 94 langEsperanto; | |
215 | "mr_IN", // 104 verMarathi; 66 langMarathi; | |
216 | "bo", // 105 verTibetan; 63 langTibetan; | |
217 | "ne_NP", // 106 verNepal; 64 langNepali; | |
218 | "kl", // 107 verGreenland; 149 langGreenlandic; | |
219 | "en_IE", // 108 verIrelandEnglish; 0 langEnglish; # (no entry) | |
220 | }; | |
221 | enum { | |
222 | kNumRegionCodeToLocaleString = sizeof(regionCodeToLocaleString)/sizeof(char *) | |
223 | }; | |
224 | ||
225 | static const char * const langCodeToLocaleString[] = { | |
226 | // map LangCode (array index) to canonical locale string; the index of each entry is repeated in its trailing comment | |
227 | // NULL entries fill the gap in LangCode numbering (95..127) so that the array index always equals the LangCode | |
228 | // canon. string language code; [ comment] [ # __CFBundleLanguageAbbreviationsArray | |
229 | // -------- -------------- ---------- -------- string, if different ] | |
230 | "en", // 0 langEnglish; | |
231 | "fr", // 1 langFrench; | |
232 | "de", // 2 langGerman; | |
233 | "it", // 3 langItalian; | |
234 | "nl", // 4 langDutch; | |
235 | "sv", // 5 langSwedish; | |
236 | "es", // 6 langSpanish; | |
237 | "da", // 7 langDanish; | |
238 | "pt", // 8 langPortuguese; | |
239 | "nb", // 9 langNorwegian (Bokmal); # "no" | |
240 | "he", // 10 langHebrew -Hebr; | |
241 | "ja", // 11 langJapanese -Jpan; | |
242 | "ar", // 12 langArabic -Arab; | |
243 | "fi", // 13 langFinnish; | |
244 | "el", // 14 langGreek (modern)-Grek-mono; | |
245 | "is", // 15 langIcelandic; | |
246 | "mt", // 16 langMaltese -Latn; | |
247 | "tr", // 17 langTurkish -Latn; | |
248 | "hr", // 18 langCroatian; | |
249 | "zh-Hant", // 19 langTradChinese; # "zh" | |
250 | "ur", // 20 langUrdu -Arab; | |
251 | "hi", // 21 langHindi -Deva; | |
252 | "th", // 22 langThai -Thai; | |
253 | "ko", // 23 langKorean -Hang; | |
254 | "lt", // 24 langLithuanian; | |
255 | "pl", // 25 langPolish; | |
256 | "hu", // 26 langHungarian; | |
257 | "et", // 27 langEstonian; | |
258 | "lv", // 28 langLatvian; | |
259 | "se", // 29 langSami; | |
260 | "fo", // 30 langFaroese; | |
261 | "fa", // 31 langFarsi/Persian -Arab; | |
262 | "ru", // 32 langRussian -Cyrl; | |
263 | "zh-Hans", // 33 langSimpChinese; # "zh" | |
264 | "nl-BE", // 34 langFlemish (redundant, =Dutch); # "nl" | |
265 | "ga", // 35 langIrishGaelic (no dots); | |
266 | "sq", // 36 langAlbanian; no region codes | |
267 | "ro", // 37 langRomanian; | |
268 | "cs", // 38 langCzech; | |
269 | "sk", // 39 langSlovak; | |
270 | "sl", // 40 langSlovenian; | |
271 | "yi", // 41 langYiddish -Hebr; no region codes | |
272 | "sr", // 42 langSerbian -Cyrl; | |
273 | "mk", // 43 langMacedonian -Cyrl; | |
274 | "bg", // 44 langBulgarian -Cyrl; | |
275 | "uk", // 45 langUkrainian -Cyrl; | |
276 | "be", // 46 langBelorussian -Cyrl; | |
277 | "uz-Cyrl", // 47 langUzbek -Cyrl; also -Latn, -Arab | |
278 | "kk", // 48 langKazakh -Cyrl; no region codes; also -Latn, -Arab | |
279 | "az-Cyrl", // 49 langAzerbaijani -Cyrl; no region codes # "az" | |
280 | "az-Arab", // 50 langAzerbaijanAr -Arab; no region codes # "az" | |
281 | "hy", // 51 langArmenian -Armn; | |
282 | "ka", // 52 langGeorgian -Geor; | |
283 | "mo", // 53 langMoldavian -Cyrl; no region codes | |
284 | "ky", // 54 langKirghiz -Cyrl; no region codes; also -Latn, -Arab | |
285 | "tg-Cyrl", // 55 langTajiki -Cyrl; no region codes; also -Latn, -Arab | |
286 | "tk-Cyrl", // 56 langTurkmen -Cyrl; no region codes; also -Latn, -Arab | |
287 | "mn-Mong", // 57 langMongolian -Mong; no region codes # "mn" | |
288 | "mn-Cyrl", // 58 langMongolianCyr -Cyrl; no region codes # "mn" | |
289 | "ps", // 59 langPashto -Arab; no region codes | |
290 | "ku", // 60 langKurdish -Arab; no region codes | |
291 | "ks", // 61 langKashmiri -Arab; no region codes | |
292 | "sd", // 62 langSindhi -Arab; no region codes | |
293 | "bo", // 63 langTibetan -Tibt; | |
294 | "ne", // 64 langNepali -Deva; | |
295 | "sa", // 65 langSanskrit -Deva; no region codes | |
296 | "mr", // 66 langMarathi -Deva; | |
297 | "bn", // 67 langBengali -Beng; | |
298 | "as", // 68 langAssamese -Beng; no region codes | |
299 | "gu", // 69 langGujarati -Gujr; | |
300 | "pa", // 70 langPunjabi -Guru; | |
301 | "or", // 71 langOriya -Orya; no region codes | |
302 | "ml", // 72 langMalayalam -Mlym; no region codes | |
303 | "kn", // 73 langKannada -Knda; no region codes | |
304 | "ta", // 74 langTamil -Taml; no region codes | |
305 | "te", // 75 langTelugu -Telu; no region codes | |
306 | "si", // 76 langSinhalese -Sinh; no region codes | |
307 | "my", // 77 langBurmese -Mymr; no region codes | |
308 | "km", // 78 langKhmer -Khmr; no region codes | |
309 | "lo", // 79 langLao -Laoo; no region codes | |
310 | "vi", // 80 langVietnamese -Latn; | |
311 | "id", // 81 langIndonesian -Latn; no region codes | |
312 | "tl", // 82 langTagalog -Latn; no region codes | |
313 | "ms", // 83 langMalayRoman -Latn; no region codes # "ms" | |
314 | "ms-Arab", // 84 langMalayArabic -Arab; no region codes # "ms" | |
315 | "am", // 85 langAmharic -Ethi; no region codes | |
316 | "ti", // 86 langTigrinya -Ethi; no region codes | |
317 | "om", // 87 langOromo -Ethi; no region codes | |
318 | "so", // 88 langSomali -Latn; no region codes | |
319 | "sw", // 89 langSwahili -Latn; no region codes | |
320 | "rw", // 90 langKinyarwanda -Latn; no region codes | |
321 | "rn", // 91 langRundi -Latn; no region codes | |
322 | "ny", // 92 langNyanja/Chewa -Latn; no region codes # "" | |
323 | "mg", // 93 langMalagasy -Latn; no region codes | |
324 | "eo", // 94 langEsperanto -Latn; | |
325 | NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, // 95 to 105 (gap) | |
326 | NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, // 106 to 116 (gap) | |
327 | NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, // 117 to 127 (gap) | |
328 | "cy", // 128 langWelsh -Latn; | |
329 | "eu", // 129 langBasque -Latn; no region codes | |
330 | "ca", // 130 langCatalan -Latn; | |
331 | "la", // 131 langLatin -Latn; no region codes | |
332 | "qu", // 132 langQuechua -Latn; no region codes | |
333 | "gn", // 133 langGuarani -Latn; no region codes | |
334 | "ay", // 134 langAymara -Latn; no region codes | |
335 | "tt-Cyrl", // 135 langTatar -Cyrl; no region codes | |
336 | "ug", // 136 langUighur -Arab; no region codes | |
337 | "dz", // 137 langDzongkha -Tibt; | |
338 | "jv", // 138 langJavaneseRom -Latn; no region codes | |
339 | "su", // 139 langSundaneseRom -Latn; no region codes | |
340 | "gl", // 140 langGalician -Latn; no region codes | |
341 | "af", // 141 langAfrikaans -Latn; | |
342 | "br", // 142 langBreton -Latn; | |
343 | "iu", // 143 langInuktitut -Cans; | |
344 | "gd", // 144 langScottishGaelic; | |
345 | "gv", // 145 langManxGaelic -Latn; | |
346 | "ga-Latg", // 146 langIrishGaelicScript -Latn-dots; # "ga" // <xx> | |
347 | "to", // 147 langTongan -Latn; | |
348 | "grc", // 148 langGreekAncient -Grek-poly; # "el" | |
349 | "kl", // 149 langGreenlandic -Latn; | |
350 | "az-Latn", // 150 langAzerbaijanRoman -Latn; no region codes # "az" | |
351 | "nn", // 151 langNynorsk -Latn; # (no entry) | |
352 | }; | |
353 | enum { | |
354 | kNumLangCodeToLocaleString = sizeof(langCodeToLocaleString)/sizeof(char *) | |
355 | }; | |
356 | ||
357 | static const KeyStringToResultString oldAppleLocaleToCanonical[] = { | |
358 | // Map obsolete/old-style Apple strings to canonical | |
359 | // Must be sorted according to how strcmp compares the strings in the first column | |
360 | // | |
361 | // non-canonical canonical [ comment ] # source/reason for non-canonical string | |
362 | // string string | |
363 | // ------------- --------- | |
364 | { "Afrikaans", "af" }, // # __CFBundleLanguageNamesArray | |
365 | { "Albanian", "sq" }, // # __CFBundleLanguageNamesArray | |
366 | { "Amharic", "am" }, // # __CFBundleLanguageNamesArray | |
367 | { "Arabic", "ar" }, // # __CFBundleLanguageNamesArray | |
368 | { "Armenian", "hy" }, // # __CFBundleLanguageNamesArray | |
369 | { "Assamese", "as" }, // # __CFBundleLanguageNamesArray | |
370 | { "Aymara", "ay" }, // # __CFBundleLanguageNamesArray | |
371 | { "Azerbaijani", "az" }, // -Arab,-Cyrl,-Latn? # __CFBundleLanguageNamesArray (had 3 entries "Azerbaijani" for "az-Arab", "az-Cyrl", "az-Latn") | |
372 | { "Basque", "eu" }, // # __CFBundleLanguageNamesArray | |
373 | { "Belarusian", "be" }, // # handle other names | |
374 | { "Belorussian", "be" }, // # handle other names | |
375 | { "Bengali", "bn" }, // # __CFBundleLanguageNamesArray | |
376 | { "Brazilian Portugese", "pt-BR" }, // # from Installer.app Info.plist IFLanguages key, misspelled | |
377 | { "Brazilian Portuguese", "pt-BR" }, // # correct spelling for above | |
378 | { "Breton", "br" }, // # __CFBundleLanguageNamesArray | |
379 | { "Bulgarian", "bg" }, // # __CFBundleLanguageNamesArray | |
380 | { "Burmese", "my" }, // # __CFBundleLanguageNamesArray | |
381 | { "Byelorussian", "be" }, // # __CFBundleLanguageNamesArray | |
382 | { "Catalan", "ca" }, // # __CFBundleLanguageNamesArray | |
383 | { "Chewa", "ny" }, // # handle other names | |
384 | { "Chichewa", "ny" }, // # handle other names | |
385 | { "Chinese", "zh" }, // -Hans,-Hant? # __CFBundleLanguageNamesArray (had 2 entries "Chinese" for "zh-Hant", "zh-Hans") | |
386 | { "Chinese, Simplified", "zh-Hans" }, // # from Installer.app Info.plist IFLanguages key | |
387 | { "Chinese, Traditional", "zh-Hant" }, // # correct spelling for below | |
388 | { "Chinese, Tradtional", "zh-Hant" }, // # from Installer.app Info.plist IFLanguages key, misspelled | |
389 | { "Croatian", "hr" }, // # __CFBundleLanguageNamesArray | |
390 | { "Czech", "cs" }, // # __CFBundleLanguageNamesArray | |
391 | { "Danish", "da" }, // # __CFBundleLanguageNamesArray | |
392 | { "Dutch", "nl" }, // # __CFBundleLanguageNamesArray (had 2 entries "Dutch" for "nl", "nl-BE") | |
393 | { "Dzongkha", "dz" }, // # __CFBundleLanguageNamesArray | |
394 | { "English", "en" }, // # __CFBundleLanguageNamesArray | |
395 | { "Esperanto", "eo" }, // # __CFBundleLanguageNamesArray | |
396 | { "Estonian", "et" }, // # __CFBundleLanguageNamesArray | |
397 | { "Faroese", "fo" }, // # __CFBundleLanguageNamesArray | |
398 | { "Farsi", "fa" }, // # __CFBundleLanguageNamesArray | |
399 | { "Finnish", "fi" }, // # __CFBundleLanguageNamesArray | |
400 | { "Flemish", "nl-BE" }, // # handle other names | |
401 | { "French", "fr" }, // # __CFBundleLanguageNamesArray | |
402 | { "Galician", "gl" }, // # __CFBundleLanguageNamesArray | |
403 | { "Gallegan", "gl" }, // # handle other names | |
404 | { "Georgian", "ka" }, // # __CFBundleLanguageNamesArray | |
405 | { "German", "de" }, // # __CFBundleLanguageNamesArray | |
406 | { "Greek", "el" }, // # __CFBundleLanguageNamesArray (had 2 entries "Greek" for "el", "grc") | |
407 | { "Greenlandic", "kl" }, // # __CFBundleLanguageNamesArray | |
408 | { "Guarani", "gn" }, // # __CFBundleLanguageNamesArray | |
409 | { "Gujarati", "gu" }, // # __CFBundleLanguageNamesArray | |
410 | { "Hawaiian", "haw" }, // # handle new languages | |
411 | { "Hebrew", "he" }, // # __CFBundleLanguageNamesArray | |
412 | { "Hindi", "hi" }, // # __CFBundleLanguageNamesArray | |
413 | { "Hungarian", "hu" }, // # __CFBundleLanguageNamesArray | |
414 | { "Icelandic", "is" }, // # __CFBundleLanguageNamesArray | |
415 | { "Indonesian", "id" }, // # __CFBundleLanguageNamesArray | |
416 | { "Inuktitut", "iu" }, // # __CFBundleLanguageNamesArray | |
417 | { "Irish", "ga" }, // # __CFBundleLanguageNamesArray (had 2 entries "Irish" for "ga", "ga-dots") | |
418 | { "Italian", "it" }, // # __CFBundleLanguageNamesArray | |
419 | { "Japanese", "ja" }, // # __CFBundleLanguageNamesArray | |
420 | { "Javanese", "jv" }, // # __CFBundleLanguageNamesArray | |
421 | { "Kalaallisut", "kl" }, // # handle other names | |
422 | { "Kannada", "kn" }, // # __CFBundleLanguageNamesArray | |
423 | { "Kashmiri", "ks" }, // # __CFBundleLanguageNamesArray | |
424 | { "Kazakh", "kk" }, // # __CFBundleLanguageNamesArray | |
425 | { "Khmer", "km" }, // # __CFBundleLanguageNamesArray | |
426 | { "Kinyarwanda", "rw" }, // # __CFBundleLanguageNamesArray | |
427 | { "Kirghiz", "ky" }, // # __CFBundleLanguageNamesArray | |
428 | { "Korean", "ko" }, // # __CFBundleLanguageNamesArray | |
429 | { "Kurdish", "ku" }, // # __CFBundleLanguageNamesArray | |
430 | { "Lao", "lo" }, // # __CFBundleLanguageNamesArray | |
431 | { "Latin", "la" }, // # __CFBundleLanguageNamesArray | |
432 | { "Latvian", "lv" }, // # __CFBundleLanguageNamesArray | |
433 | { "Lithuanian", "lt" }, // # __CFBundleLanguageNamesArray | |
434 | { "Macedonian", "mk" }, // # __CFBundleLanguageNamesArray | |
435 | { "Malagasy", "mg" }, // # __CFBundleLanguageNamesArray | |
436 | { "Malay", "ms" }, // -Latn,-Arab? # __CFBundleLanguageNamesArray (had 2 entries "Malay" for "ms-Latn", "ms-Arab") | |
437 | { "Malayalam", "ml" }, // # __CFBundleLanguageNamesArray | |
438 | { "Maltese", "mt" }, // # __CFBundleLanguageNamesArray | |
439 | { "Manx", "gv" }, // # __CFBundleLanguageNamesArray | |
440 | { "Marathi", "mr" }, // # __CFBundleLanguageNamesArray | |
441 | { "Moldavian", "mo" }, // # __CFBundleLanguageNamesArray | |
442 | { "Mongolian", "mn" }, // -Mong,-Cyrl? # __CFBundleLanguageNamesArray (had 2 entries "Mongolian" for "mn-Mong", "mn-Cyrl") | |
443 | { "Nepali", "ne" }, // # __CFBundleLanguageNamesArray | |
444 | { "Norwegian", "nb" }, // # __CFBundleLanguageNamesArray (had "Norwegian" mapping to "no") | |
445 | { "Nyanja", "ny" }, // # __CFBundleLanguageNamesArray | |
446 | { "Nynorsk", "nn" }, // # handle other names (no entry in __CFBundleLanguageNamesArray) | |
447 | { "Oriya", "or" }, // # __CFBundleLanguageNamesArray | |
448 | { "Oromo", "om" }, // # __CFBundleLanguageNamesArray | |
449 | { "Panjabi", "pa" }, // # handle other names | |
450 | { "Pashto", "ps" }, // # __CFBundleLanguageNamesArray | |
451 | { "Persian", "fa" }, // # handle other names | |
452 | { "Polish", "pl" }, // # __CFBundleLanguageNamesArray | |
453 | { "Portuguese", "pt" }, // # __CFBundleLanguageNamesArray | |
454 | { "Portuguese, Brazilian", "pt-BR" }, // # handle other names | |
455 | { "Punjabi", "pa" }, // # __CFBundleLanguageNamesArray | |
456 | { "Pushto", "ps" }, // # handle other names | |
457 | { "Quechua", "qu" }, // # __CFBundleLanguageNamesArray | |
458 | { "Romanian", "ro" }, // # __CFBundleLanguageNamesArray | |
459 | { "Ruanda", "rw" }, // # handle other names | |
460 | { "Rundi", "rn" }, // # __CFBundleLanguageNamesArray | |
461 | { "Russian", "ru" }, // # __CFBundleLanguageNamesArray | |
462 | { "Sami", "se" }, // # __CFBundleLanguageNamesArray | |
463 | { "Sanskrit", "sa" }, // # __CFBundleLanguageNamesArray | |
464 | { "Scottish", "gd" }, // # __CFBundleLanguageNamesArray | |
465 | { "Serbian", "sr" }, // # __CFBundleLanguageNamesArray | |
466 | { "Simplified Chinese", "zh-Hans" }, // # handle other names | |
467 | { "Sindhi", "sd" }, // # __CFBundleLanguageNamesArray | |
468 | { "Sinhalese", "si" }, // # __CFBundleLanguageNamesArray | |
469 | { "Slovak", "sk" }, // # __CFBundleLanguageNamesArray | |
470 | { "Slovenian", "sl" }, // # __CFBundleLanguageNamesArray | |
471 | { "Somali", "so" }, // # __CFBundleLanguageNamesArray | |
472 | { "Spanish", "es" }, // # __CFBundleLanguageNamesArray | |
473 | { "Sundanese", "su" }, // # __CFBundleLanguageNamesArray | |
474 | { "Swahili", "sw" }, // # __CFBundleLanguageNamesArray | |
475 | { "Swedish", "sv" }, // # __CFBundleLanguageNamesArray | |
476 | { "Tagalog", "tl" }, // # __CFBundleLanguageNamesArray | |
477 | { "Tajik", "tg" }, // # handle other names | |
478 | { "Tajiki", "tg" }, // # __CFBundleLanguageNamesArray | |
479 | { "Tamil", "ta" }, // # __CFBundleLanguageNamesArray | |
480 | { "Tatar", "tt" }, // # __CFBundleLanguageNamesArray | |
481 | { "Telugu", "te" }, // # __CFBundleLanguageNamesArray | |
482 | { "Thai", "th" }, // # __CFBundleLanguageNamesArray | |
483 | { "Tibetan", "bo" }, // # __CFBundleLanguageNamesArray | |
484 | { "Tigrinya", "ti" }, // # __CFBundleLanguageNamesArray | |
485 | { "Tongan", "to" }, // # __CFBundleLanguageNamesArray | |
486 | { "Traditional Chinese", "zh-Hant" }, // # handle other names | |
487 | { "Turkish", "tr" }, // # __CFBundleLanguageNamesArray | |
488 | { "Turkmen", "tk" }, // # __CFBundleLanguageNamesArray | |
489 | { "Uighur", "ug" }, // # __CFBundleLanguageNamesArray | |
490 | { "Ukrainian", "uk" }, // # __CFBundleLanguageNamesArray | |
491 | { "Urdu", "ur" }, // # __CFBundleLanguageNamesArray | |
492 | { "Uzbek", "uz" }, // # __CFBundleLanguageNamesArray | |
493 | { "Vietnamese", "vi" }, // # __CFBundleLanguageNamesArray | |
494 | { "Welsh", "cy" }, // # __CFBundleLanguageNamesArray | |
495 | { "Yiddish", "yi" }, // # __CFBundleLanguageNamesArray | |
496 | { "ar_??", "ar" }, // # from old MapScriptInfoAndISOCodes | |
497 | { "az.Ar", "az-Arab" }, // # from old LocaleRefGetPartString | |
498 | { "az.Cy", "az-Cyrl" }, // # from old LocaleRefGetPartString | |
499 | { "az.La", "az-Latn" }, // # from old LocaleRefGetPartString | |
500 | { "be_??", "be_BY" }, // # from old MapScriptInfoAndISOCodes | |
501 | { "bn_??", "bn" }, // # from old LocaleRefGetPartString | |
502 | { "bo_??", "bo" }, // # from old MapScriptInfoAndISOCodes | |
503 | { "br_??", "br" }, // # from old MapScriptInfoAndISOCodes | |
504 | { "cy_??", "cy" }, // # from old MapScriptInfoAndISOCodes | |
505 | { "de-96", "de-1996" }, // # from old MapScriptInfoAndISOCodes // <1.9> | |
506 | { "de_96", "de-1996" }, // # from old MapScriptInfoAndISOCodes // <1.9> | |
507 | { "de_??", "de-1996" }, // # from old MapScriptInfoAndISOCodes | |
508 | { "el.El-P", "grc" }, // # from old LocaleRefGetPartString | |
509 | { "en-ascii", "en_001" }, // # from earlier version of tables in this file! | |
510 | { "en_??", "en_001" }, // # from old MapScriptInfoAndISOCodes | |
511 | { "eo_??", "eo" }, // # from old MapScriptInfoAndISOCodes | |
512 | { "es_??", "es_419" }, // # from old MapScriptInfoAndISOCodes | |
513 | { "es_XL", "es_419" }, // # from earlier version of tables in this file! | |
514 | { "fr_??", "fr_001" }, // # from old MapScriptInfoAndISOCodes | |
515 | { "ga-dots", "ga-Latg" }, // # from earlier version of tables in this file! // <1.8> | |
516 | { "ga-dots_IE", "ga-Latg_IE" }, // # from earlier version of tables in this file! // <1.8> | |
517 | { "ga.Lg", "ga-Latg" }, // # from old LocaleRefGetPartString // <1.8> | |
518 | { "ga.Lg_IE", "ga-Latg_IE" }, // # from old LocaleRefGetPartString // <1.8> | |
519 | { "gd_??", "gd" }, // # from old MapScriptInfoAndISOCodes | |
520 | { "gv_??", "gv" }, // # from old MapScriptInfoAndISOCodes | |
521 | { "jv.La", "jv" }, // # logical extension // <1.9> | |
522 | { "jw.La", "jv" }, // # from old LocaleRefGetPartString | |
523 | { "kk.Cy", "kk" }, // # from old LocaleRefGetPartString | |
524 | { "kl.La", "kl" }, // # from old LocaleRefGetPartString | |
525 | { "kl.La_GL", "kl_GL" }, // # from old LocaleRefGetPartString // <1.9> | |
526 | { "lp_??", "se" }, // # from old MapScriptInfoAndISOCodes | |
527 | { "mk_??", "mk_MK" }, // # from old MapScriptInfoAndISOCodes | |
528 | { "mn.Cy", "mn-Cyrl" }, // # from old LocaleRefGetPartString | |
529 | { "mn.Mn", "mn-Mong" }, // # from old LocaleRefGetPartString | |
530 | { "ms.Ar", "ms-Arab" }, // # from old LocaleRefGetPartString | |
531 | { "ms.La", "ms" }, // # from old LocaleRefGetPartString | |
532 | { "nl-be", "nl-BE" }, // # from old LocaleRefGetPartString | |
533 | { "nl-be_BE", "nl_BE" }, // # from old LocaleRefGetPartString | |
cf7d2af9 A |
534 | { "no-NO", "nb-NO" }, // # not handled by localeStringPrefixToCanonical |
535 | { "no-NO_NO", "nb-NO_NO" }, // # not handled by localeStringPrefixToCanonical | |
bd5b749c A |
536 | // { "no-bok_NO", "nb_NO" }, // # from old LocaleRefGetPartString - handled by localeStringPrefixToCanonical |
537 | // { "no-nyn_NO", "nn_NO" }, // # from old LocaleRefGetPartString - handled by localeStringPrefixToCanonical | |
538 | // { "nya", "ny" }, // # from old LocaleRefGetPartString - handled by localeStringPrefixToCanonical | |
539 | { "pa_??", "pa" }, // # from old LocaleRefGetPartString | |
540 | { "sa.Dv", "sa" }, // # from old LocaleRefGetPartString | |
541 | { "sl_??", "sl_SI" }, // # from old MapScriptInfoAndISOCodes | |
cf7d2af9 | 542 | { "sr_??", "sr_RS" }, // # from old MapScriptInfoAndISOCodes // <1.18> |
bd5b749c A |
543 | { "su.La", "su" }, // # from old LocaleRefGetPartString |
544 | { "yi.He", "yi" }, // # from old LocaleRefGetPartString | |
545 | { "zh-simp", "zh-Hans" }, // # from earlier version of tables in this file! | |
546 | { "zh-trad", "zh-Hant" }, // # from earlier version of tables in this file! | |
547 | { "zh.Ha-S", "zh-Hans" }, // # from old LocaleRefGetPartString | |
548 | { "zh.Ha-S_CN", "zh_CN" }, // # from old LocaleRefGetPartString | |
549 | { "zh.Ha-T", "zh-Hant" }, // # from old LocaleRefGetPartString | |
550 | { "zh.Ha-T_TW", "zh_TW" }, // # from old LocaleRefGetPartString | |
551 | }; | |
// Entry count of oldAppleLocaleToCanonical, derived at compile time from the array's total size.
// The divisor assumes the array's element type is KeyStringToResultString (its declaration is above this excerpt).
552 | enum { | |
553 | kNumOldAppleLocaleToCanonical = sizeof(oldAppleLocaleToCanonical)/sizeof(KeyStringToResultString) | |
554 | }; | |
555 | ||
556 | static const KeyStringToResultString localeStringPrefixToCanonical[] = { | |
557 | // Map 3-letter & obsolete ISO 639 codes, plus obsolete RFC 3066 codes, to the canonical code (2-letter ISO 639 in every row except "i-hak" -> "zh-hakka"). | |
558 | // Where a language has two 3-letter spellings, both are listed (e.g. "fra" and "fre" -> "fr"). Special cases for 'sh' are handled separately (see specialCases). | |
559 | // First column must be all lowercase; must be sorted according to how strcmp compares the strings in the first column. | |
560 | // kNumLocaleStringPrefixToCanonical, defined right after the table, is its entry count. NOTE(review): the sort rule suggests an ordered search over the keys - confirm in the lookup code. | |
561 | // non-canonical canonical [ comment ] # source/reason for non-canonical string | |
562 | // prefix prefix | |
563 | // ------------- --------- | |
564 | |
565 | { "afr", "af" }, // Afrikaans | |
566 | { "alb", "sq" }, // Albanian | |
567 | { "amh", "am" }, // Amharic | |
568 | { "ara", "ar" }, // Arabic | |
569 | { "arm", "hy" }, // Armenian | |
570 | { "asm", "as" }, // Assamese | |
571 | { "aym", "ay" }, // Aymara | |
572 | { "aze", "az" }, // Azerbaijani | |
573 | { "baq", "eu" }, // Basque | |
574 | { "bel", "be" }, // Belarusian | |
575 | { "ben", "bn" }, // Bengali | |
576 | { "bih", "bh" }, // Bihari | |
577 | { "bod", "bo" }, // Tibetan | |
578 | { "bos", "bs" }, // Bosnian | |
579 | { "bre", "br" }, // Breton | |
580 | { "bul", "bg" }, // Bulgarian | |
581 | { "bur", "my" }, // Burmese | |
582 | { "cat", "ca" }, // Catalan | |
583 | { "ces", "cs" }, // Czech | |
584 | { "che", "ce" }, // Chechen | |
585 | { "chi", "zh" }, // Chinese | |
586 | { "cor", "kw" }, // Cornish | |
587 | { "cos", "co" }, // Corsican | |
588 | { "cym", "cy" }, // Welsh | |
589 | { "cze", "cs" }, // Czech | |
590 | { "dan", "da" }, // Danish | |
591 | { "deu", "de" }, // German | |
592 | { "dut", "nl" }, // Dutch | |
593 | { "dzo", "dz" }, // Dzongkha | |
594 | { "ell", "el" }, // Greek, Modern (1453-) | |
595 | { "eng", "en" }, // English | |
596 | { "epo", "eo" }, // Esperanto | |
597 | { "est", "et" }, // Estonian | |
598 | { "eus", "eu" }, // Basque | |
599 | { "fao", "fo" }, // Faroese | |
600 | { "fas", "fa" }, // Persian | |
601 | { "fin", "fi" }, // Finnish | |
602 | { "fra", "fr" }, // French | |
603 | { "fre", "fr" }, // French | |
604 | { "geo", "ka" }, // Georgian | |
605 | { "ger", "de" }, // German | |
606 | { "gla", "gd" }, // Gaelic,Scottish | |
607 | { "gle", "ga" }, // Irish | |
608 | { "glg", "gl" }, // Gallegan | |
609 | { "glv", "gv" }, // Manx | |
610 | { "gre", "el" }, // Greek, Modern (1453-) | |
611 | { "grn", "gn" }, // Guarani | |
612 | { "guj", "gu" }, // Gujarati | |
613 | { "heb", "he" }, // Hebrew | |
614 | { "hin", "hi" }, // Hindi | |
615 | { "hrv", "hr" }, // Croatian | |
616 | { "hun", "hu" }, // Hungarian | |
617 | { "hye", "hy" }, // Armenian | |
618 | { "i-hak", "zh-hakka" }, // Hakka # deprecated RFC 3066 | |
619 | { "i-lux", "lb" }, // Luxembourgish # deprecated RFC 3066 | |
620 | { "i-navajo", "nv" }, // Navajo # deprecated RFC 3066 | |
621 | { "ice", "is" }, // Icelandic | |
622 | { "iku", "iu" }, // Inuktitut | |
623 | { "ile", "ie" }, // Interlingue | |
624 | { "in", "id" }, // Indonesian # deprecated 639 code in -> id (1989) | |
625 | { "ina", "ia" }, // Interlingua | |
626 | { "ind", "id" }, // Indonesian | |
627 | { "isl", "is" }, // Icelandic | |
628 | { "ita", "it" }, // Italian | |
629 | { "iw", "he" }, // Hebrew # deprecated 639 code iw -> he (1989) | |
630 | { "jav", "jv" }, // Javanese | |
631 | { "jaw", "jv" }, // Javanese # deprecated 639 code jaw -> jv (2001) | |
632 | { "ji", "yi" }, // Yiddish # deprecated 639 code ji -> yi (1989) | |
633 | { "jpn", "ja" }, // Japanese | |
634 | { "kal", "kl" }, // Kalaallisut | |
635 | { "kan", "kn" }, // Kannada | |
636 | { "kas", "ks" }, // Kashmiri | |
637 | { "kat", "ka" }, // Georgian | |
638 | { "kaz", "kk" }, // Kazakh | |
639 | { "khm", "km" }, // Khmer | |
640 | { "kin", "rw" }, // Kinyarwanda | |
641 | { "kir", "ky" }, // Kirghiz | |
642 | { "kor", "ko" }, // Korean | |
643 | { "kur", "ku" }, // Kurdish | |
644 | { "lao", "lo" }, // Lao | |
645 | { "lat", "la" }, // Latin | |
646 | { "lav", "lv" }, // Latvian | |
647 | { "lit", "lt" }, // Lithuanian | |
648 | { "ltz", "lb" }, // Letzeburgesch | |
649 | { "mac", "mk" }, // Macedonian | |
650 | { "mal", "ml" }, // Malayalam | |
651 | { "mar", "mr" }, // Marathi | |
652 | { "may", "ms" }, // Malay | |
653 | { "mkd", "mk" }, // Macedonian | |
654 | { "mlg", "mg" }, // Malagasy | |
655 | { "mlt", "mt" }, // Maltese | |
656 | { "mol", "mo" }, // Moldavian | |
657 | { "mon", "mn" }, // Mongolian | |
658 | { "msa", "ms" }, // Malay | |
659 | { "mya", "my" }, // Burmese | |
660 | { "nep", "ne" }, // Nepali | |
661 | { "nld", "nl" }, // Dutch | |
662 | { "nno", "nn" }, // Norwegian Nynorsk | |
663 | { "no", "nb" }, // Norwegian generic # ambiguous 639 code no -> nb | |
664 | { "no-bok", "nb" }, // Norwegian Bokmal # deprecated RFC 3066 tag - used in old LocaleRefGetPartString | |
665 | { "no-nyn", "nn" }, // Norwegian Nynorsk # deprecated RFC 3066 tag - used in old LocaleRefGetPartString | |
666 | { "nob", "nb" }, // Norwegian Bokmal | |
667 | { "nor", "nb" }, // Norwegian generic # ambiguous 639 code nor -> nb | |
668 | { "nya", "ny" }, // Nyanja/Chewa/Chichewa # 3-letter code used in old LocaleRefGetPartString | |
669 | { "oci", "oc" }, // Occitan/Provencal | |
670 | { "ori", "or" }, // Oriya | |
671 | { "orm", "om" }, // Oromo,Galla | |
672 | { "pan", "pa" }, // Panjabi | |
673 | { "per", "fa" }, // Persian | |
674 | { "pol", "pl" }, // Polish | |
675 | { "por", "pt" }, // Portuguese | |
676 | { "pus", "ps" }, // Pushto | |
677 | { "que", "qu" }, // Quechua | |
678 | { "roh", "rm" }, // Raeto-Romance | |
679 | { "ron", "ro" }, // Romanian | |
680 | { "rum", "ro" }, // Romanian | |
681 | { "run", "rn" }, // Rundi | |
682 | { "rus", "ru" }, // Russian | |
683 | { "san", "sa" }, // Sanskrit | |
684 | { "scc", "sr" }, // Serbian | |
685 | { "scr", "hr" }, // Croatian | |
686 | { "sin", "si" }, // Sinhalese | |
687 | { "slk", "sk" }, // Slovak | |
688 | { "slo", "sk" }, // Slovak | |
689 | { "slv", "sl" }, // Slovenian | |
690 | { "sme", "se" }, // Sami,Northern | |
691 | { "snd", "sd" }, // Sindhi | |
692 | { "som", "so" }, // Somali | |
693 | { "spa", "es" }, // Spanish | |
694 | { "sqi", "sq" }, // Albanian | |
695 | { "srp", "sr" }, // Serbian | |
696 | { "sun", "su" }, // Sundanese | |
697 | { "swa", "sw" }, // Swahili | |
698 | { "swe", "sv" }, // Swedish | |
699 | { "tam", "ta" }, // Tamil | |
700 | { "tat", "tt" }, // Tatar | |
701 | { "tel", "te" }, // Telugu | |
702 | { "tgk", "tg" }, // Tajik | |
703 | { "tgl", "tl" }, // Tagalog | |
704 | { "tha", "th" }, // Thai | |
705 | { "tib", "bo" }, // Tibetan | |
706 | { "tir", "ti" }, // Tigrinya | |
707 | { "ton", "to" }, // Tongan | |
708 | { "tuk", "tk" }, // Turkmen | |
709 | { "tur", "tr" }, // Turkish | |
710 | { "uig", "ug" }, // Uighur | |
711 | { "ukr", "uk" }, // Ukrainian | |
712 | { "urd", "ur" }, // Urdu | |
713 | { "uzb", "uz" }, // Uzbek | |
714 | { "vie", "vi" }, // Vietnamese | |
715 | { "wel", "cy" }, // Welsh | |
716 | { "yid", "yi" }, // Yiddish | |
717 | { "zho", "zh" }, // Chinese | |
718 | }; | |
719 | enum { | |
720 | kNumLocaleStringPrefixToCanonical = sizeof(localeStringPrefixToCanonical)/sizeof(KeyStringToResultString) | |
721 | }; | |
722 | ||
723 | ||
724 | static const SpecialCaseUpdates specialCases[] = { | |
725 | // Data for special cases. The table ends with an all-NULL terminator row. Row layout, as inferred from the row comments (field names are in SpecialCaseUpdates, declared outside this excerpt - confirm there): language the rule applies to (NULL = any), region subtag to look for, replacement string, then an optional second subtag/replacement pair. | |
726 | // a) The 3166 code CS was used for Czechoslovakia until 1993, when that country split and the code was | |
727 | // replaced by CZ and SK. Then in 2003-07, the code YU (formerly designating all of Yugoslavia, then after | |
cf7d2af9 A |
728 | // the 1990s breakup just designating what is now Serbia and Montenegro) was changed to CS! Then after |
729 | // Serbia and Montenegro split, the code CS was replaced in 2006-09 with separate codes RS and ME. If we | |
730 | // see CS but a language of cs or sk, we change CS to CZ or SK. Otherwise, we change CS (and old YU) to RS. | |
bd5b749c A |
731 | // b) The 639 code sh for Serbo-Croatian was also replaced in the 1990s by separate codes hr and sr, and |
732 | // deprecated in 2000. We guess which one to map it to as follows: If there is a region tag of HR we use | |
cf7d2af9 | 733 | // hr; if there is a region tag of (now) RS we use sr; else we do not change it (not enough info). |
bd5b749c A |
734 | // c) There are other codes that have been updated without these issues (eg. TP to TL), plus among the |
735 | // "exceptionally reserved" codes some are just alternates for standard codes (eg. UK for GB). | |
736 | { NULL, "-UK", "GB", NULL, NULL }, // always change UK to GB (UK is "exceptionally reserved" to mean GB) | |
737 | { NULL, "-TP", "TL", NULL, NULL }, // always change TP to TL (East Timor, code changed 2002-05) | |
738 | { "cs", "-CS", "CZ", NULL, NULL }, // if language is cs, change CS (pre-1993 Czechoslovakia) to CZ (Czech Republic) | |
739 | { "sk", "-CS", "SK", NULL, NULL }, // if language is sk, change CS (pre-1993 Czechoslovakia) to SK (Slovakia) | |
cf7d2af9 A |
740 | { NULL, "-CS", "RS", NULL, NULL }, // otherwise map CS (assume Serbia+Montenegro) to RS (Serbia) |
741 | { NULL, "-YU", "RS", NULL, NULL }, // also map old YU (assume Serbia+Montenegro) to RS (Serbia) | |
742 | { "sh", "-HR", "hr", "-RS", "sr" }, // then if language is old 'sh' (SerboCroatian), change it to 'hr' (Croatian) | |
743 | // if we find HR (Croatia) or to 'sr' (Serbian) if we find RS (Serbia). | |
744 | // Note: Do this after changing YU/CS to RS as above; in this row the replacements are language codes, not region codes. | |
bd5b749c A |
745 | { NULL, NULL, NULL, NULL, NULL } // terminator |
746 | }; | |
747 | ||
748 | ||
749 | static const KeyStringToResultString localeStringRegionToDefaults[] = { | |
750 | // For some region-code suffixes, there are default substrings to strip off for canonical string. | |
751 | // Must be sorted according to how strcmp compares the strings in the first column | |
752 | // Keys carry their leading '_' and values their leading '-'. kNumLocaleStringRegionToDefaults, defined right after the table, is its entry count. | |
753 | // region default writing | |
754 | // suffix system tags, strip comment | |
755 | // -------- ------------- --------- | |
756 | { "_CN", "-Hans" }, // mainland China, default is simplified | |
757 | { "_HK", "-Hant" }, // Hong Kong, default is traditional | |
758 | { "_MO", "-Hant" }, // Macao, default is traditional | |
759 | { "_SG", "-Hans" }, // Singapore, default is simplified | |
760 | { "_TW", "-Hant" }, // Taiwan, default is traditional | |
761 | }; | |
762 | enum { | |
763 | kNumLocaleStringRegionToDefaults = sizeof(localeStringRegionToDefaults)/sizeof(KeyStringToResultString) | |
764 | }; | |
765 | ||
766 | static const KeyStringToResultString localeStringPrefixToDefaults[] = { | |
767 | // For some initial portions of language tag, there are default substrings to strip off for canonical string. | |
768 | // Must be sorted according to how strcmp compares the strings in the first column | |
769 | // Keys carry the trailing '-' (two keys, "haw-" and "kok-", are 3-letter codes); the "de-" value holds two tags separated by a space. kNumLocaleStringPrefixToDefaults, defined right after the table, is its entry count. | |
770 | // language default writing | |
771 | // tag prefix system tags, strip comment | |
772 | // -------- ------------- --------- | |
773 | { "ab-", "-Cyrl" }, // Abkhazian | |
774 | { "af-", "-Latn" }, // Afrikaans | |
775 | { "am-", "-Ethi" }, // Amharic | |
776 | { "ar-", "-Arab" }, // Arabic | |
777 | { "as-", "-Beng" }, // Assamese | |
778 | { "ay-", "-Latn" }, // Aymara | |
779 | { "be-", "-Cyrl" }, // Belarusian | |
780 | { "bg-", "-Cyrl" }, // Bulgarian | |
781 | { "bn-", "-Beng" }, // Bengali | |
782 | { "bo-", "-Tibt" }, // Tibetan (? not Suppress-Script) | |
783 | { "br-", "-Latn" }, // Breton (? not Suppress-Script) | |
784 | { "bs-", "-Latn" }, // Bosnian | |
785 | { "ca-", "-Latn" }, // Catalan | |
786 | { "cs-", "-Latn" }, // Czech | |
787 | { "cy-", "-Latn" }, // Welsh | |
788 | { "da-", "-Latn" }, // Danish | |
789 | { "de-", "-Latn -1901" }, // German, traditional orthography | |
790 | { "dv-", "-Thaa" }, // Divehi/Maldivian | |
791 | { "dz-", "-Tibt" }, // Dzongkha | |
792 | { "el-", "-Grek" }, // Greek (modern, monotonic) | |
793 | { "en-", "-Latn" }, // English | |
794 | { "eo-", "-Latn" }, // Esperanto | |
795 | { "es-", "-Latn" }, // Spanish | |
796 | { "et-", "-Latn" }, // Estonian | |
797 | { "eu-", "-Latn" }, // Basque | |
798 | { "fa-", "-Arab" }, // Farsi | |
799 | { "fi-", "-Latn" }, // Finnish | |
800 | { "fo-", "-Latn" }, // Faroese | |
801 | { "fr-", "-Latn" }, // French | |
802 | { "ga-", "-Latn" }, // Irish | |
803 | { "gd-", "-Latn" }, // Scottish Gaelic (? not Suppress-Script) | |
804 | { "gl-", "-Latn" }, // Galician | |
805 | { "gn-", "-Latn" }, // Guarani | |
806 | { "gu-", "-Gujr" }, // Gujarati | |
807 | { "gv-", "-Latn" }, // Manx | |
808 | { "haw-", "-Latn" }, // Hawaiian (? not Suppress-Script) | |
809 | { "he-", "-Hebr" }, // Hebrew | |
810 | { "hi-", "-Deva" }, // Hindi | |
811 | { "hr-", "-Latn" }, // Croatian | |
812 | { "hu-", "-Latn" }, // Hungarian | |
813 | { "hy-", "-Armn" }, // Armenian | |
814 | { "id-", "-Latn" }, // Indonesian | |
815 | { "is-", "-Latn" }, // Icelandic | |
816 | { "it-", "-Latn" }, // Italian | |
817 | { "ja-", "-Jpan" }, // Japanese | |
818 | { "ka-", "-Geor" }, // Georgian | |
819 | { "kk-", "-Cyrl" }, // Kazakh | |
820 | { "kl-", "-Latn" }, // Kalaallisut/Greenlandic | |
821 | { "km-", "-Khmr" }, // Central Khmer | |
822 | { "kn-", "-Knda" }, // Kannada | |
823 | { "ko-", "-Hang" }, // Korean (? not Suppress-Script) | |
824 | { "kok-", "-Deva" }, // Konkani | |
825 | { "la-", "-Latn" }, // Latin | |
826 | { "lb-", "-Latn" }, // Luxembourgish | |
827 | { "lo-", "-Laoo" }, // Lao | |
828 | { "lt-", "-Latn" }, // Lithuanian | |
829 | { "lv-", "-Latn" }, // Latvian | |
830 | { "mg-", "-Latn" }, // Malagasy | |
831 | { "mk-", "-Cyrl" }, // Macedonian | |
832 | { "ml-", "-Mlym" }, // Malayalam | |
833 | { "mo-", "-Latn" }, // Moldavian | |
834 | { "mr-", "-Deva" }, // Marathi | |
835 | { "ms-", "-Latn" }, // Malay | |
836 | { "mt-", "-Latn" }, // Maltese | |
837 | { "my-", "-Mymr" }, // Burmese/Myanmar | |
838 | { "nb-", "-Latn" }, // Norwegian Bokmal | |
839 | { "ne-", "-Deva" }, // Nepali | |
840 | { "nl-", "-Latn" }, // Dutch | |
841 | { "nn-", "-Latn" }, // Norwegian Nynorsk | |
842 | { "ny-", "-Latn" }, // Chichewa/Nyanja | |
843 | { "om-", "-Latn" }, // Oromo | |
844 | { "or-", "-Orya" }, // Oriya | |
845 | { "pa-", "-Guru" }, // Punjabi | |
846 | { "pl-", "-Latn" }, // Polish | |
847 | { "ps-", "-Arab" }, // Pushto | |
848 | { "pt-", "-Latn" }, // Portuguese | |
849 | { "qu-", "-Latn" }, // Quechua | |
850 | { "rn-", "-Latn" }, // Rundi | |
851 | { "ro-", "-Latn" }, // Romanian | |
852 | { "ru-", "-Cyrl" }, // Russian | |
853 | { "rw-", "-Latn" }, // Kinyarwanda | |
854 | { "sa-", "-Deva" }, // Sanskrit (? not Suppress-Script) | |
855 | { "se-", "-Latn" }, // Sami (? not Suppress-Script) | |
856 | { "si-", "-Sinh" }, // Sinhala | |
857 | { "sk-", "-Latn" }, // Slovak | |
858 | { "sl-", "-Latn" }, // Slovenian | |
859 | { "so-", "-Latn" }, // Somali | |
860 | { "sq-", "-Latn" }, // Albanian | |
861 | { "sv-", "-Latn" }, // Swedish | |
862 | { "sw-", "-Latn" }, // Swahili | |
863 | { "ta-", "-Taml" }, // Tamil | |
864 | { "te-", "-Telu" }, // Telugu | |
865 | { "th-", "-Thai" }, // Thai | |
866 | { "ti-", "-Ethi" }, // Tigrinya | |
867 | { "tl-", "-Latn" }, // Tagalog | |
868 | { "tn-", "-Latn" }, // Tswana | |
869 | { "to-", "-Latn" }, // Tonga of Tonga Islands | |
870 | { "tr-", "-Latn" }, // Turkish | |
871 | { "uk-", "-Cyrl" }, // Ukrainian | |
872 | { "ur-", "-Arab" }, // Urdu | |
873 | { "vi-", "-Latn" }, // Vietnamese | |
874 | { "wo-", "-Latn" }, // Wolof | |
875 | { "xh-", "-Latn" }, // Xhosa | |
876 | { "yi-", "-Hebr" }, // Yiddish | |
877 | { "zh-", "-Hani" }, // Chinese (? not Suppress-Script) | |
878 | { "zu-", "-Latn" }, // Zulu | |
879 | }; | |
880 | enum { | |
881 | kNumLocaleStringPrefixToDefaults = sizeof(localeStringPrefixToDefaults)/sizeof(KeyStringToResultString) | |
882 | }; | |
883 | ||
884 | static const KeyStringToResultString appleLocaleToLanguageString[] = { | |
885 | // Map locale strings that Apple uses as language IDs to real language strings. | |
886 | // Must be sorted according to how strcmp compares the strings in the first column. | |
887 | // Note: Now we remove all transforms of the form ll_RR -> ll-RR, they are now | |
888 | // handled in the code. <1.19> | |
889 | // What remains is en_US_POSIX plus the zh_* region-to-script rows. kNumAppleLocaleToLanguageString, defined right after the table, is its entry count. | |
890 | // locale lang [ comment ] | |
891 | // string string | |
892 | // ------- ------- | |
893 | { "en_US_POSIX", "en-US-POSIX" }, // POSIX locale, need as language string // <1.17> [3840752] | |
894 | { "zh_CN", "zh-Hans" }, // mainland China => simplified | |
895 | { "zh_HK", "zh-Hant" }, // Hong Kong => traditional, not currently used | |
896 | { "zh_MO", "zh-Hant" }, // Macao => traditional, not currently used | |
897 | { "zh_SG", "zh-Hans" }, // Singapore => simplified, not currently used | |
898 | { "zh_TW", "zh-Hant" }, // Taiwan => traditional | |
899 | }; | |
900 | enum { | |
901 | kNumAppleLocaleToLanguageString = sizeof(appleLocaleToLanguageString)/sizeof(KeyStringToResultString) | |
902 | }; | |
903 | ||
904 | static const KeyStringToResultString appleLocaleToLanguageStringForCFBundle[] = { | |
905 | // Map locale strings that Apple uses as language IDs to real language strings (variant for CFBundle, going by the table name - confirm against its caller). | |
906 | // Must be sorted according to how strcmp compares the strings in the first column. | |
907 | // Unlike appleLocaleToLanguageString, this table still spells out ll_RR -> ll-RR rows; the commented-out rows are the ones tagged "not currently used". kNumAppleLocaleToLanguageStringForCFBundle, defined right after the table, is its entry count. | |
908 | // locale lang [ comment ] | |
909 | // string string | |
910 | // ------- ------- | |
911 | { "de_AT", "de-AT" }, // Austrian German | |
912 | { "de_CH", "de-CH" }, // Swiss German | |
913 | // { "de_DE", "de-DE" }, // German for Germany (default), not currently used | |
914 | { "en_AU", "en-AU" }, // Australian English | |
915 | { "en_CA", "en-CA" }, // Canadian English | |
916 | { "en_GB", "en-GB" }, // British English | |
917 | // { "en_IE", "en-IE" }, // Irish English, not currently used | |
918 | { "en_US", "en-US" }, // U.S. English | |
919 | { "en_US_POSIX", "en-US-POSIX" }, // POSIX locale, need as language string // <1.17> [3840752] | |
920 | // { "fr_BE", "fr-BE" }, // Belgian French, not currently used | |
921 | { "fr_CA", "fr-CA" }, // Canadian French | |
922 | { "fr_CH", "fr-CH" }, // Swiss French | |
923 | // { "fr_FR", "fr-FR" }, // French for France (default), not currently used | |
924 | { "nl_BE", "nl-BE" }, // Flemish = Vlaams, Dutch for Belgium | |
925 | // { "nl_NL", "nl-NL" }, // Dutch for Netherlands (default), not currently used | |
926 | { "pt_BR", "pt-BR" }, // Brazilian Portuguese | |
927 | { "pt_PT", "pt-PT" }, // Portuguese for Portugal | |
928 | { "zh_CN", "zh-Hans" }, // mainland China => simplified | |
929 | { "zh_HK", "zh-Hant" }, // Hong Kong => traditional, not currently used | |
930 | { "zh_MO", "zh-Hant" }, // Macao => traditional, not currently used | |
931 | { "zh_SG", "zh-Hans" }, // Singapore => simplified, not currently used | |
932 | { "zh_TW", "zh-Hant" }, // Taiwan => traditional | |
933 | }; | |
934 | enum { | |
935 | kNumAppleLocaleToLanguageStringForCFBundle = sizeof(appleLocaleToLanguageStringForCFBundle)/sizeof(KeyStringToResultString) | |
936 | }; | |
937 | ||
938 | ||
// Row type of the localeToLegacyCodes table: a locale string paired with legacy RegionCode, LangCode and CFStringEncoding values.
// In that table's rows, -1 appears in regCode or langCode where no legacy code is listed for the locale.
939 | struct LocaleToLegacyCodes { | |
940 | const char * locale; // reduced to language plus at most one other component (script, region, variant), separators normalized to '_' | |
941 | RegionCode regCode; | |
942 | LangCode langCode; | |
943 | CFStringEncoding encoding; | |
944 | }; | |
945 | typedef struct LocaleToLegacyCodes LocaleToLegacyCodes; | |
946 | ||
// Table of locale strings and their legacy RegionCode / LangCode / CFStringEncoding values (columns as in struct LocaleToLegacyCodes).
// -1 marks a missing RegionCode or LangCode. A bare-language row supplies that language's defaults (see row comments such as "defaults to ...").
// The inline /*XX*/ after some locale strings is commentary only, naming region code(s). Rows are listed in strcmp order of the locale column.
// NOTE(review): that ordering is not stated as a requirement here - presumably the lookup depends on it; confirm before adding rows.
// kNumLocaleToLegacyCodes, defined right after the table, is its entry count.
947 | static const LocaleToLegacyCodes localeToLegacyCodes[] = { | |
948 | // locale RegionCode LangCode CFStringEncoding | |
949 | { "af"/*ZA*/, 102/*verAfrikaans*/, 141/*langAfrikaans*/, 0/*Roman*/ }, // Latn | |
950 | { "am", -1, 85/*langAmharic*/, 28/*Ethiopic*/ }, // Ethi | |
951 | { "ar", 16/*verArabic*/, 12/*langArabic*/, 4/*Arabic*/ }, // Arab; | |
952 | { "as", -1, 68/*langAssamese*/, 13/*Bengali*/ }, // Beng; | |
953 | { "ay", -1, 134/*langAymara*/, 0/*Roman*/ }, // Latn; | |
954 | { "az", -1, 49/*langAzerbaijani*/, 7/*Cyrillic*/ }, // assume "az" defaults to -Cyrl | |
955 | { "az_Arab", -1, 50/*langAzerbaijanAr*/, 4/*Arabic*/ }, // Arab; | |
956 | { "az_Cyrl", -1, 49/*langAzerbaijani*/, 7/*Cyrillic*/ }, // Cyrl; | |
957 | { "az_Latn", -1, 150/*langAzerbaijanRoman*/, 0/*Roman*/ }, // Latn; | |
958 | { "be"/*BY*/, 61/*verBelarus*/, 46/*langBelorussian*/, 7/*Cyrillic*/ }, // Cyrl; | |
959 | { "bg"/*BG*/, 72/*verBulgaria*/, 44/*langBulgarian*/, 7/*Cyrillic*/ }, // Cyrl; | |
960 | { "bn", 60/*verBengali*/, 67/*langBengali*/, 13/*Bengali*/ }, // Beng; | |
961 | { "bo", 105/*verTibetan*/, 63/*langTibetan*/, 26/*Tibetan*/ }, // Tibt; | |
962 | { "br", 77/*verBreton*/, 142/*langBreton*/, 39/*Celtic*/ }, // Latn; | |
963 | { "ca"/*ES*/, 73/*verCatalonia*/, 130/*langCatalan*/, 0/*Roman*/ }, // Latn; | |
964 | { "cs"/*CZ*/, 56/*verCzech*/, 38/*langCzech*/, 29/*CentralEurRoman*/ }, // Latn; | |
965 | { "cy", 79/*verWelsh*/, 128/*langWelsh*/, 39/*Celtic*/ }, // Latn; | |
966 | { "da"/*DK*/, 9/*verDenmark*/, 7/*langDanish*/, 0/*Roman*/ }, // Latn; | |
967 | { "de", 3/*verGermany*/, 2/*langGerman*/, 0/*Roman*/ }, // assume "de" defaults to verGermany | |
968 | { "de_1996", 70/*verGermanReformed*/, 2/*langGerman*/, 0/*Roman*/ }, | |
969 | { "de_AT", 92/*verAustria*/, 2/*langGerman*/, 0/*Roman*/ }, | |
970 | { "de_CH", 19/*verGrSwiss*/, 2/*langGerman*/, 0/*Roman*/ }, | |
971 | { "de_DE", 3/*verGermany*/, 2/*langGerman*/, 0/*Roman*/ }, | |
972 | { "dz"/*BT*/, 83/*verBhutan*/, 137/*langDzongkha*/, 26/*Tibetan*/ }, // Tibt; | |
973 | { "el", 20/*verGreece*/, 14/*langGreek*/, 6/*Greek*/ }, // assume "el" defaults to verGreece | |
974 | { "el_CY", 23/*verCyprus*/, 14/*langGreek*/, 6/*Greek*/ }, | |
975 | { "el_GR", 20/*verGreece*/, 14/*langGreek*/, 6/*Greek*/ }, // modern monotonic | |
976 | { "en", 0/*verUS*/, 0/*langEnglish*/, 0/*Roman*/ }, // "en" defaults to verUS (per Chris Hansten) | |
977 | { "en_001", 37/*verInternational*/, 0/*langEnglish*/, 0/*Roman*/ }, | |
978 | { "en_AU", 15/*verAustralia*/, 0/*langEnglish*/, 0/*Roman*/ }, | |
979 | { "en_CA", 82/*verEngCanada*/, 0/*langEnglish*/, 0/*Roman*/ }, | |
980 | { "en_GB", 2/*verBritain*/, 0/*langEnglish*/, 0/*Roman*/ }, | |
981 | { "en_IE", 108/*verIrelandEnglish*/, 0/*langEnglish*/, 0/*Roman*/ }, | |
982 | { "en_SG", 100/*verSingapore*/, 0/*langEnglish*/, 0/*Roman*/ }, | |
983 | { "en_US", 0/*verUS*/, 0/*langEnglish*/, 0/*Roman*/ }, | |
984 | { "eo", 103/*verEsperanto*/, 94/*langEsperanto*/, 0/*Roman*/ }, // Latn; | |
985 | { "es", 8/*verSpain*/, 6/*langSpanish*/, 0/*Roman*/ }, // "es" defaults to verSpain (per Chris Hansten) | |
986 | { "es_419", 86/*verSpLatinAmerica*/, 6/*langSpanish*/, 0/*Roman*/ }, // new BCP 47 tag | |
987 | { "es_ES", 8/*verSpain*/, 6/*langSpanish*/, 0/*Roman*/ }, | |
988 | { "es_MX", 86/*verSpLatinAmerica*/, 6/*langSpanish*/, 0/*Roman*/ }, | |
989 | { "es_US", 86/*verSpLatinAmerica*/, 6/*langSpanish*/, 0/*Roman*/ }, | |
990 | { "et"/*EE*/, 44/*verEstonia*/, 27/*langEstonian*/, 29/*CentralEurRoman*/ }, | |
991 | { "eu", -1, 129/*langBasque*/, 0/*Roman*/ }, // Latn; | |
992 | { "fa"/*IR*/, 48/*verIran*/, 31/*langFarsi/Persian*/, 0x8C/*Farsi*/ }, // Arab; | |
993 | { "fi"/*FI*/, 17/*verFinland*/, 13/*langFinnish*/, 0/*Roman*/ }, | |
994 | { "fo"/*FO*/, 47/*verFaroeIsl*/, 30/*langFaroese*/, 37/*Icelandic*/ }, | |
995 | { "fr", 1/*verFrance*/, 1/*langFrench*/, 0/*Roman*/ }, // "fr" defaults to verFrance (per Chris Hansten) | |
996 | { "fr_001", 91/*verFrenchUniversal*/, 1/*langFrench*/, 0/*Roman*/ }, | |
997 | { "fr_BE", 98/*verFrBelgium*/, 1/*langFrench*/, 0/*Roman*/ }, | |
998 | { "fr_CA", 11/*verFrCanada*/, 1/*langFrench*/, 0/*Roman*/ }, | |
999 | { "fr_CH", 18/*verFrSwiss*/, 1/*langFrench*/, 0/*Roman*/ }, | |
1000 | { "fr_FR", 1/*verFrance*/, 1/*langFrench*/, 0/*Roman*/ }, | |
1001 | { "ga"/*IE*/, 50/*verIreland*/, 35/*langIrishGaelic*/, 0/*Roman*/ }, // no dots (h after) | |
1002 | { "ga_Latg"/*IE*/, 81/*verIrishGaelicScrip*/, 146/*langIrishGaelicScript*/, 40/*Gaelic*/ }, // using dots | |
1003 | { "gd", 75/*verScottishGaelic*/, 144/*langScottishGaelic*/, 39/*Celtic*/ }, | |
1004 | { "gl", -1, 140/*langGalician*/, 0/*Roman*/ }, // Latn; | |
1005 | { "gn", -1, 133/*langGuarani*/, 0/*Roman*/ }, // Latn; | |
1006 | { "grc", 40/*verGreekAncient*/, 148/*langGreekAncient*/, 6/*Greek*/ }, // polytonic (MacGreek doesn't actually support it) | |
1007 | { "gu"/*IN*/, 94/*verGujarati*/, 69/*langGujarati*/, 11/*Gujarati*/ }, // Gujr; | |
1008 | { "gv", 76/*verManxGaelic*/, 145/*langManxGaelic*/, 39/*Celtic*/ }, // Latn; | |
1009 | { "he"/*IL*/, 13/*verIsrael*/, 10/*langHebrew*/, 5/*Hebrew*/ }, // Hebr; | |
1010 | { "hi"/*IN*/, 33/*verIndiaHindi*/, 21/*langHindi*/, 9/*Devanagari*/ }, // Deva; | |
1011 | { "hr"/*HR*/, 68/*verCroatia*/, 18/*langCroatian*/, 36/*Croatian*/ }, | |
1012 | { "hu"/*HU*/, 43/*verHungary*/, 26/*langHungarian*/, 29/*CentralEurRoman*/ }, | |
1013 | { "hy"/*AM*/, 84/*verArmenian*/, 51/*langArmenian*/, 24/*Armenian*/ }, // Armn; | |
1014 | { "id", -1, 81/*langIndonesian*/, 0/*Roman*/ }, // Latn; | |
1015 | { "is"/*IS*/, 21/*verIceland*/, 15/*langIcelandic*/, 37/*Icelandic*/ }, | |
1016 | { "it", 4/*verItaly*/, 3/*langItalian*/, 0/*Roman*/ }, // "it" defaults to verItaly | |
1017 | { "it_CH", 36/*verItalianSwiss*/, 3/*langItalian*/, 0/*Roman*/ }, | |
1018 | { "it_IT", 4/*verItaly*/, 3/*langItalian*/, 0/*Roman*/ }, | |
1019 | { "iu"/*CA*/, 78/*verNunavut*/, 143/*langInuktitut*/, 0xEC/*Inuit*/ }, // Cans; | |
1020 | { "ja"/*JP*/, 14/*verJapan*/, 11/*langJapanese*/, 1/*Japanese*/ }, // Jpan; | |
1021 | { "jv", -1, 138/*langJavaneseRom*/, 0/*Roman*/ }, // Latn; | |
1022 | { "ka"/*GE*/, 85/*verGeorgian*/, 52/*langGeorgian*/, 23/*Georgian*/ }, // Geor; | |
1023 | { "kk", -1, 48/*langKazakh*/, 7/*Cyrillic*/ }, // "kk" defaults to -Cyrl; also have -Latn, -Arab | |
1024 | { "kl", 107/*verGreenland*/, 149/*langGreenlandic*/, 0/*Roman*/ }, // Latn; | |
1025 | { "km", -1, 78/*langKhmer*/, 20/*Khmer*/ }, // Khmr; | |
1026 | { "kn", -1, 73/*langKannada*/, 16/*Kannada*/ }, // Knda; | |
1027 | { "ko"/*KR*/, 51/*verKorea*/, 23/*langKorean*/, 3/*Korean*/ }, // Hang; | |
1028 | { "ks", -1, 61/*langKashmiri*/, 4/*Arabic*/ }, // Arab; | |
1029 | { "ku", -1, 60/*langKurdish*/, 4/*Arabic*/ }, // Arab; | |
1030 | { "ky", -1, 54/*langKirghiz*/, 7/*Cyrillic*/ }, // Cyrl; also -Latn, -Arab | |
1031 | { "la", -1, 131/*langLatin*/, 0/*Roman*/ }, // Latn; | |
1032 | { "lo", -1, 79/*langLao*/, 22/*Laotian*/ }, // Laoo; | |
1033 | { "lt"/*LT*/, 41/*verLithuania*/, 24/*langLithuanian*/, 29/*CentralEurRoman*/ }, | |
1034 | { "lv"/*LV*/, 45/*verLatvia*/, 28/*langLatvian*/, 29/*CentralEurRoman*/ }, | |
1035 | { "mg", -1, 93/*langMalagasy*/, 0/*Roman*/ }, // Latn; | |
1036 | { "mk"/*MK*/, 67/*verMacedonian*/, 43/*langMacedonian*/, 7/*Cyrillic*/ }, // Cyrl; | |
1037 | { "ml", -1, 72/*langMalayalam*/, 17/*Malayalam*/ }, // Mlym; | |
1038 | { "mn", -1, 57/*langMongolian*/, 27/*Mongolian*/ }, // "mn" defaults to -Mong | |
1039 | { "mn_Cyrl", -1, 58/*langMongolianCyr*/, 7/*Cyrillic*/ }, // Cyrl; | |
1040 | { "mn_Mong", -1, 57/*langMongolian*/, 27/*Mongolian*/ }, // Mong; | |
1041 | { "mo", -1, 53/*langMoldavian*/, 7/*Cyrillic*/ }, // Cyrl; | |
1042 | { "mr"/*IN*/, 104/*verMarathi*/, 66/*langMarathi*/, 9/*Devanagari*/ }, // Deva; | |
1043 | { "ms", -1, 83/*langMalayRoman*/, 0/*Roman*/ }, // "ms" defaults to -Latn; | |
1044 | { "ms_Arab", -1, 84/*langMalayArabic*/, 4/*Arabic*/ }, // Arab; | |
1045 | { "mt"/*MT*/, 22/*verMalta*/, 16/*langMaltese*/, 0/*Roman*/ }, // Latn; | |
1046 | { "mul", 74/*verMultilingual*/, -1, 0 }, | |
1047 | { "my", -1, 77/*langBurmese*/, 19/*Burmese*/ }, // Mymr; | |
1048 | { "nb"/*NO*/, 12/*verNorway*/, 9/*langNorwegian*/, 0/*Roman*/ }, | |
1049 | { "ne"/*NP*/, 106/*verNepal*/, 64/*langNepali*/, 9/*Devanagari*/ }, // Deva; | |
1050 | { "nl", 5/*verNetherlands*/, 4/*langDutch*/, 0/*Roman*/ }, // "nl" defaults to verNetherlands | |
1051 | { "nl_BE", 6/*verFlemish*/, 34/*langFlemish*/, 0/*Roman*/ }, | |
1052 | { "nl_NL", 5/*verNetherlands*/, 4/*langDutch*/, 0/*Roman*/ }, | |
1053 | { "nn"/*NO*/, 101/*verNynorsk*/, 151/*langNynorsk*/, 0/*Roman*/ }, | |
1054 | { "ny", -1, 92/*langNyanja/Chewa*/, 0/*Roman*/ }, // Latn; | |
1055 | { "om", -1, 87/*langOromo*/, 28/*Ethiopic*/ }, // Ethi; | |
1056 | { "or", -1, 71/*langOriya*/, 12/*Oriya*/ }, // Orya; | |
1057 | { "pa", 95/*verPunjabi*/, 70/*langPunjabi*/, 10/*Gurmukhi*/ }, // Guru; | |
1058 | { "pl"/*PL*/, 42/*verPoland*/, 25/*langPolish*/, 29/*CentralEurRoman*/ }, | |
1059 | { "ps", -1, 59/*langPashto*/, 0x8C/*Farsi*/ }, // Arab; | |
1060 | { "pt", 71/*verBrazil*/, 8/*langPortuguese*/, 0/*Roman*/ }, // "pt" defaults to verBrazil (per Chris Hansten) | |
1061 | { "pt_BR", 71/*verBrazil*/, 8/*langPortuguese*/, 0/*Roman*/ }, | |
1062 | { "pt_PT", 10/*verPortugal*/, 8/*langPortuguese*/, 0/*Roman*/ }, | |
1063 | { "qu", -1, 132/*langQuechua*/, 0/*Roman*/ }, // Latn; | |
1064 | { "rn", -1, 91/*langRundi*/, 0/*Roman*/ }, // Latn; | |
1065 | { "ro"/*RO*/, 39/*verRomania*/, 37/*langRomanian*/, 38/*Romanian*/ }, | |
1066 | { "ru"/*RU*/, 49/*verRussia*/, 32/*langRussian*/, 7/*Cyrillic*/ }, // Cyrl; | |
1067 | { "rw", -1, 90/*langKinyarwanda*/, 0/*Roman*/ }, // Latn; | |
1068 | { "sa", -1, 65/*langSanskrit*/, 9/*Devanagari*/ }, // Deva; | |
1069 | { "sd", -1, 62/*langSindhi*/, 0x8C/*Farsi*/ }, // Arab; | |
1070 | { "se", 46/*verSami*/, 29/*langSami*/, 0/*Roman*/ }, | |
1071 | { "si", -1, 76/*langSinhalese*/, 18/*Sinhalese*/ }, // Sinh; | |
1072 | { "sk"/*SK*/, 57/*verSlovak*/, 39/*langSlovak*/, 29/*CentralEurRoman*/ }, | |
1073 | { "sl"/*SI*/, 66/*verSlovenian*/, 40/*langSlovenian*/, 36/*Croatian*/ }, | |
1074 | { "so", -1, 88/*langSomali*/, 0/*Roman*/ }, // Latn; | |
1075 | { "sq", -1, 36/*langAlbanian*/, 0/*Roman*/ }, | |
1076 | { "sr"/*CS,RS*/, 65/*verSerbian*/, 42/*langSerbian*/, 7/*Cyrillic*/ }, // Cyrl; | |
1077 | { "su", -1, 139/*langSundaneseRom*/, 0/*Roman*/ }, // Latn; | |
1078 | { "sv"/*SE*/, 7/*verSweden*/, 5/*langSwedish*/, 0/*Roman*/ }, | |
1079 | { "sw", -1, 89/*langSwahili*/, 0/*Roman*/ }, // Latn; | |
1080 | { "ta", -1, 74/*langTamil*/, 14/*Tamil*/ }, // Taml; | |
1081 | { "te", -1, 75/*langTelugu*/, 15/*Telugu*/ }, // Telu | |
1082 | { "tg", -1, 55/*langTajiki*/, 7/*Cyrillic*/ }, // "tg" defaults to "Cyrl" | |
1083 | { "tg_Cyrl", -1, 55/*langTajiki*/, 7/*Cyrillic*/ }, // Cyrl; also -Latn, -Arab | |
1084 | { "th"/*TH*/, 54/*verThailand*/, 22/*langThai*/, 21/*Thai*/ }, // Thai; | |
1085 | { "ti", -1, 86/*langTigrinya*/, 28/*Ethiopic*/ }, // Ethi; | |
1086 | { "tk", -1, 56/*langTurkmen*/, 7/*Cyrillic*/ }, // "tk" defaults to Cyrl | |
1087 | { "tk_Cyrl", -1, 56/*langTurkmen*/, 7/*Cyrillic*/ }, // Cyrl; also -Latn, -Arab | |
1088 | { "tl", -1, 82/*langTagalog*/, 0/*Roman*/ }, // Latn; | |
1089 | { "to"/*TO*/, 88/*verTonga*/, 147/*langTongan*/, 0/*Roman*/ }, // Latn; | |
1090 | { "tr"/*TR*/, 24/*verTurkey*/, 17/*langTurkish*/, 35/*Turkish*/ }, // Latn; | |
1091 | { "tt", -1, 135/*langTatar*/, 7/*Cyrillic*/ }, // Cyrl; | |
1092 | { "tt_Cyrl", -1, 135/*langTatar*/, 7/*Cyrillic*/ }, // Cyrl; | |
1093 | { "ug", -1, 136/*langUighur*/, 4/*Arabic*/ }, // Arab; | |
1094 | { "uk"/*UA*/, 62/*verUkraine*/, 45/*langUkrainian*/, 7/*Cyrillic*/ }, // Cyrl; | |
1095 | { "und", 55/*verScriptGeneric*/, -1, 0 }, | |
1096 | { "ur", 34/*verPakistanUrdu*/, 20/*langUrdu*/, 0x8C/*Farsi*/ }, // "ur" defaults to verPakistanUrdu | |
1097 | { "ur_IN", 96/*verIndiaUrdu*/, 20/*langUrdu*/, 0x8C/*Farsi*/ }, // Arab | |
1098 | { "ur_PK", 34/*verPakistanUrdu*/, 20/*langUrdu*/, 0x8C/*Farsi*/ }, // Arab | |
1099 | { "uz"/*UZ*/, 99/*verUzbek*/, 47/*langUzbek*/, 7/*Cyrillic*/ }, // Cyrl; also -Latn, -Arab | |
1100 | { "uz_Cyrl", 99/*verUzbek*/, 47/*langUzbek*/, 7/*Cyrillic*/ }, | |
1101 | { "vi"/*VN*/, 97/*verVietnam*/, 80/*langVietnamese*/, 30/*Vietnamese*/ }, // Latn | |
1102 | { "yi", -1, 41/*langYiddish*/, 5/*Hebrew*/ }, // Hebr; | |
1103 | { "zh", 52/*verChina*/, 33/*langSimpChinese*/, 25/*ChineseSimp*/ }, // "zh" defaults to verChina, langSimpChinese | |
1104 | { "zh_CN", 52/*verChina*/, 33/*langSimpChinese*/, 25/*ChineseSimp*/ }, | |
1105 | { "zh_HK", 53/*verTaiwan*/, 19/*langTradChinese*/, 2/*ChineseTrad*/ }, | |
1106 | { "zh_Hans", 52/*verChina*/, 33/*langSimpChinese*/, 25/*ChineseSimp*/ }, | |
1107 | { "zh_Hant", 53/*verTaiwan*/, 19/*langTradChinese*/, 2/*ChineseTrad*/ }, | |
1108 | { "zh_MO", 53/*verTaiwan*/, 19/*langTradChinese*/, 2/*ChineseTrad*/ }, | |
1109 | { "zh_SG", 52/*verChina*/, 33/*langSimpChinese*/, 25/*ChineseSimp*/ }, | |
1110 | { "zh_TW", 53/*verTaiwan*/, 19/*langTradChinese*/, 2/*ChineseTrad*/ }, | |
1111 | }; | |
1112 | enum { | |
1113 | kNumLocaleToLegacyCodes = sizeof(localeToLegacyCodes)/sizeof(localeToLegacyCodes[0]) | |
1114 | }; | |
1115 | ||
1116 | /* | |
1117 | For reference here is a list of ICU locales with variants and how some | |
1118 | of them are canonicalized with the ICU function uloc_canonicalize: | |
1119 | ||
1120 | ICU 3.0 has: | |
1121 | en_US_POSIX x no change | |
1122 | hy_AM_REVISED x no change | |
1123 | ja_JP_TRADITIONAL -> ja_JP@calendar=japanese | |
1124 | th_TH_TRADITIONAL -> th_TH@calendar=buddhist | |
1125 | ||
1126 | ICU 2.8 also had the following (now obsolete): | |
1127 | ca_ES_PREEURO | |
1128 | de__PHONEBOOK -> de@collation=phonebook | |
1129 | de_AT_PREEURO | |
1130 | de_DE_PREEURO | |
1131 | de_LU_PREEURO | |
1132 | el_GR_PREEURO | |
1133 | en_BE_PREEURO | |
1134 | en_GB_EURO -> en_GB@currency=EUR | |
1135 | en_IE_PREEURO -> en_IE@currency=IEP | |
1136 | es__TRADITIONAL -> es@collation=traditional | |
1137 | es_ES_PREEURO | |
1138 | eu_ES_PREEURO | |
1139 | fi_FI_PREEURO | |
1140 | fr_BE_PREEURO | |
1141 | fr_FR_PREEURO -> fr_FR@currency=FRF | |
1142 | fr_LU_PREEURO | |
1143 | ga_IE_PREEURO | |
1144 | gl_ES_PREEURO | |
1145 | hi__DIRECT -> hi@collation=direct | |
1146 | it_IT_PREEURO | |
1147 | nl_BE_PREEURO | |
1148 | nl_NL_PREEURO | |
1149 | pt_PT_PREEURO | |
1150 | zh__PINYIN -> zh@collation=pinyin | |
1151 | zh_TW_STROKE -> zh_TW@collation=stroke | |
1152 | ||
1153 | */ | |
1154 | ||
1155 | // _CompareTestEntryToTableEntryKey | |
1156 | // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString) | |
1157 | // comparison function for bsearch | |
1158 | static int _CompareTestEntryToTableEntryKey(const void *testEntryPtr, const void *tableEntryKeyPtr) { | |
1159 | return strcmp( ((const KeyStringToResultString *)testEntryPtr)->key, ((const KeyStringToResultString *)tableEntryKeyPtr)->key ); | |
1160 | } | |
1161 | ||
1162 | // _CompareTestEntryPrefixToTableEntryKey | |
1163 | // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString) | |
1164 | // Comparison function for bsearch. Assumes prefix IS terminated with '-' or '_'. | |
1165 | // Do the following instead of strlen & strncmp so we don't walk tableEntry key twice. | |
1166 | static int _CompareTestEntryPrefixToTableEntryKey(const void *testEntryPtr, const void *tableEntryKeyPtr) { | |
1167 | const char * testPtr = ((const KeyStringToResultString *)testEntryPtr)->key; | |
1168 | const char * tablePtr = ((const KeyStringToResultString *)tableEntryKeyPtr)->key; | |
1169 | ||
1170 | while ( *testPtr == *tablePtr && *tablePtr != 0 ) { | |
1171 | testPtr++; tablePtr++; | |
1172 | } | |
1173 | if ( *tablePtr != 0 ) { | |
1174 | // strings are different, and the string in the table has not run out; | |
1175 | // i.e. the table entry is not a prefix of the text string. | |
1176 | return ( *testPtr < *tablePtr )? -1: 1; | |
1177 | } | |
1178 | return 0; | |
1179 | } | |
1180 | ||
1181 | // _CompareLowerTestEntryPrefixToTableEntryKey | |
1182 | // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString) | |
1183 | // Comparison function for bsearch. Assumes prefix NOT terminated with '-' or '_'. | |
1184 | // Lowercases the test string before comparison (the table should already have lowercased entries). | |
1185 | static int _CompareLowerTestEntryPrefixToTableEntryKey(const void *testEntryPtr, const void *tableEntryKeyPtr) { | |
1186 | const char * testPtr = ((const KeyStringToResultString *)testEntryPtr)->key; | |
1187 | const char * tablePtr = ((const KeyStringToResultString *)tableEntryKeyPtr)->key; | |
1188 | char lowerTestChar; | |
1189 | ||
1190 | while ( (lowerTestChar = tolower(*testPtr)) == *tablePtr && *tablePtr != 0 && lowerTestChar != '_' ) { // <1.9> | |
1191 | testPtr++; tablePtr++; | |
1192 | } | |
1193 | if ( *tablePtr != 0 ) { | |
1194 | // strings are different, and the string in the table has not run out; | |
1195 | // i.e. the table entry is not a prefix of the text string. | |
1196 | if (lowerTestChar == '_') // <1.9> | |
1197 | return -1; // <1.9> | |
1198 | return ( lowerTestChar < *tablePtr )? -1: 1; | |
1199 | } | |
1200 | // The string in the table has run out. If the test string char is not alnum, | |
1201 | // then the string matches, else the test string sorts after. | |
1202 | return ( !isalnum(lowerTestChar) )? 0: 1; | |
1203 | } | |
1204 | ||
// _DeleteCharsAtPointer
// (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
// Removes _length_ characters from the front of the string at _stringPtr_ by
// sliding the remainder (including the terminating NUL) down over them.
// The caller guarantees the string holds at least _length_ characters.
static void _DeleteCharsAtPointer(char *stringPtr, int length) {
    char *dst = stringPtr;

    for (;;) {
        char moved = dst[length];

        *dst++ = moved;
        if (moved == 0) {
            break;  // terminator has been copied; done
        }
    }
}
1214 | ||
// _CopyReplacementAtPointer
// (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
// Overwrites the characters at stringPtr with the replacement string, *excluding*
// the replacement's terminating NUL byte, so the text following the overwritten
// span is left untouched.
static void _CopyReplacementAtPointer(char *stringPtr, const char *replacementPtr) {
    size_t i;

    for (i = 0; replacementPtr[i] != 0; i++) {
        stringPtr[i] = replacementPtr[i];
    }
}
1223 | ||
1224 | // _CheckForTag | |
1225 | // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString) | |
1226 | static Boolean _CheckForTag(const char *localeStringPtr, const char *tagPtr, int tagLen) { | |
1227 | return ( strncmp(localeStringPtr, tagPtr, tagLen) == 0 && !isalnum(localeStringPtr[tagLen]) ); | |
1228 | } | |
1229 | ||
// _ReplacePrefix
// (Code moved out of _UpdateFullLocaleString into a separate function)  // <1.10>
// Replaces the first oldPrefixLen characters of locString with newPrefix,
// shifting the rest of the string (including its terminating NUL) as needed.
//   locString        NUL-terminated string, modified in place
//   locStringMaxLen  size in bytes of the buffer holding locString
//   oldPrefixLen     number of leading characters to replace
//   newPrefix        NUL-terminated replacement text
// If the replacement is longer and the result would not fit in locStringMaxLen
// bytes (including the NUL), or oldPrefixLen is not within the string, the
// string is left unchanged.
static void _ReplacePrefix(char locString[], int locStringMaxLen, int oldPrefixLen, const char *newPrefix) {
    size_t newPrefixLen = strlen(newPrefix);
    size_t stringLen    = strlen(locString);
    size_t oldLen;

    if (oldPrefixLen < 0 || (size_t)oldPrefixLen > stringLen) {
        // nothing sensible to replace
        return;
    }
    oldLen = (size_t)oldPrefixLen;

    if (newPrefixLen > oldLen) {
        // replacement is longer: make sure the grown string (plus NUL) still fits
        size_t grownLen = stringLen + (newPrefixLen - oldLen);

        if (locStringMaxLen <= 0 || grownLen >= (size_t)locStringMaxLen) {
            // no room, can't do substitution
            return;
        }
    }

    if (newPrefixLen != oldLen) {
        // Shift the tail (and its NUL) to its new position. memmove handles the
        // overlap in either direction and never steps outside the buffer.
        memmove(locString + newPrefixLen, locString + oldLen, stringLen - oldLen + 1);
    }

    // do the substitution (without copying newPrefix's NUL)
    memcpy(locString, newPrefix, newPrefixLen);
}
1263 | ||
1264 | // _UpdateFullLocaleString | |
1265 | // Given a locale string that uses standard codes (not a special old-style Apple string), | |
1266 | // update all the language codes and region codes to latest versions, map 3-letter | |
1267 | // language codes to 2-letter codes if possible, and normalize casing. If requested, return | |
1268 | // pointers to a language-region variant subtag (if present) and a region tag (if present). | |
1269 | // (add locStringMaxLen parameter) // <1.10> | |
1270 | static void _UpdateFullLocaleString(char inLocaleString[], int locStringMaxLen, | |
1271 | char **langRegSubtagRef, char **regionTagRef, | |
1272 | char varKeyValueString[]) // <1.17> | |
1273 | { | |
1274 | KeyStringToResultString testEntry; | |
1275 | KeyStringToResultString * foundEntry; | |
1276 | const SpecialCaseUpdates * specialCasePtr; | |
1277 | char * inLocalePtr; | |
1278 | char * subtagPtr; | |
1279 | char * langRegSubtag = NULL; | |
1280 | char * regionTag = NULL; | |
1281 | char * variantTag = NULL; | |
1282 | Boolean subtagHasDigits, pastPrimarySubtag, hadRegion; | |
1283 | ||
1284 | // 1. First replace any non-canonical prefix (case insensitive) with canonical | |
1285 | // (change 3-letter ISO 639 code to 2-letter, update obsolete ISO 639 codes & RFC 3066 tags, etc.) | |
1286 | ||
1287 | testEntry.key = inLocaleString; | |
1288 | foundEntry = (KeyStringToResultString *)bsearch( &testEntry, localeStringPrefixToCanonical, kNumLocaleStringPrefixToCanonical, | |
1289 | sizeof(KeyStringToResultString), _CompareLowerTestEntryPrefixToTableEntryKey ); | |
1290 | if (foundEntry) { | |
1291 | // replace key (at beginning of string) with result | |
1292 | _ReplacePrefix(inLocaleString, locStringMaxLen, strlen(foundEntry->key), foundEntry->result); // <1.10> | |
1293 | } | |
1294 | ||
1295 | // 2. Walk through input string, normalizing case & marking use of ISO 3166 codes | |
1296 | ||
1297 | inLocalePtr = inLocaleString; | |
1298 | subtagPtr = inLocaleString; | |
1299 | subtagHasDigits = false; | |
1300 | pastPrimarySubtag = false; | |
1301 | hadRegion = false; | |
1302 | ||
1303 | while ( true ) { | |
1304 | if ( isalpha(*inLocalePtr) ) { | |
1305 | // if not past a region tag, then lowercase, else uppercase | |
1306 | *inLocalePtr = (!hadRegion)? tolower(*inLocalePtr): toupper(*inLocalePtr); | |
1307 | } else if ( isdigit(*inLocalePtr) ) { | |
1308 | subtagHasDigits = true; | |
1309 | } else { | |
1310 | ||
1311 | if (!pastPrimarySubtag) { | |
1312 | // may have a NULL primary subtag | |
1313 | if (subtagHasDigits) { | |
1314 | break; | |
1315 | } | |
1316 | pastPrimarySubtag = true; | |
1317 | } else if (!hadRegion) { | |
1318 | // We are after any primary language subtag, but not past any region tag. | |
1319 | // This subtag is preceded by '-' or '_'. | |
1320 | int subtagLength = inLocalePtr - subtagPtr; // includes leading '-' or '_' | |
1321 | ||
1322 | if (subtagLength == 3 && !subtagHasDigits) { | |
1323 | // potential ISO 3166 code for region or language variant; if so, needs uppercasing | |
1324 | if (*subtagPtr == '_') { | |
1325 | regionTag = subtagPtr; | |
1326 | hadRegion = true; | |
1327 | subtagPtr[1] = toupper(subtagPtr[1]); | |
1328 | subtagPtr[2] = toupper(subtagPtr[2]); | |
1329 | } else if (langRegSubtag == NULL) { | |
1330 | langRegSubtag = subtagPtr; | |
1331 | subtagPtr[1] = toupper(subtagPtr[1]); | |
1332 | subtagPtr[2] = toupper(subtagPtr[2]); | |
1333 | } | |
1334 | } else if (subtagLength == 4 && subtagHasDigits) { | |
1335 | // potential UN M.49 region code | |
1336 | if (*subtagPtr == '_') { | |
1337 | regionTag = subtagPtr; | |
1338 | hadRegion = true; | |
1339 | } else if (langRegSubtag == NULL) { | |
1340 | langRegSubtag = subtagPtr; | |
1341 | } | |
1342 | } else if (subtagLength == 5 && !subtagHasDigits) { | |
1343 | // ISO 15924 script code, uppercase just the first letter | |
1344 | subtagPtr[1] = toupper(subtagPtr[1]); | |
1345 | } else if (subtagLength == 1 && *subtagPtr == '_') { // <1.17> | |
1346 | hadRegion = true; | |
1347 | } | |
1348 | ||
1349 | if (!hadRegion) { | |
1350 | // convert improper '_' to '-' | |
1351 | *subtagPtr = '-'; | |
1352 | } | |
1353 | } else { | |
1354 | variantTag = subtagPtr; // <1.17> | |
1355 | } | |
1356 | ||
1357 | if (*inLocalePtr == '-' || *inLocalePtr == '_') { | |
1358 | subtagPtr = inLocalePtr; | |
1359 | subtagHasDigits = false; | |
1360 | } else { | |
1361 | break; | |
1362 | } | |
1363 | } | |
1364 | ||
1365 | inLocalePtr++; | |
1366 | } | |
1367 | ||
1368 | // 3 If there is a variant tag, see if ICU canonicalizes it to keywords. // <1.17> [3577669] | |
1369 | // If so, copy the keywords to varKeyValueString and delete the variant tag | |
1370 | // from the original string (but don't otherwise use the ICU canonicalization). | |
1371 | varKeyValueString[0] = 0; | |
1372 | if (variantTag) { | |
1373 | UErrorCode icuStatus; | |
1374 | int icuCanonStringLen; | |
1375 | char * varKeyValueStringPtr = varKeyValueString; | |
1376 | ||
1377 | icuStatus = U_ZERO_ERROR; | |
1378 | icuCanonStringLen = uloc_canonicalize( inLocaleString, varKeyValueString, locStringMaxLen, &icuStatus ); | |
1379 | if ( U_SUCCESS(icuStatus) ) { | |
1380 | char * icuCanonStringPtr = varKeyValueString; | |
1381 | ||
1382 | if (icuCanonStringLen >= locStringMaxLen) | |
1383 | icuCanonStringLen = locStringMaxLen - 1; | |
1384 | varKeyValueString[icuCanonStringLen] = 0; | |
1385 | while (*icuCanonStringPtr != 0 && *icuCanonStringPtr != ULOC_KEYWORD_SEPARATOR) | |
1386 | ++icuCanonStringPtr; | |
1387 | if (*icuCanonStringPtr != 0) { | |
1388 | // the canonicalized string has keywords | |
1389 | // delete the variant tag in the original string (and other trailing '_' or '-') | |
1390 | *variantTag-- = 0; | |
1391 | while (*variantTag == '_') | |
1392 | *variantTag-- = 0; | |
1393 | // delete all of the canonicalized string except the keywords | |
1394 | while (*icuCanonStringPtr != 0) | |
1395 | *varKeyValueStringPtr++ = *icuCanonStringPtr++; | |
1396 | } | |
1397 | *varKeyValueStringPtr = 0; | |
1398 | } | |
1399 | } | |
1400 | ||
1401 | // 4. Handle special cases of updating region codes, or updating language codes based on | |
1402 | // region code. | |
1403 | for (specialCasePtr = specialCases; specialCasePtr->reg1 != NULL; specialCasePtr++) { | |
1404 | if ( specialCasePtr->lang == NULL || _CheckForTag(inLocaleString, specialCasePtr->lang, 2) ) { | |
1405 | // OK, we matched any language specified. Now what needs updating? | |
1406 | char * foundTag; | |
1407 | ||
1408 | if ( isupper(specialCasePtr->update1[0]) ) { | |
1409 | // updating a region code | |
1410 | if ( ( foundTag = strstr(inLocaleString, specialCasePtr->reg1) ) && !isalnum(foundTag[3]) ) { | |
1411 | _CopyReplacementAtPointer(foundTag+1, specialCasePtr->update1); | |
1412 | } | |
1413 | if ( regionTag && _CheckForTag(regionTag+1, specialCasePtr->reg1 + 1, 2) ) { | |
1414 | _CopyReplacementAtPointer(regionTag+1, specialCasePtr->update1); | |
1415 | } | |
1416 | ||
1417 | } else { | |
1418 | // updating the language, there will be two choices based on region | |
1419 | if ( ( regionTag && _CheckForTag(regionTag+1, specialCasePtr->reg1 + 1, 2) ) || | |
1420 | ( ( foundTag = strstr(inLocaleString, specialCasePtr->reg1) ) && !isalnum(foundTag[3]) ) ) { | |
1421 | _CopyReplacementAtPointer(inLocaleString, specialCasePtr->update1); | |
1422 | } else if ( ( regionTag && _CheckForTag(regionTag+1, specialCasePtr->reg2 + 1, 2) ) || | |
1423 | ( ( foundTag = strstr(inLocaleString, specialCasePtr->reg2) ) && !isalnum(foundTag[3]) ) ) { | |
1424 | _CopyReplacementAtPointer(inLocaleString, specialCasePtr->update2); | |
1425 | } | |
1426 | } | |
1427 | } | |
1428 | } | |
1429 | ||
1430 | // 5. return pointers if requested. | |
1431 | if (langRegSubtagRef != NULL) { | |
1432 | *langRegSubtagRef = langRegSubtag; | |
1433 | } | |
1434 | if (regionTagRef != NULL) { | |
1435 | *regionTagRef = regionTag; | |
1436 | } | |
1437 | } | |
1438 | ||
1439 | ||
// _RemoveSubstringsIfPresent
// (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString)
// substringList is a list of space-separated substrings to strip if found in localeString.
// For each substring in the list, the first occurrence in localeString (if any) is
// deleted in place; later occurrences of the same substring are left alone.
// Substrings are matched directly inside substringList, so no fixed-size scratch
// buffer is needed and an over-long list entry cannot overflow anything.
static void _RemoveSubstringsIfPresent(char *localeString, const char *substringList) {
    const char *cursor = substringList;

    while (*cursor != 0) {
        const char *substring;
        size_t      substringLength;
        char *      candidate;

        // skip separators; anything that is not a graphic character counts as one,
        // so an unexpected control character cannot stall the scan
        while (*cursor != 0 && !isgraph((unsigned char)*cursor)) {
            cursor++;
        }

        // measure the current substring
        substring = cursor;
        while (isgraph((unsigned char)*cursor)) {
            cursor++;
        }
        substringLength = (size_t)(cursor - substring);
        if (substringLength == 0) {
            break;  // only separators were left
        }

        // search for the current substring in the locale string; delete the first hit
        for (candidate = localeString; *candidate != 0; candidate++) {
            if (strncmp(candidate, substring, substringLength) == 0) {
                memmove(candidate, candidate + substringLength, strlen(candidate + substringLength) + 1);
                break;
            }
        }
    }
}
1470 | ||
1471 | ||
1472 | // _GetKeyValueString // <1.10> | |
1473 | // Removes any key-value string from inLocaleString, puts canonized version in keyValueString | |
1474 | ||
1475 | static void _GetKeyValueString(char inLocaleString[], char keyValueString[]) { | |
1476 | char * inLocalePtr = inLocaleString; | |
1477 | ||
1478 | while (*inLocalePtr != 0 && *inLocalePtr != ULOC_KEYWORD_SEPARATOR) { | |
1479 | inLocalePtr++; | |
1480 | } | |
1481 | if (*inLocalePtr != 0) { // we found a key-value section | |
1482 | char * keyValuePtr = keyValueString; | |
1483 | ||
1484 | *keyValuePtr = *inLocalePtr; | |
1485 | *inLocalePtr = 0; | |
1486 | do { | |
1487 | if ( *(++inLocalePtr) != ' ' ) { | |
1488 | *(++keyValuePtr) = *inLocalePtr; // remove "tolower() for *inLocalePtr" // <1.11> | |
1489 | } | |
1490 | } while (*inLocalePtr != 0); | |
1491 | } else { | |
1492 | keyValueString[0] = 0; | |
1493 | } | |
1494 | } | |
1495 | ||
1496 | static void _AppendKeyValueString(char inLocaleString[], int locStringMaxLen, char keyValueString[]) { | |
1497 | if (keyValueString[0] != 0) { | |
1498 | UErrorCode uerr = U_ZERO_ERROR; | |
1499 | UEnumeration * uenum = uloc_openKeywords(keyValueString, &uerr); | |
1500 | if ( uenum != NULL ) { | |
1501 | const char * keyword; | |
1502 | int32_t length; | |
1503 | char value[ULOC_KEYWORDS_CAPACITY]; // use as max for keyword value | |
1504 | while ( U_SUCCESS(uerr) ) { | |
1505 | keyword = uenum_next(uenum, &length, &uerr); | |
1506 | if ( keyword == NULL ) { | |
1507 | break; | |
1508 | } | |
1509 | length = uloc_getKeywordValue( keyValueString, keyword, value, sizeof(value), &uerr ); | |
1510 | length = uloc_setKeywordValue( keyword, value, inLocaleString, locStringMaxLen, &uerr ); | |
1511 | } | |
1512 | uenum_close(uenum); | |
1513 | } | |
1514 | } | |
1515 | } | |
1516 | ||
cf7d2af9 | 1517 | // __private_extern__ CFStringRef _CFLocaleCreateCanonicalLanguageIdentifierForCFBundle(CFAllocatorRef allocator, CFStringRef localeIdentifier) {} |
bd5b749c A |
1518 | |
1519 | CFStringRef CFLocaleCreateCanonicalLanguageIdentifierFromString(CFAllocatorRef allocator, CFStringRef localeIdentifier) { | |
1520 | char inLocaleString[kLocaleIdentifierCStringMax]; | |
1521 | CFStringRef outStringRef = NULL; | |
1522 | ||
1523 | if ( localeIdentifier && CFStringGetCString(localeIdentifier, inLocaleString, sizeof(inLocaleString), kCFStringEncodingASCII) ) { | |
1524 | KeyStringToResultString testEntry; | |
1525 | KeyStringToResultString * foundEntry; | |
1526 | char keyValueString[sizeof(inLocaleString)]; // <1.10> | |
1527 | char varKeyValueString[sizeof(inLocaleString)]; // <1.17> | |
1528 | ||
1529 | _GetKeyValueString(inLocaleString, keyValueString); // <1.10> | |
1530 | testEntry.result = NULL; | |
1531 | ||
1532 | // A. First check if input string matches an old-style string that has a replacement | |
1533 | // (do this before case normalization) | |
1534 | testEntry.key = inLocaleString; | |
1535 | foundEntry = (KeyStringToResultString *)bsearch( &testEntry, oldAppleLocaleToCanonical, kNumOldAppleLocaleToCanonical, | |
1536 | sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey ); | |
1537 | if (foundEntry) { | |
1538 | // It does match, so replace old string with new | |
1539 | strlcpy(inLocaleString, foundEntry->result, sizeof(inLocaleString)); | |
1540 | varKeyValueString[0] = 0; | |
1541 | } else { | |
1542 | char * langRegSubtag = NULL; | |
1543 | char * regionTag = NULL; | |
1544 | ||
1545 | // B. No match with an old-style string, use input string but update codes, normalize case, etc. | |
1546 | _UpdateFullLocaleString(inLocaleString, sizeof(inLocaleString), &langRegSubtag, ®ionTag, varKeyValueString); // <1.10><1.17><1.19> | |
1547 | ||
1548 | // if the language part already includes a regional variant, then delete any region tag. <1.19> | |
1549 | if (langRegSubtag && regionTag) | |
1550 | *regionTag = 0; | |
1551 | } | |
1552 | ||
1553 | // C. Now we have an up-to-date locale string, but we need to strip defaults and turn it into a language string | |
1554 | ||
1555 | // 1. Strip defaults in input string based on initial part of locale string | |
1556 | // (mainly to strip default script tag for a language) | |
1557 | testEntry.key = inLocaleString; | |
1558 | foundEntry = (KeyStringToResultString *)bsearch( &testEntry, localeStringPrefixToDefaults, kNumLocaleStringPrefixToDefaults, | |
1559 | sizeof(KeyStringToResultString), _CompareTestEntryPrefixToTableEntryKey ); | |
1560 | if (foundEntry) { | |
1561 | // The input string begins with a character sequence for which | |
1562 | // there are default substrings which should be stripped if present | |
1563 | _RemoveSubstringsIfPresent(inLocaleString, foundEntry->result); | |
1564 | } | |
1565 | ||
1566 | // 2. If the string matches a locale string used by Apple as a language string, turn it into a language string | |
1567 | testEntry.key = inLocaleString; | |
1568 | foundEntry = (KeyStringToResultString *)bsearch( &testEntry, appleLocaleToLanguageString, kNumAppleLocaleToLanguageString, | |
1569 | sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey ); | |
1570 | if (foundEntry) { | |
1571 | // it does match | |
1572 | strlcpy(inLocaleString, foundEntry->result, sizeof(inLocaleString)); | |
1573 | } else { | |
1574 | // skip to any region tag or java-type variant | |
1575 | char * inLocalePtr = inLocaleString; | |
1576 | while (*inLocalePtr != 0 && *inLocalePtr != '_') { | |
1577 | inLocalePtr++; | |
1578 | } | |
1579 | // if there is still a region tag, turn it into a language variant <1.19> | |
1580 | if (*inLocalePtr == '_') { | |
1581 | // handle 3-digit regions in addition to 2-letter ones | |
1582 | char * regionTag = inLocalePtr++; | |
1583 | long expectedLength = 0; | |
1584 | if ( isalpha(*inLocalePtr) ) { | |
1585 | while ( isalpha(*(++inLocalePtr)) ) | |
1586 | ; | |
1587 | expectedLength = 3; | |
1588 | } else if ( isdigit(*inLocalePtr) ) { | |
1589 | while ( isdigit(*(++inLocalePtr)) ) | |
1590 | ; | |
1591 | expectedLength = 4; | |
1592 | } | |
1593 | *regionTag = (inLocalePtr - regionTag == expectedLength)? '-': 0; | |
1594 | } | |
1595 | // anything else at/after '_' just gets deleted | |
1596 | *inLocalePtr = 0; | |
1597 | } | |
1598 | ||
1599 | // D. Re-append any key-value strings, now canonical // <1.10><1.17> | |
1600 | _AppendKeyValueString( inLocaleString, sizeof(inLocaleString), varKeyValueString ); | |
1601 | _AppendKeyValueString( inLocaleString, sizeof(inLocaleString), keyValueString ); | |
1602 | ||
1603 | // All done, return what we came up with. | |
1604 | outStringRef = CFStringCreateWithCString(allocator, inLocaleString, kCFStringEncodingASCII); | |
1605 | } | |
1606 | ||
1607 | return outStringRef; | |
1608 | } | |
1609 | ||
1610 | ||
1611 | CFStringRef CFLocaleCreateCanonicalLocaleIdentifierFromString(CFAllocatorRef allocator, CFStringRef localeIdentifier) { | |
1612 | char inLocaleString[kLocaleIdentifierCStringMax]; | |
1613 | CFStringRef outStringRef = NULL; | |
1614 | ||
1615 | if ( localeIdentifier && CFStringGetCString(localeIdentifier, inLocaleString, sizeof(inLocaleString), kCFStringEncodingASCII) ) { | |
1616 | KeyStringToResultString testEntry; | |
1617 | KeyStringToResultString * foundEntry; | |
1618 | char keyValueString[sizeof(inLocaleString)]; // <1.10> | |
1619 | char varKeyValueString[sizeof(inLocaleString)]; // <1.17> | |
1620 | ||
1621 | _GetKeyValueString(inLocaleString, keyValueString); // <1.10> | |
1622 | testEntry.result = NULL; | |
1623 | ||
1624 | // A. First check if input string matches an old-style Apple string that has a replacement | |
1625 | // (do this before case normalization) | |
1626 | testEntry.key = inLocaleString; | |
1627 | foundEntry = (KeyStringToResultString *)bsearch( &testEntry, oldAppleLocaleToCanonical, kNumOldAppleLocaleToCanonical, | |
1628 | sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey ); | |
1629 | if (foundEntry) { | |
1630 | // It does match, so replace old string with new // <1.10> | |
1631 | strlcpy(inLocaleString, foundEntry->result, sizeof(inLocaleString)); | |
1632 | varKeyValueString[0] = 0; | |
1633 | } else { | |
1634 | char * langRegSubtag = NULL; | |
1635 | char * regionTag = NULL; | |
1636 | ||
1637 | // B. No match with an old-style string, use input string but update codes, normalize case, etc. | |
1638 | _UpdateFullLocaleString(inLocaleString, sizeof(inLocaleString), &langRegSubtag, ®ionTag, varKeyValueString); // <1.10><1.17> | |
1639 | ||
1640 | ||
1641 | // C. Now strip defaults that are implied by other fields. | |
1642 | ||
1643 | // 1. If an ISO 3166 region tag matches an ISO 3166 regional language variant subtag, strip the latter. | |
1644 | if ( langRegSubtag && regionTag && strncmp(langRegSubtag+1, regionTag+1, 2) == 0 ) { | |
1645 | _DeleteCharsAtPointer(langRegSubtag, 3); | |
1646 | } | |
1647 | ||
1648 | // 2. Strip defaults in input string based on final region tag in locale string | |
1649 | // (mainly for Chinese, to strip -Hans for _CN/_SG, -Hant for _TW/_HK/_MO) | |
1650 | if ( regionTag ) { | |
1651 | testEntry.key = regionTag; | |
1652 | foundEntry = (KeyStringToResultString *)bsearch( &testEntry, localeStringRegionToDefaults, kNumLocaleStringRegionToDefaults, | |
1653 | sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey ); | |
1654 | if (foundEntry) { | |
1655 | _RemoveSubstringsIfPresent(inLocaleString, foundEntry->result); | |
1656 | } | |
1657 | } | |
1658 | ||
1659 | // 3. Strip defaults in input string based on initial part of locale string | |
1660 | // (mainly to strip default script tag for a language) | |
1661 | testEntry.key = inLocaleString; | |
1662 | foundEntry = (KeyStringToResultString *)bsearch( &testEntry, localeStringPrefixToDefaults, kNumLocaleStringPrefixToDefaults, | |
1663 | sizeof(KeyStringToResultString), _CompareTestEntryPrefixToTableEntryKey ); | |
1664 | if (foundEntry) { | |
1665 | // The input string begins with a character sequence for which | |
1666 | // there are default substrings which should be stripped if present | |
1667 | _RemoveSubstringsIfPresent(inLocaleString, foundEntry->result); | |
1668 | } | |
1669 | } | |
1670 | ||
1671 | // D. Re-append any key-value strings, now canonical // <1.10><1.17> | |
1672 | _AppendKeyValueString( inLocaleString, sizeof(inLocaleString), varKeyValueString ); | |
1673 | _AppendKeyValueString( inLocaleString, sizeof(inLocaleString), keyValueString ); | |
1674 | ||
1675 | // Now create the CFString (even if empty!) | |
1676 | outStringRef = CFStringCreateWithCString(allocator, inLocaleString, kCFStringEncodingASCII); | |
1677 | } | |
1678 | ||
1679 | return outStringRef; | |
1680 | } | |
1681 | ||
1682 | // CFLocaleCreateCanonicalLocaleIdentifierFromScriptManagerCodes, based on | |
1683 | // the first part of the SPI CFBundleCopyLocalizationForLocalizationInfo in CFBundle_Resources.c | |
1684 | CFStringRef CFLocaleCreateCanonicalLocaleIdentifierFromScriptManagerCodes(CFAllocatorRef allocator, LangCode lcode, RegionCode rcode) { | |
1685 | CFStringRef result = NULL; | |
1686 | if (0 <= rcode && rcode < kNumRegionCodeToLocaleString) { | |
1687 | const char *localeString = regionCodeToLocaleString[rcode]; | |
1688 | if (localeString != NULL && *localeString != '\0') { | |
1689 | result = CFStringCreateWithCStringNoCopy(allocator, localeString, kCFStringEncodingASCII, kCFAllocatorNull); | |
1690 | } | |
1691 | } | |
1692 | if (result) return result; | |
1693 | if (0 <= lcode && lcode < kNumLangCodeToLocaleString) { | |
1694 | const char *localeString = langCodeToLocaleString[lcode]; | |
1695 | if (localeString != NULL && *localeString != '\0') { | |
1696 | result = CFStringCreateWithCStringNoCopy(allocator, localeString, kCFStringEncodingASCII, kCFAllocatorNull); | |
1697 | } | |
1698 | } | |
1699 | return result; | |
1700 | } | |
1701 | ||
1702 | ||
cf7d2af9 A |
1703 | /* |
1704 | SPI: CFLocaleGetLanguageRegionEncodingForLocaleIdentifier gets the appropriate language and region codes, | |
1705 | and the default legacy script code and encoding, for the specified locale (or language) string. | |
1706 | Returns false if CFLocale has no information about the given locale (in which case none of the by-reference return values are set); | |
1707 | otherwise may set *langCode and/or *regCode to -1 if there is no appropriate legacy value for the locale. | |
1708 | This is a replacement for the CFBundle SPI CFBundleGetLocalizationInfoForLocalization (which was intended to be temporary and transitional); | |
1709 | this function is more up-to-date in its handling of locale strings, and is in CFLocale where this functionality should belong. Compared | |
1710 | to CFBundleGetLocalizationInfoForLocalization, this function does not specially interpret a NULL localeIdentifier to mean use the single most | |
1711 | preferred localization in the current context (this function returns NO for a NULL localeIdentifier); and in this function | |
1712 | langCode, regCode, and scriptCode are all SInt16* (not SInt32* like the equivalent parameters in CFBundleGetLocalizationInfoForLocalization). | |
1713 | */ | |
1714 | static int CompareLocaleToLegacyCodesEntries( const void *entry1, const void *entry2 ); | |
1715 | ||
1716 | Boolean CFLocaleGetLanguageRegionEncodingForLocaleIdentifier(CFStringRef localeIdentifier, LangCode *langCode, RegionCode *regCode, ScriptCode *scriptCode, CFStringEncoding *stringEncoding) { | |
1717 | Boolean returnValue = false; | |
1718 | CFStringRef canonicalIdentifier = CFLocaleCreateCanonicalLocaleIdentifierFromString(NULL, localeIdentifier); | |
1719 | if (canonicalIdentifier) { | |
1720 | char localeCString[kLocaleIdentifierCStringMax]; | |
1721 | if ( CFStringGetCString(canonicalIdentifier, localeCString, sizeof(localeCString), kCFStringEncodingASCII) ) { | |
1722 | UErrorCode icuStatus = U_ZERO_ERROR; | |
1723 | int32_t languagelength; | |
1724 | char searchString[ULOC_LANG_CAPACITY + ULOC_FULLNAME_CAPACITY]; | |
1725 | ||
1726 | languagelength = uloc_getLanguage( localeCString, searchString, ULOC_LANG_CAPACITY, &icuStatus ); | |
1727 | if ( U_SUCCESS(icuStatus) && languagelength > 0 ) { | |
1728 | // OK, here we have at least a language code, check for other components in order | |
1729 | LocaleToLegacyCodes searchEntry = { (const char *)searchString, 0, 0, 0 }; | |
1730 | const LocaleToLegacyCodes * foundEntryPtr; | |
1731 | int32_t componentLength; | |
1732 | char componentString[ULOC_FULLNAME_CAPACITY]; | |
1733 | ||
1734 | languagelength = strlen(searchString); // in case it got truncated | |
1735 | icuStatus = U_ZERO_ERROR; | |
1736 | componentLength = uloc_getScript( localeCString, componentString, sizeof(componentString), &icuStatus ); | |
1737 | if ( U_FAILURE(icuStatus) || componentLength == 0 ) { | |
1738 | icuStatus = U_ZERO_ERROR; | |
1739 | componentLength = uloc_getCountry( localeCString, componentString, sizeof(componentString), &icuStatus ); | |
1740 | if ( U_FAILURE(icuStatus) || componentLength == 0 ) { | |
1741 | icuStatus = U_ZERO_ERROR; | |
1742 | componentLength = uloc_getVariant( localeCString, componentString, sizeof(componentString), &icuStatus ); | |
1743 | if ( U_FAILURE(icuStatus) ) { | |
1744 | componentLength = 0; | |
1745 | } | |
1746 | } | |
1747 | } | |
1748 | ||
1749 | // Append whichever other component we first found | |
1750 | if (componentLength > 0) { | |
1751 | strlcat(searchString, "_", sizeof(searchString)); | |
1752 | strlcat(searchString, componentString, sizeof(searchString)); | |
1753 | } | |
1754 | ||
1755 | // Search | |
1756 | foundEntryPtr = (const LocaleToLegacyCodes *)bsearch( &searchEntry, localeToLegacyCodes, kNumLocaleToLegacyCodes, sizeof(LocaleToLegacyCodes), CompareLocaleToLegacyCodesEntries ); | |
1757 | if (foundEntryPtr == NULL && (int32_t) strlen(searchString) > languagelength) { | |
1758 | // truncate to language al;one and try again | |
1759 | searchString[languagelength] = 0; | |
1760 | foundEntryPtr = (const LocaleToLegacyCodes *)bsearch( &searchEntry, localeToLegacyCodes, kNumLocaleToLegacyCodes, sizeof(LocaleToLegacyCodes), CompareLocaleToLegacyCodesEntries ); | |
1761 | } | |
1762 | ||
1763 | // If found a matching entry, return requested values | |
1764 | if (foundEntryPtr) { | |
1765 | returnValue = true; | |
1766 | if (langCode) *langCode = foundEntryPtr->langCode; | |
1767 | if (regCode) *regCode = foundEntryPtr->regCode; | |
1768 | if (stringEncoding) *stringEncoding = foundEntryPtr->encoding; | |
1769 | if (scriptCode) { | |
1770 | // map CFStringEncoding to ScriptCode | |
1771 | if (foundEntryPtr->encoding < 33/*kCFStringEncodingMacSymbol*/) { | |
1772 | *scriptCode = foundEntryPtr->encoding; | |
1773 | } else { | |
1774 | switch (foundEntryPtr->encoding) { | |
1775 | case 0x8C/*kCFStringEncodingMacFarsi*/: *scriptCode = 4/*smArabic*/; break; | |
1776 | case 0x98/*kCFStringEncodingMacUkrainian*/: *scriptCode = 7/*smCyrillic*/; break; | |
1777 | case 0xEC/*kCFStringEncodingMacInuit*/: *scriptCode = 28/*smEthiopic*/; break; | |
1778 | case 0xFC/*kCFStringEncodingMacVT100*/: *scriptCode = 32/*smUninterp*/; break; | |
1779 | default: *scriptCode = 0/*smRoman*/; break; | |
1780 | } | |
1781 | } | |
1782 | } | |
1783 | } | |
1784 | } | |
1785 | } | |
1786 | CFRelease(canonicalIdentifier); | |
1787 | } | |
1788 | return returnValue; | |
1789 | } | |
1790 | ||
1791 | static int CompareLocaleToLegacyCodesEntries( const void *entry1, const void *entry2 ) { | |
1792 | const char * localeString1 = ((const LocaleToLegacyCodes *)entry1)->locale; | |
1793 | const char * localeString2 = ((const LocaleToLegacyCodes *)entry2)->locale; | |
1794 | return strcmp(localeString1, localeString2); | |
1795 | } | |
1796 | ||
1797 | ||
bd5b749c A |
1798 | CFDictionaryRef CFLocaleCreateComponentsFromLocaleIdentifier(CFAllocatorRef allocator, CFStringRef localeID) { |
1799 | char cLocaleID[ULOC_FULLNAME_CAPACITY+ULOC_KEYWORD_AND_VALUES_CAPACITY]; | |
1800 | char buffer[ULOC_FULLNAME_CAPACITY+ULOC_KEYWORD_AND_VALUES_CAPACITY]; | |
1801 | CFMutableDictionaryRef working = CFDictionaryCreateMutable(allocator, 10, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks); | |
1802 | ||
1803 | UErrorCode icuStatus = U_ZERO_ERROR; | |
1804 | int32_t length = 0; | |
1805 | ||
1806 | // Extract the C string locale ID, for ICU | |
1807 | CFIndex outBytes = 0; | |
1808 | CFStringGetBytes(localeID, CFRangeMake(0, CFStringGetLength(localeID)), kCFStringEncodingASCII, (UInt8) '?', true, (unsigned char *)cLocaleID, sizeof(cLocaleID)/sizeof(char) - 1, &outBytes); | |
1809 | cLocaleID[outBytes] = '\0'; | |
1810 | ||
1811 | // Get the components | |
1812 | length = uloc_getLanguage(cLocaleID, buffer, sizeof(buffer)/sizeof(char), &icuStatus); | |
1813 | if (U_SUCCESS(icuStatus) && length > 0) | |
1814 | { | |
1815 | CFStringRef string = CFStringCreateWithBytes(allocator, (UInt8 *)buffer, length, kCFStringEncodingASCII, true); | |
cf7d2af9 | 1816 | CFDictionaryAddValue(working, kCFLocaleLanguageCodeKey, string); |
bd5b749c A |
1817 | CFRelease(string); |
1818 | } | |
1819 | icuStatus = U_ZERO_ERROR; | |
1820 | ||
1821 | length = uloc_getScript(cLocaleID, buffer, sizeof(buffer)/sizeof(char), &icuStatus); | |
1822 | if (U_SUCCESS(icuStatus) && length > 0) | |
1823 | { | |
1824 | CFStringRef string = CFStringCreateWithBytes(allocator, (UInt8 *)buffer, length, kCFStringEncodingASCII, true); | |
cf7d2af9 | 1825 | CFDictionaryAddValue(working, kCFLocaleScriptCodeKey, string); |
bd5b749c A |
1826 | CFRelease(string); |
1827 | } | |
1828 | icuStatus = U_ZERO_ERROR; | |
1829 | ||
1830 | length = uloc_getCountry(cLocaleID, buffer, sizeof(buffer)/sizeof(char), &icuStatus); | |
1831 | if (U_SUCCESS(icuStatus) && length > 0) | |
1832 | { | |
1833 | CFStringRef string = CFStringCreateWithBytes(allocator, (UInt8 *)buffer, length, kCFStringEncodingASCII, true); | |
cf7d2af9 | 1834 | CFDictionaryAddValue(working, kCFLocaleCountryCodeKey, string); |
bd5b749c A |
1835 | CFRelease(string); |
1836 | } | |
1837 | icuStatus = U_ZERO_ERROR; | |
1838 | ||
1839 | length = uloc_getVariant(cLocaleID, buffer, sizeof(buffer)/sizeof(char), &icuStatus); | |
1840 | if (U_SUCCESS(icuStatus) && length > 0) | |
1841 | { | |
1842 | CFStringRef string = CFStringCreateWithBytes(allocator, (UInt8 *)buffer, length, kCFStringEncodingASCII, true); | |
cf7d2af9 | 1843 | CFDictionaryAddValue(working, kCFLocaleVariantCodeKey, string); |
bd5b749c A |
1844 | CFRelease(string); |
1845 | } | |
1846 | icuStatus = U_ZERO_ERROR; | |
1847 | ||
1848 | // Now get the keywords; open an enumerator on them | |
1849 | UEnumeration *iter = uloc_openKeywords(cLocaleID, &icuStatus); | |
1850 | const char *locKey = NULL; | |
1851 | int32_t locKeyLen = 0; | |
1852 | while ((locKey = uenum_next(iter, &locKeyLen, &icuStatus)) && U_SUCCESS(icuStatus)) | |
1853 | { | |
1854 | char locValue[ULOC_KEYWORD_AND_VALUES_CAPACITY]; | |
1855 | ||
1856 | // Get the value for this keyword | |
1857 | if (uloc_getKeywordValue(cLocaleID, locKey, locValue, sizeof(locValue)/sizeof(char), &icuStatus) > 0 | |
1858 | && U_SUCCESS(icuStatus)) | |
1859 | { | |
1860 | CFStringRef key = CFStringCreateWithBytes(allocator, (UInt8 *)locKey, strlen(locKey), kCFStringEncodingASCII, true); | |
1861 | CFStringRef value = CFStringCreateWithBytes(allocator, (UInt8 *)locValue, strlen(locValue), kCFStringEncodingASCII, true); | |
1862 | if (key && value) | |
1863 | CFDictionaryAddValue(working, key, value); | |
1864 | if (key) | |
1865 | CFRelease(key); | |
1866 | if (value) | |
1867 | CFRelease(value); | |
1868 | } | |
1869 | } | |
1870 | uenum_close(iter); | |
1871 | ||
1872 | // Convert to an immutable dictionary and return | |
1873 | CFDictionaryRef result = CFDictionaryCreateCopy(allocator, working); | |
1874 | CFRelease(working); | |
1875 | return result; | |
1876 | } | |
1877 | ||
cf7d2af9 A |
1878 | static char *__CStringFromString(CFStringRef str) { |
1879 | if (!str) return NULL; | |
1880 | CFRange rg = CFRangeMake(0, CFStringGetLength(str)); | |
1881 | CFIndex neededLength = 0; | |
1882 | CFStringGetBytes(str, rg, kCFStringEncodingASCII, (UInt8)'?', false, NULL, 0, &neededLength); | |
1883 | char *buf = (char *)malloc(neededLength + 1); | |
1884 | CFStringGetBytes(str, rg, kCFStringEncodingASCII, (UInt8)'?', false, (uint8_t *)buf, neededLength, &neededLength); | |
1885 | buf[neededLength] = '\0'; | |
1886 | return buf; | |
bd5b749c A |
1887 | } |
1888 | ||
1889 | CFStringRef CFLocaleCreateLocaleIdentifierFromComponents(CFAllocatorRef allocator, CFDictionaryRef dictionary) { | |
cf7d2af9 A |
1890 | CFIndex cnt = CFDictionaryGetCount(dictionary); |
1891 | STACK_BUFFER_DECL(CFStringRef, values, cnt); | |
1892 | STACK_BUFFER_DECL(CFStringRef, keys, cnt); | |
1893 | CFDictionaryGetKeysAndValues(dictionary, (const void **)keys, (const void **)values); | |
1894 | ||
1895 | char *language = NULL, *script = NULL, *country = NULL, *variant = NULL; | |
1896 | for (CFIndex idx = 0; idx < cnt; idx++) { | |
1897 | if (CFEqual(kCFLocaleLanguageCodeKey, keys[idx])) { | |
1898 | language = __CStringFromString(values[idx]); | |
1899 | keys[idx] = NULL; | |
1900 | } else if (CFEqual(kCFLocaleScriptCodeKey, keys[idx])) { | |
1901 | script = __CStringFromString(values[idx]); | |
1902 | keys[idx] = NULL; | |
1903 | } else if (CFEqual(kCFLocaleCountryCodeKey, keys[idx])) { | |
1904 | country = __CStringFromString(values[idx]); | |
1905 | keys[idx] = NULL; | |
1906 | } else if (CFEqual(kCFLocaleVariantCodeKey, keys[idx])) { | |
1907 | variant = __CStringFromString(values[idx]); | |
1908 | keys[idx] = NULL; | |
1909 | } | |
bd5b749c A |
1910 | } |
1911 | ||
cf7d2af9 A |
1912 | char *buf1 = NULL; // (|L)(|_S)(|_C|_C_V|__V) |
1913 | asprintf(&buf1, "%s%s%s%s%s%s%s", language ? language : "", script ? "_" : "", script ? script : "", (country || variant ? "_" : ""), country ? country : "", variant ? "_" : "", variant ? variant : ""); | |
1914 | ||
1915 | char cLocaleID[2 * ULOC_FULLNAME_CAPACITY + 2 * ULOC_KEYWORD_AND_VALUES_CAPACITY]; | |
1916 | strlcpy(cLocaleID, buf1, sizeof(cLocaleID)); | |
1917 | free(language); | |
1918 | free(script); | |
1919 | free(country); | |
1920 | free(variant); | |
1921 | free(buf1); | |
1922 | ||
1923 | for (CFIndex idx = 0; idx < cnt; idx++) { | |
1924 | if (keys[idx]) { | |
1925 | char *key = __CStringFromString(keys[idx]); | |
8ca704e1 A |
1926 | char *value; |
1927 | if (0 == strcmp(key, "kCFLocaleCalendarKey")) { | |
1928 | // For interchangeability convenience, we alternatively allow a | |
1929 | // calendar object to be passed in, with the alternate key, and | |
1930 | // we'll extract the identifier. | |
1931 | CFCalendarRef cal = (CFCalendarRef)values[idx]; | |
1932 | CFStringRef ident = CFCalendarGetIdentifier(cal); | |
1933 | value = __CStringFromString(ident); | |
1934 | char *oldkey = key; | |
1935 | key = strdup("calendar"); | |
1936 | free(oldkey); | |
1937 | } else { | |
1938 | value = __CStringFromString(values[idx]); | |
1939 | } | |
cf7d2af9 A |
1940 | UErrorCode status = U_ZERO_ERROR; |
1941 | uloc_setKeywordValue(key, value, cLocaleID, sizeof(cLocaleID), &status); | |
1942 | free(key); | |
1943 | free(value); | |
1944 | } | |
bd5b749c | 1945 | } |
cf7d2af9 A |
1946 | |
1947 | return CFStringCreateWithCString(allocator, cLocaleID, kCFStringEncodingASCII); | |
bd5b749c A |
1948 | } |
1949 |