]>
Commit | Line | Data |
---|---|---|
c958260b VS |
1 | ///////////////////////////////////////////////////////////////////////////// |
2 | // Name: encconv.cpp | |
3 | // Purpose: wxEncodingConverter class for converting between different | |
4 | // font encodings | |
5 | // Author: Vaclav Slavik | |
6 | // Copyright: (c) 1999 Vaclav Slavik | |
7 | // Licence: wxWindows Licence | |
8 | ///////////////////////////////////////////////////////////////////////////// | |
9 | ||
10 | #ifdef __GNUG__ | |
11 | #pragma implementation "encconv.h" | |
12 | #endif | |
13 | ||
14 | // For compilers that support precompilation, includes "wx.h". | |
15 | #include "wx/wxprec.h" | |
16 | ||
17 | #ifdef __BORLANDC__ | |
18 | #pragma hdrstop | |
19 | #endif | |
20 | ||
1e6feb95 VZ |
21 | #if wxUSE_FONTMAP |
22 | ||
c958260b VS |
23 | #include "wx/encconv.h" |
24 | ||
25 | #include <stdlib.h> | |
26 | ||
94fc5183 VS |
27 | // conversion tables, generated by scripts in $(WXWIN)/misc/unictabl: |
28 | #ifdef __BORLANDC__ | |
29 | #include "../common/unictabl.inc" | |
30 | #else | |
03424b1b | 31 | #include "unictabl.inc" |
94fc5183 | 32 | #endif |
c958260b | 33 | |
f6bcfd97 BP |
34 | #if wxUSE_WCHAR_T |
35 | typedef wchar_t tchar; | |
36 | #else | |
37 | typedef char tchar; | |
38 | #endif | |
c958260b | 39 | |
1c193821 JS |
40 | #ifdef __WXWINCE__ |
41 | #undef LINKAGEMODE | |
42 | #define LINKAGEMODE __cdecl | |
43 | #endif | |
44 | ||
eda22ec3 | 45 | static wxUint16* LINKAGEMODE GetEncTable(wxFontEncoding enc) |
c958260b VS |
46 | { |
47 | for (int i = 0; encodings_list[i].table != NULL; i++) | |
48 | { | |
03424b1b | 49 | if (encodings_list[i].encoding == enc) |
c958260b VS |
50 | return encodings_list[i].table; |
51 | } | |
52 | return NULL; | |
53 | } | |
54 | ||
55 | typedef struct { | |
56 | wxUint16 u; | |
57 | wxUint8 c; | |
58 | } CharsetItem; | |
59 | ||
90350682 | 60 | extern "C" int LINKAGEMODE CompareCharsetItems(const void *i1, const void *i2) |
c958260b VS |
61 | { |
62 | return ( ((CharsetItem*)i1) -> u - ((CharsetItem*)i2) -> u ); | |
63 | } | |
64 | ||
65 | ||
eda22ec3 | 66 | static CharsetItem* LINKAGEMODE BuildReverseTable(wxUint16 *tbl) |
c958260b VS |
67 | { |
68 | CharsetItem *rev = new CharsetItem[128]; | |
03424b1b | 69 | |
c958260b VS |
70 | for (int i = 0; i < 128; i++) |
71 | rev[i].c = 128 + i, rev[i].u = tbl[i]; | |
72 | ||
73 | qsort(rev, 128, sizeof(CharsetItem), CompareCharsetItems); | |
03424b1b | 74 | |
c958260b VS |
75 | return rev; |
76 | } | |
77 | ||
78 | ||
79 | ||
80 | wxEncodingConverter::wxEncodingConverter() | |
81 | { | |
82 | m_Table = NULL; | |
47e55c2f | 83 | m_UnicodeInput = m_UnicodeOutput = FALSE; |
c958260b VS |
84 | m_JustCopy = FALSE; |
85 | } | |
86 | ||
87 | ||
88 | ||
89 | bool wxEncodingConverter::Init(wxFontEncoding input_enc, wxFontEncoding output_enc, int method) | |
90 | { | |
91 | unsigned i; | |
92 | wxUint16 *in_tbl = NULL, *out_tbl = NULL; | |
93 | ||
94 | if (m_Table) {delete[] m_Table; m_Table = NULL;} | |
95 | ||
f6bcfd97 | 96 | #if !wxUSE_WCHAR_T |
c958260b VS |
97 | if (input_enc == wxFONTENCODING_UNICODE || output_enc == wxFONTENCODING_UNICODE) return FALSE; |
98 | #endif | |
99 | ||
100 | if (input_enc == output_enc) {m_JustCopy = TRUE; return TRUE;} | |
03424b1b | 101 | |
47e55c2f | 102 | m_UnicodeOutput = (output_enc == wxFONTENCODING_UNICODE); |
c958260b | 103 | m_JustCopy = FALSE; |
03424b1b | 104 | |
c958260b VS |
105 | if (input_enc == wxFONTENCODING_UNICODE) |
106 | { | |
107 | if ((out_tbl = GetEncTable(output_enc)) == NULL) return FALSE; | |
108 | ||
f6bcfd97 BP |
109 | m_Table = new tchar[65536]; |
110 | for (i = 0; i < 128; i++) m_Table[i] = (tchar)i; // 7bit ASCII | |
03424b1b | 111 | for (i = 128; i < 65536; i++) m_Table[i] = (tchar)'?'; |
c958260b VS |
112 | // FIXME - this should be character that means `unicode to charset' impossible, not '?' |
113 | ||
114 | if (method == wxCONVERT_SUBSTITUTE) | |
115 | { | |
116 | for (i = 0; i < encoding_unicode_fallback_count; i++) | |
f6bcfd97 | 117 | m_Table[encoding_unicode_fallback[i].c] = (tchar) encoding_unicode_fallback[i].s; |
c958260b VS |
118 | } |
119 | ||
120 | for (i = 0; i < 128; i++) | |
f6bcfd97 | 121 | m_Table[out_tbl[i]] = (tchar)(128 + i); |
c958260b VS |
122 | |
123 | m_UnicodeInput = TRUE; | |
124 | return TRUE; | |
125 | } | |
03424b1b | 126 | |
c958260b VS |
127 | else |
128 | { | |
129 | if ((in_tbl = GetEncTable(input_enc)) == NULL) return FALSE; | |
130 | if (output_enc != wxFONTENCODING_UNICODE) | |
131 | if ((out_tbl = GetEncTable(output_enc)) == NULL) return FALSE; | |
132 | ||
133 | m_UnicodeInput = FALSE; | |
03424b1b | 134 | |
f6bcfd97 BP |
135 | m_Table = new tchar[256]; |
136 | for (i = 0; i < 128; i++) m_Table[i] = (tchar)i; // 7bit ASCII | |
03424b1b | 137 | |
c958260b VS |
138 | if (output_enc == wxFONTENCODING_UNICODE) |
139 | { | |
f6bcfd97 | 140 | for (i = 0; i < 128; i++) m_Table[128 + i] = (tchar)in_tbl[i]; |
c958260b VS |
141 | return TRUE; |
142 | } | |
1c193821 JS |
143 | // FIXME: write a substitute for bsearch |
144 | #ifndef __WXWINCE__ | |
03424b1b | 145 | else |
c958260b VS |
146 | { |
147 | CharsetItem *rev = BuildReverseTable(out_tbl); | |
33ac7e6f KB |
148 | CharsetItem *item; |
149 | CharsetItem key; | |
03424b1b VZ |
150 | |
151 | for (i = 0; i < 128; i++) | |
c958260b VS |
152 | { |
153 | key.u = in_tbl[i]; | |
154 | item = (CharsetItem*) bsearch(&key, rev, 128, sizeof(CharsetItem), CompareCharsetItems); | |
155 | if (item == NULL && method == wxCONVERT_SUBSTITUTE) | |
03424b1b | 156 | item = (CharsetItem*) bsearch(&key, encoding_unicode_fallback, |
c958260b VS |
157 | encoding_unicode_fallback_count, sizeof(CharsetItem), CompareCharsetItems); |
158 | if (item) | |
f6bcfd97 | 159 | m_Table[128 + i] = (tchar)item -> c; |
c958260b | 160 | else |
33ac7e6f KB |
161 | #if wxUSE_WCHAR_T |
162 | m_Table[128 + i] = (wchar_t)(128 + i); | |
163 | #else | |
164 | m_Table[128 + i] = (char)(128 + i); | |
307fd956 | 165 | #endif |
c958260b | 166 | } |
03424b1b | 167 | |
c958260b VS |
168 | delete[] rev; |
169 | return TRUE; | |
170 | } | |
1c193821 JS |
171 | #endif |
172 | return TRUE; | |
c958260b VS |
173 | } |
174 | } | |
175 | ||
176 | ||
47e55c2f | 177 | |
f6bcfd97 | 178 | void wxEncodingConverter::Convert(const char* input, char* output) |
c958260b | 179 | { |
f6bcfd97 BP |
180 | wxASSERT_MSG(!m_UnicodeOutput, wxT("You cannot convert to unicode if output is const char*!")); |
181 | wxASSERT_MSG(!m_UnicodeInput, wxT("You cannot convert from unicode if input is const char*!")); | |
182 | ||
183 | const char *i; | |
184 | char *o; | |
185 | ||
c958260b VS |
186 | if (m_JustCopy) |
187 | { | |
f6bcfd97 | 188 | strcpy(output, input); |
c958260b VS |
189 | return; |
190 | } | |
03424b1b | 191 | |
307fd956 | 192 | wxCHECK_RET(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!")); |
03424b1b | 193 | |
f6bcfd97 BP |
194 | for (i = input, o = output; *i != 0;) |
195 | *(o++) = (char)(m_Table[(wxUint8)*(i++)]); | |
c958260b VS |
196 | *o = 0; |
197 | } | |
198 | ||
199 | ||
f6bcfd97 | 200 | #if wxUSE_WCHAR_T |
47e55c2f | 201 | |
f6bcfd97 | 202 | void wxEncodingConverter::Convert(const char* input, wchar_t* output) |
47e55c2f | 203 | { |
f6bcfd97 | 204 | wxASSERT_MSG(m_UnicodeOutput, wxT("You cannot convert to 8-bit if output is const wchar_t*!")); |
47e55c2f VS |
205 | wxASSERT_MSG(!m_UnicodeInput, wxT("You cannot convert from unicode if input is const char*!")); |
206 | ||
207 | const char *i; | |
f6bcfd97 | 208 | wchar_t *o; |
47e55c2f VS |
209 | |
210 | if (m_JustCopy) | |
211 | { | |
212 | for (i = input, o = output; *i != 0;) | |
f6bcfd97 | 213 | *(o++) = (wchar_t)(*(i++)); |
47e55c2f VS |
214 | *o = 0; |
215 | return; | |
216 | } | |
03424b1b | 217 | |
307fd956 | 218 | wxCHECK_RET(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!")); |
03424b1b | 219 | |
47e55c2f | 220 | for (i = input, o = output; *i != 0;) |
f6bcfd97 | 221 | *(o++) = (wchar_t)(m_Table[(wxUint8)*(i++)]); |
47e55c2f VS |
222 | *o = 0; |
223 | } | |
224 | ||
225 | ||
226 | ||
f6bcfd97 | 227 | void wxEncodingConverter::Convert(const wchar_t* input, char* output) |
47e55c2f VS |
228 | { |
229 | wxASSERT_MSG(!m_UnicodeOutput, wxT("You cannot convert to unicode if output is const char*!")); | |
f6bcfd97 | 230 | wxASSERT_MSG(m_UnicodeInput, wxT("You cannot convert from 8-bit if input is const wchar_t*!")); |
47e55c2f | 231 | |
f6bcfd97 | 232 | const wchar_t *i; |
47e55c2f VS |
233 | char *o; |
234 | ||
235 | if (m_JustCopy) | |
236 | { | |
237 | for (i = input, o = output; *i != 0;) | |
238 | *(o++) = (char)(*(i++)); | |
239 | *o = 0; | |
240 | return; | |
241 | } | |
03424b1b | 242 | |
307fd956 | 243 | wxCHECK_RET(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!")); |
03424b1b | 244 | |
f6bcfd97 BP |
245 | for (i = input, o = output; *i != 0;) |
246 | *(o++) = (char)(m_Table[(wxUint16)*(i++)]); | |
47e55c2f VS |
247 | *o = 0; |
248 | } | |
249 | ||
250 | ||
251 | ||
f6bcfd97 | 252 | void wxEncodingConverter::Convert(const wchar_t* input, wchar_t* output) |
47e55c2f | 253 | { |
f6bcfd97 BP |
254 | wxASSERT_MSG(m_UnicodeOutput, wxT("You cannot convert to 8-bit if output is const wchar_t*!")); |
255 | wxASSERT_MSG(m_UnicodeInput, wxT("You cannot convert from 8-bit if input is const wchar_t*!")); | |
47e55c2f | 256 | |
f6bcfd97 BP |
257 | const wchar_t *i; |
258 | wchar_t *o; | |
47e55c2f VS |
259 | |
260 | if (m_JustCopy) | |
261 | { | |
f6bcfd97 BP |
262 | // wcscpy() is not guaranteed to exist |
263 | for (i = input, o = output; *i != 0;) | |
264 | *(o++) = (*(i++)); | |
265 | *o = 0; | |
47e55c2f VS |
266 | return; |
267 | } | |
03424b1b | 268 | |
307fd956 | 269 | wxCHECK_RET(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!")); |
03424b1b | 270 | |
47e55c2f | 271 | for (i = input, o = output; *i != 0;) |
f6bcfd97 | 272 | *(o++) = (wchar_t)(m_Table[(wxUint8)*(i++)]); |
47e55c2f VS |
273 | *o = 0; |
274 | } | |
275 | ||
f6bcfd97 | 276 | #endif // wxUSE_WCHAR_T |
47e55c2f | 277 | |
c958260b VS |
278 | |
279 | wxString wxEncodingConverter::Convert(const wxString& input) | |
280 | { | |
281 | if (m_JustCopy) return input; | |
03424b1b | 282 | |
c958260b VS |
283 | wxString s; |
284 | const wxChar *i; | |
03424b1b | 285 | |
4ccae30a VZ |
286 | wxCHECK_MSG(m_Table != NULL, s, |
287 | wxT("You must call wxEncodingConverter::Init() before actually converting!")); | |
03424b1b | 288 | |
c958260b | 289 | if (m_UnicodeInput) |
307fd956 | 290 | { |
c958260b VS |
291 | for (i = input.c_str(); *i != 0; i++) |
292 | s << (wxChar)(m_Table[(wxUint16)*i]); | |
307fd956 | 293 | } |
c958260b | 294 | else |
307fd956 | 295 | { |
c958260b VS |
296 | for (i = input.c_str(); *i != 0; i++) |
297 | s << (wxChar)(m_Table[(wxUint8)*i]); | |
307fd956 VZ |
298 | } |
299 | ||
c958260b VS |
300 | return s; |
301 | } | |
302 | ||
303 | ||
304 | ||
305 | ||
47e55c2f VS |
306 | |
307 | ||
308 | ||
// Following tables describe classes of encoding equivalence.
//
// EquivalentEncodings is indexed as [class][platform][n]. Each class groups
// the encodings used for one family of languages; within a class there is one
// STOP-terminated list of encodings per platform. The table itself ends with
// a class whose lists all start with STOP.

// Marks the end of a per-platform list and, in the first list of a class,
// the end of the whole table.
#define STOP wxFONTENCODING_SYSTEM

#define NUM_OF_PLATFORMS  4 /*must conform to enum wxPLATFORM_XXXX !!!*/
#define ENC_PER_PLATFORM  5
           // max no. of encodings for one language used on one platform
           // Anybody thinks 5 is not enough? ;-)

// The innermost dimension is ENC_PER_PLATFORM+1 to leave room for the STOP
// terminator after a full list.
static wxFontEncoding
    EquivalentEncodings[][NUM_OF_PLATFORMS][ENC_PER_PLATFORM+1] = {

    // *** Please put more common encodings as first! ***

    // Western European
    {
        /* unix    */ {wxFONTENCODING_ISO8859_1, wxFONTENCODING_ISO8859_15, STOP},
        /* windows */ {wxFONTENCODING_CP1252, STOP},
        /* os2     */ {STOP},
        /* mac     */ {STOP}
    },

    // Central European
    {
        /* unix    */ {wxFONTENCODING_ISO8859_2, STOP},
        /* windows */ {wxFONTENCODING_CP1250, STOP},
        /* os2     */ {STOP},
        /* mac     */ {STOP}
    },

    // Baltic
    {
        /* unix    */ {wxFONTENCODING_ISO8859_13, wxFONTENCODING_ISO8859_4, STOP},
        /* windows */ {wxFONTENCODING_CP1257, STOP},
        /* os2     */ {STOP},
        /* mac     */ {STOP}
    },

    // Hebrew
    {
        /* unix    */ {wxFONTENCODING_ISO8859_8, STOP},
        /* windows */ {wxFONTENCODING_CP1255, STOP},
        /* os2     */ {STOP},
        /* mac     */ {STOP}
    },

    // Greek
    {
        /* unix    */ {wxFONTENCODING_ISO8859_7, STOP},
        /* windows */ {wxFONTENCODING_CP1253, STOP},
        /* os2     */ {STOP},
        /* mac     */ {STOP}
    },

    // Arabic
    {
        /* unix    */ {wxFONTENCODING_ISO8859_6, STOP},
        /* windows */ {wxFONTENCODING_CP1256, STOP},
        /* os2     */ {STOP},
        /* mac     */ {STOP}
    },

    // Turkish
    {
        /* unix    */ {wxFONTENCODING_ISO8859_9, STOP},
        /* windows */ {wxFONTENCODING_CP1254, STOP},
        /* os2     */ {STOP},
        /* mac     */ {STOP}
    },

    // Cyrillic
    {
        /* unix    */ {wxFONTENCODING_KOI8, wxFONTENCODING_ISO8859_5, STOP},
        /* windows */ {wxFONTENCODING_CP1251, STOP},
        /* os2     */ {STOP},
        /* mac     */ {STOP}
    },

    {{STOP},{STOP},{STOP},{STOP}} /* Terminator */
    /* no, _not_ Arnold! */
};
391 | ||
392 | ||
df5168c4 MB |
393 | static bool FindEncoding(const wxFontEncodingArray& arr, wxFontEncoding f) |
394 | { | |
395 | for (wxFontEncodingArray::const_iterator it = arr.begin(), en = arr.end(); | |
396 | it != en; ++it) | |
397 | if (*it == f) | |
398 | return true; | |
399 | return false; | |
400 | } | |
c958260b VS |
401 | |
402 | wxFontEncodingArray wxEncodingConverter::GetPlatformEquivalents(wxFontEncoding enc, int platform) | |
403 | { | |
404 | if (platform == wxPLATFORM_CURRENT) | |
405 | { | |
406 | #if defined(__WXMSW__) | |
407 | platform = wxPLATFORM_WINDOWS; | |
408 | #elif defined(__WXGTK__) || defined(__WXMOTIF__) | |
409 | platform = wxPLATFORM_UNIX; | |
410 | #elif defined(__WXOS2__) | |
411 | platform = wxPLATFORM_OS2; | |
412 | #elif defined(__WXMAC__) | |
413 | platform = wxPLATFORM_MAC; | |
414 | #endif | |
415 | } | |
03424b1b | 416 | |
c958260b VS |
417 | int i, clas, e ; |
418 | wxFontEncoding *f; | |
419 | wxFontEncodingArray arr; | |
420 | ||
421 | clas = 0; | |
422 | while (EquivalentEncodings[clas][0][0] != STOP) | |
423 | { | |
424 | for (i = 0; i < NUM_OF_PLATFORMS; i++) | |
425 | for (e = 0; EquivalentEncodings[clas][i][e] != STOP; e++) | |
426 | if (EquivalentEncodings[clas][i][e] == enc) | |
427 | { | |
47e55c2f | 428 | for (f = EquivalentEncodings[clas][platform]; *f != STOP; f++) |
df5168c4 | 429 | if (*f == enc) arr.push_back(enc); |
47e55c2f | 430 | for (f = EquivalentEncodings[clas][platform]; *f != STOP; f++) |
df5168c4 | 431 | if (!FindEncoding(arr, *f)) arr.push_back(*f); |
03424b1b | 432 | i = NUM_OF_PLATFORMS/*hack*/; break; |
c958260b VS |
433 | } |
434 | clas++; | |
435 | } | |
03424b1b | 436 | |
c958260b VS |
437 | return arr; |
438 | } | |
439 | ||
440 | ||
441 | ||
442 | wxFontEncodingArray wxEncodingConverter::GetAllEquivalents(wxFontEncoding enc) | |
443 | { | |
444 | int i, clas, e, j ; | |
445 | wxFontEncoding *f; | |
446 | wxFontEncodingArray arr; | |
03424b1b | 447 | |
47e55c2f | 448 | arr = GetPlatformEquivalents(enc); // we want them to be first items in array |
c958260b VS |
449 | |
450 | clas = 0; | |
451 | while (EquivalentEncodings[clas][0][0] != STOP) | |
452 | { | |
453 | for (i = 0; i < NUM_OF_PLATFORMS; i++) | |
454 | for (e = 0; EquivalentEncodings[clas][i][e] != STOP; e++) | |
455 | if (EquivalentEncodings[clas][i][e] == enc) | |
456 | { | |
457 | for (j = 0; j < NUM_OF_PLATFORMS; j++) | |
03424b1b | 458 | for (f = EquivalentEncodings[clas][j]; *f != STOP; f++) |
df5168c4 | 459 | if (!FindEncoding(arr, *f)) arr.push_back(*f); |
03424b1b | 460 | i = NUM_OF_PLATFORMS/*hack*/; break; |
c958260b VS |
461 | } |
462 | clas++; | |
463 | } | |
03424b1b | 464 | |
c958260b VS |
465 | return arr; |
466 | } | |
1e6feb95 VZ |
467 | |
468 | #endif // wxUSE_FONTMAP |