]> git.saurik.com Git - wxWidgets.git/blame - src/common/encconv.cpp
Large image-loading speedup and small attribute-loading speedup
[wxWidgets.git] / src / common / encconv.cpp
CommitLineData
c958260b
VS
1/////////////////////////////////////////////////////////////////////////////
2// Name: encconv.cpp
3// Purpose: wxEncodingConverter class for converting between different
4// font encodings
5// Author: Vaclav Slavik
6// Copyright: (c) 1999 Vaclav Slavik
65571936 7// Licence: wxWindows licence
c958260b
VS
8/////////////////////////////////////////////////////////////////////////////
9
c958260b
VS
10// For compilers that support precompilation, includes "wx.h".
11#include "wx/wxprec.h"
12
13#ifdef __BORLANDC__
14 #pragma hdrstop
15#endif
16
17#include "wx/encconv.h"
18
19#include <stdlib.h>
20
94fc5183 21// conversion tables, generated by scripts in $(WXWIN)/misc/unictabl:
8f9c25cc 22#if defined( __BORLANDC__ ) || defined(__DARWIN__)
0f8d6461 23 #include "../common/unictabl.inc"
94fc5183 24#else
0f8d6461 25 #include "unictabl.inc"
94fc5183 26#endif
c958260b 27
788a28b4 28#ifdef __WXMAC__
c933e267 29 #include "wx/osx/core/cfstring.h"
e84f7290 30 #include <CoreFoundation/CFStringEncodingExt.h>
788a28b4 31
0f8d6461
VZ
// Per-Mac-encoding conversion tables, one row of 128 wxUint16 entries for each
// encoding in [wxFONTENCODING_MACMIN, wxFONTENCODING_MACMAX]. Rows are filled
// on first use by GetEncTable().
32 wxUint16 gMacEncodings[wxFONTENCODING_MACMAX-wxFONTENCODING_MACMIN+1][128] ;
// Flags telling GetEncTable() whether the matching row of gMacEncodings has
// already been filled.
33 bool gMacEncodingsInited[wxFONTENCODING_MACMAX-wxFONTENCODING_MACMIN+1] ;
34#endif
788a28b4 35
0f8d6461
VZ
36#ifdef __WXWINCE__
37 #include "wx/msw/wince/missing.h" // for bsearch()
788a28b4
SC
38#endif
39
cd1a22d1 40static const wxUint16* GetEncTable(wxFontEncoding enc)
c958260b 41{
788a28b4 42#ifdef __WXMAC__
3af5821c 43 if( enc >= wxFONTENCODING_MACMIN && enc <= wxFONTENCODING_MACMAX )
788a28b4 44 {
3af5821c
SC
45 int i = enc-wxFONTENCODING_MACMIN ;
46 if ( gMacEncodingsInited[i] == false )
788a28b4 47 {
2ad4f89f 48 // create
e84f7290
SC
49 CFStringEncoding cfencoding = wxMacGetSystemEncFromFontEnc( enc ) ;
50 if( !CFStringIsEncodingAvailable( cfencoding ) )
51 return NULL;
2ad4f89f 52
e84f7290
SC
53 memset( gMacEncodings[i] , 0 , 128 * 2 );
54 char s[2] = { 0 , 0 };
55 CFRange firstchar = CFRangeMake( 0, 1 );
1a18887b
WS
56 for( unsigned char c = 255 ; c >= 128 ; --c )
57 {
58 s[0] = c ;
e84f7290
SC
59 wxCFStringRef cfref( CFStringCreateWithCStringNoCopy( NULL, s, cfencoding , kCFAllocatorNull ) );
60 CFStringGetCharacters( cfref, firstchar, (UniChar*) &gMacEncodings[i][c-128] );
1a18887b 61 }
1a18887b 62 gMacEncodingsInited[i]=true;
788a28b4 63 }
3af5821c 64 return gMacEncodings[i] ;
788a28b4
SC
65 }
66#endif
67
c958260b
VS
68 for (int i = 0; encodings_list[i].table != NULL; i++)
69 {
03424b1b 70 if (encodings_list[i].encoding == enc)
c958260b
VS
71 return encodings_list[i].table;
72 }
73 return NULL;
74}
75
76typedef struct {
77 wxUint16 u;
78 wxUint8 c;
79} CharsetItem;
80
2ad4f89f
FM
81extern "C"
82{
83static int wxCMPFUNC_CONV
0f8d6461 84CompareCharsetItems(const void *i1, const void *i2)
c958260b
VS
85{
86 return ( ((CharsetItem*)i1) -> u - ((CharsetItem*)i2) -> u );
87}
2ad4f89f 88}
c958260b 89
cd1a22d1 90static CharsetItem* BuildReverseTable(const wxUint16 *tbl)
c958260b
VS
91{
92 CharsetItem *rev = new CharsetItem[128];
03424b1b 93
c958260b 94 for (int i = 0; i < 128; i++)
0203c8cd 95 rev[i].c = wxUint8(128 + i), rev[i].u = tbl[i];
c958260b
VS
96
97 qsort(rev, 128, sizeof(CharsetItem), CompareCharsetItems);
03424b1b 98
c958260b
VS
99 return rev;
100}
101
102
103
104wxEncodingConverter::wxEncodingConverter()
105{
106 m_Table = NULL;
1a18887b
WS
107 m_UnicodeInput = m_UnicodeOutput = false;
108 m_JustCopy = false;
c958260b
VS
109}
110
111
112
113bool wxEncodingConverter::Init(wxFontEncoding input_enc, wxFontEncoding output_enc, int method)
114{
115 unsigned i;
cd1a22d1
MR
116 const wxUint16 *in_tbl;
117 const wxUint16 *out_tbl = NULL;
c958260b 118
5276b0a5 119 wxDELETEA(m_Table);
c958260b 120
1a18887b 121 if (input_enc == output_enc) {m_JustCopy = true; return true;}
03424b1b 122
47e55c2f 123 m_UnicodeOutput = (output_enc == wxFONTENCODING_UNICODE);
1a18887b 124 m_JustCopy = false;
03424b1b 125
c958260b
VS
126 if (input_enc == wxFONTENCODING_UNICODE)
127 {
1a18887b 128 if ((out_tbl = GetEncTable(output_enc)) == NULL) return false;
c958260b 129
8d94819c
VS
130 m_Table = new wchar_t[65536];
131 for (i = 0; i < 128; i++) m_Table[i] = (wchar_t)i; // 7bit ASCII
132 for (i = 128; i < 65536; i++) m_Table[i] = (wchar_t)0;
c958260b
VS
133
134 if (method == wxCONVERT_SUBSTITUTE)
135 {
136 for (i = 0; i < encoding_unicode_fallback_count; i++)
8d94819c 137 m_Table[encoding_unicode_fallback[i].c] = (wchar_t) encoding_unicode_fallback[i].s;
c958260b
VS
138 }
139
140 for (i = 0; i < 128; i++)
8d94819c 141 m_Table[out_tbl[i]] = (wchar_t)(128 + i);
c958260b 142
1a18887b 143 m_UnicodeInput = true;
c958260b 144 }
b8c253ec 145 else // input !Unicode
c958260b 146 {
1a18887b 147 if ((in_tbl = GetEncTable(input_enc)) == NULL) return false;
c958260b 148 if (output_enc != wxFONTENCODING_UNICODE)
1a18887b 149 if ((out_tbl = GetEncTable(output_enc)) == NULL) return false;
c958260b 150
1a18887b 151 m_UnicodeInput = false;
03424b1b 152
8d94819c
VS
153 m_Table = new wchar_t[256];
154 for (i = 0; i < 128; i++) m_Table[i] = (wchar_t)i; // 7bit ASCII
03424b1b 155
c958260b
VS
156 if (output_enc == wxFONTENCODING_UNICODE)
157 {
8d94819c 158 for (i = 0; i < 128; i++) m_Table[128 + i] = (wchar_t)in_tbl[i];
1a18887b 159 return true;
c958260b 160 }
b8c253ec 161 else // output !Unicode
c958260b
VS
162 {
163 CharsetItem *rev = BuildReverseTable(out_tbl);
33ac7e6f
KB
164 CharsetItem *item;
165 CharsetItem key;
03424b1b
VZ
166
167 for (i = 0; i < 128; i++)
c958260b
VS
168 {
169 key.u = in_tbl[i];
170 item = (CharsetItem*) bsearch(&key, rev, 128, sizeof(CharsetItem), CompareCharsetItems);
171 if (item == NULL && method == wxCONVERT_SUBSTITUTE)
03424b1b 172 item = (CharsetItem*) bsearch(&key, encoding_unicode_fallback,
c958260b
VS
173 encoding_unicode_fallback_count, sizeof(CharsetItem), CompareCharsetItems);
174 if (item)
8d94819c 175 m_Table[128 + i] = (wchar_t)item -> c;
c958260b 176 else
33ac7e6f 177 m_Table[128 + i] = (wchar_t)(128 + i);
c958260b 178 }
03424b1b 179
c958260b 180 delete[] rev;
c958260b
VS
181 }
182 }
b8c253ec 183
1a18887b 184 return true;
c958260b
VS
185}
186
187
// Character written in place of input that the table cannot map.
#define REPLACEMENT_CHAR (L'?')

// Looks `value` up in `table`. A zero entry for a non-zero `value` means
// "no mapping": REPLACEMENT_CHAR is returned instead and `repl` is set to
// true. `repl` is never reset to false here, so it accumulates over calls.
inline wchar_t GetTableValue(const wchar_t *table, wchar_t value, bool& repl)
{
    const wchar_t mapped = table[value];

    if (mapped != 0 || value == 0)
        return mapped;

    repl = true;
    return REPLACEMENT_CHAR;
}
200
201
202bool wxEncodingConverter::Convert(const char* input, char* output) const
c958260b 203{
f6bcfd97
BP
204 wxASSERT_MSG(!m_UnicodeOutput, wxT("You cannot convert to unicode if output is const char*!"));
205 wxASSERT_MSG(!m_UnicodeInput, wxT("You cannot convert from unicode if input is const char*!"));
206
207 const char *i;
208 char *o;
209
c958260b
VS
210 if (m_JustCopy)
211 {
f6bcfd97 212 strcpy(output, input);
02c92ad9 213 return true;
c958260b 214 }
03424b1b 215
02c92ad9
VS
216 wxCHECK_MSG(m_Table != NULL, false,
217 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
218
219 bool replaced = false;
03424b1b 220
f6bcfd97 221 for (i = input, o = output; *i != 0;)
02c92ad9 222 *(o++) = (char)(GetTableValue(m_Table, (wxUint8)*(i++), replaced));
c958260b 223 *o = 0;
02c92ad9
VS
224
225 return !replaced;
c958260b
VS
226}
227
228
02c92ad9 229bool wxEncodingConverter::Convert(const char* input, wchar_t* output) const
47e55c2f 230{
f6bcfd97 231 wxASSERT_MSG(m_UnicodeOutput, wxT("You cannot convert to 8-bit if output is const wchar_t*!"));
47e55c2f
VS
232 wxASSERT_MSG(!m_UnicodeInput, wxT("You cannot convert from unicode if input is const char*!"));
233
234 const char *i;
f6bcfd97 235 wchar_t *o;
47e55c2f
VS
236
237 if (m_JustCopy)
238 {
239 for (i = input, o = output; *i != 0;)
f6bcfd97 240 *(o++) = (wchar_t)(*(i++));
47e55c2f 241 *o = 0;
02c92ad9 242 return true;
47e55c2f 243 }
03424b1b 244
02c92ad9
VS
245 wxCHECK_MSG(m_Table != NULL, false,
246 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
247
248 bool replaced = false;
03424b1b 249
47e55c2f 250 for (i = input, o = output; *i != 0;)
02c92ad9 251 *(o++) = (wchar_t)(GetTableValue(m_Table, (wxUint8)*(i++), replaced));
47e55c2f 252 *o = 0;
02c92ad9
VS
253
254 return !replaced;
47e55c2f
VS
255}
256
257
258
02c92ad9 259bool wxEncodingConverter::Convert(const wchar_t* input, char* output) const
47e55c2f
VS
260{
261 wxASSERT_MSG(!m_UnicodeOutput, wxT("You cannot convert to unicode if output is const char*!"));
f6bcfd97 262 wxASSERT_MSG(m_UnicodeInput, wxT("You cannot convert from 8-bit if input is const wchar_t*!"));
47e55c2f 263
f6bcfd97 264 const wchar_t *i;
47e55c2f
VS
265 char *o;
266
267 if (m_JustCopy)
268 {
269 for (i = input, o = output; *i != 0;)
270 *(o++) = (char)(*(i++));
271 *o = 0;
02c92ad9 272 return true;
47e55c2f 273 }
03424b1b 274
02c92ad9
VS
275 wxCHECK_MSG(m_Table != NULL, false,
276 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
277
278 bool replaced = false;
03424b1b 279
f6bcfd97 280 for (i = input, o = output; *i != 0;)
02c92ad9 281 *(o++) = (char)(GetTableValue(m_Table, (wxUint16)*(i++), replaced));
47e55c2f 282 *o = 0;
02c92ad9
VS
283
284 return !replaced;
47e55c2f
VS
285}
286
287
288
02c92ad9 289bool wxEncodingConverter::Convert(const wchar_t* input, wchar_t* output) const
47e55c2f 290{
f6bcfd97
BP
291 wxASSERT_MSG(m_UnicodeOutput, wxT("You cannot convert to 8-bit if output is const wchar_t*!"));
292 wxASSERT_MSG(m_UnicodeInput, wxT("You cannot convert from 8-bit if input is const wchar_t*!"));
47e55c2f 293
f6bcfd97
BP
294 const wchar_t *i;
295 wchar_t *o;
47e55c2f
VS
296
297 if (m_JustCopy)
298 {
f6bcfd97
BP
299 // wcscpy() is not guaranteed to exist
300 for (i = input, o = output; *i != 0;)
301 *(o++) = (*(i++));
302 *o = 0;
02c92ad9 303 return true;
47e55c2f 304 }
03424b1b 305
02c92ad9
VS
306 wxCHECK_MSG(m_Table != NULL, false,
307 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
03424b1b 308
02c92ad9 309 bool replaced = false;
2ad4f89f 310
47e55c2f 311 for (i = input, o = output; *i != 0;)
02c92ad9 312 *(o++) = (wchar_t)(GetTableValue(m_Table, (wxUint8)*(i++), replaced));
47e55c2f 313 *o = 0;
02c92ad9
VS
314
315 return !replaced;
47e55c2f
VS
316}
317
c958260b 318
57c5293e 319wxString wxEncodingConverter::Convert(const wxString& input) const
c958260b
VS
320{
321 if (m_JustCopy) return input;
03424b1b 322
c958260b
VS
323 wxString s;
324 const wxChar *i;
03424b1b 325
4ccae30a
VZ
326 wxCHECK_MSG(m_Table != NULL, s,
327 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
03424b1b 328
c958260b 329 if (m_UnicodeInput)
307fd956 330 {
c958260b
VS
331 for (i = input.c_str(); *i != 0; i++)
332 s << (wxChar)(m_Table[(wxUint16)*i]);
307fd956 333 }
c958260b 334 else
307fd956 335 {
c958260b
VS
336 for (i = input.c_str(); *i != 0; i++)
337 s << (wxChar)(m_Table[(wxUint8)*i]);
307fd956
VZ
338 }
339
c958260b
VS
340 return s;
341}
342
343
344
345
47e55c2f
VS
346
347
348
c958260b 349// Following tables describe classes of encoding equivalence.
03424b1b 350//
c958260b
VS
351
352#define STOP wxFONTENCODING_SYSTEM
353
354#define NUM_OF_PLATFORMS 4 /*must conform to enum wxPLATFORM_XXXX !!!*/
8ddbb137 355#define ENC_PER_PLATFORM 3
cc845a61
MR
356 // max no. of encodings for one language used on one platform.
357 // Using maximum of everything at the current moment to not make the
358 // library larger than necessary. Make larger only if necessary - MR
8ddbb137
MR
359
360static const wxFontEncoding
c958260b
VS
361 EquivalentEncodings[][NUM_OF_PLATFORMS][ENC_PER_PLATFORM+1] = {
362
47e55c2f
VS
363 // *** Please put more common encodings as first! ***
364
03424b1b 365 // Western European
c958260b
VS
366 {
367 /* unix */ {wxFONTENCODING_ISO8859_1, wxFONTENCODING_ISO8859_15, STOP},
368 /* windows */ {wxFONTENCODING_CP1252, STOP},
369 /* os2 */ {STOP},
788a28b4 370 /* mac */ {wxFONTENCODING_MACROMAN, STOP}
c958260b
VS
371 },
372
47e55c2f 373 // Central European
c958260b
VS
374 {
375 /* unix */ {wxFONTENCODING_ISO8859_2, STOP},
376 /* windows */ {wxFONTENCODING_CP1250, STOP},
377 /* os2 */ {STOP},
788a28b4 378 /* mac */ {wxFONTENCODING_MACCENTRALEUR, STOP}
c958260b 379 },
03424b1b 380
47e55c2f
VS
381 // Baltic
382 {
03424b1b 383 /* unix */ {wxFONTENCODING_ISO8859_13, wxFONTENCODING_ISO8859_4, STOP},
47e55c2f
VS
384 /* windows */ {wxFONTENCODING_CP1257, STOP},
385 /* os2 */ {STOP},
03424b1b 386 /* mac */ {STOP}
47e55c2f
VS
387 },
388
389 // Hebrew
390 {
391 /* unix */ {wxFONTENCODING_ISO8859_8, STOP},
392 /* windows */ {wxFONTENCODING_CP1255, STOP},
393 /* os2 */ {STOP},
788a28b4 394 /* mac */ {wxFONTENCODING_MACHEBREW, STOP}
47e55c2f
VS
395 },
396
397 // Greek
398 {
399 /* unix */ {wxFONTENCODING_ISO8859_7, STOP},
400 /* windows */ {wxFONTENCODING_CP1253, STOP},
401 /* os2 */ {STOP},
788a28b4 402 /* mac */ {wxFONTENCODING_MACGREEK, STOP}
47e55c2f
VS
403 },
404
405 // Arabic
406 {
407 /* unix */ {wxFONTENCODING_ISO8859_6, STOP},
408 /* windows */ {wxFONTENCODING_CP1256, STOP},
409 /* os2 */ {STOP},
788a28b4 410 /* mac */ {wxFONTENCODING_MACARABIC, STOP}
47e55c2f
VS
411 },
412
413 // Turkish
414 {
415 /* unix */ {wxFONTENCODING_ISO8859_9, STOP},
416 /* windows */ {wxFONTENCODING_CP1254, STOP},
417 /* os2 */ {STOP},
788a28b4 418 /* mac */ {wxFONTENCODING_MACTURKISH, STOP}
47e55c2f
VS
419 },
420
421 // Cyrillic
3b61656e 422 {
15ad38c3 423 /* unix */ {wxFONTENCODING_KOI8, wxFONTENCODING_KOI8_U, wxFONTENCODING_ISO8859_5, STOP},
47e55c2f
VS
424 /* windows */ {wxFONTENCODING_CP1251, STOP},
425 /* os2 */ {STOP},
788a28b4 426 /* mac */ {wxFONTENCODING_MACCYRILLIC, STOP}
47e55c2f 427 },
c958260b
VS
428
429 {{STOP},{STOP},{STOP},{STOP}} /* Terminator */
430 /* no, _not_ Arnold! */
431};
432
433
df5168c4
MB
434static bool FindEncoding(const wxFontEncodingArray& arr, wxFontEncoding f)
435{
436 for (wxFontEncodingArray::const_iterator it = arr.begin(), en = arr.end();
437 it != en; ++it)
438 if (*it == f)
439 return true;
440 return false;
441}
c958260b
VS
442
443wxFontEncodingArray wxEncodingConverter::GetPlatformEquivalents(wxFontEncoding enc, int platform)
444{
445 if (platform == wxPLATFORM_CURRENT)
446 {
447#if defined(__WXMSW__)
448 platform = wxPLATFORM_WINDOWS;
449#elif defined(__WXGTK__) || defined(__WXMOTIF__)
450 platform = wxPLATFORM_UNIX;
554fdf48 451#elif defined(__WXPM__)
c958260b
VS
452 platform = wxPLATFORM_OS2;
453#elif defined(__WXMAC__)
454 platform = wxPLATFORM_MAC;
455#endif
456 }
03424b1b 457
c958260b 458 int i, clas, e ;
8ddbb137 459 const wxFontEncoding *f;
c958260b
VS
460 wxFontEncodingArray arr;
461
462 clas = 0;
463 while (EquivalentEncodings[clas][0][0] != STOP)
464 {
465 for (i = 0; i < NUM_OF_PLATFORMS; i++)
466 for (e = 0; EquivalentEncodings[clas][i][e] != STOP; e++)
467 if (EquivalentEncodings[clas][i][e] == enc)
468 {
47e55c2f 469 for (f = EquivalentEncodings[clas][platform]; *f != STOP; f++)
df5168c4 470 if (*f == enc) arr.push_back(enc);
47e55c2f 471 for (f = EquivalentEncodings[clas][platform]; *f != STOP; f++)
df5168c4 472 if (!FindEncoding(arr, *f)) arr.push_back(*f);
03424b1b 473 i = NUM_OF_PLATFORMS/*hack*/; break;
c958260b
VS
474 }
475 clas++;
476 }
03424b1b 477
c958260b
VS
478 return arr;
479}
480
481
482
483wxFontEncodingArray wxEncodingConverter::GetAllEquivalents(wxFontEncoding enc)
484{
485 int i, clas, e, j ;
8ddbb137 486 const wxFontEncoding *f;
c958260b 487 wxFontEncodingArray arr;
03424b1b 488
47e55c2f 489 arr = GetPlatformEquivalents(enc); // we want them to be first items in array
c958260b
VS
490
491 clas = 0;
492 while (EquivalentEncodings[clas][0][0] != STOP)
493 {
494 for (i = 0; i < NUM_OF_PLATFORMS; i++)
495 for (e = 0; EquivalentEncodings[clas][i][e] != STOP; e++)
496 if (EquivalentEncodings[clas][i][e] == enc)
497 {
498 for (j = 0; j < NUM_OF_PLATFORMS; j++)
03424b1b 499 for (f = EquivalentEncodings[clas][j]; *f != STOP; f++)
df5168c4 500 if (!FindEncoding(arr, *f)) arr.push_back(*f);
03424b1b 501 i = NUM_OF_PLATFORMS/*hack*/; break;
c958260b
VS
502 }
503 clas++;
504 }
03424b1b 505
c958260b
VS
506 return arr;
507}
1e6feb95 508