]> git.saurik.com Git - wxWidgets.git/blame - src/common/encconv.cpp
document On{Open,Save}Document()
[wxWidgets.git] / src / common / encconv.cpp
CommitLineData
c958260b
VS
1/////////////////////////////////////////////////////////////////////////////
2// Name: encconv.cpp
3// Purpose: wxEncodingConverter class for converting between different
4// font encodings
5// Author: Vaclav Slavik
6// Copyright: (c) 1999 Vaclav Slavik
65571936 7// Licence: wxWindows licence
c958260b
VS
8/////////////////////////////////////////////////////////////////////////////
9
c958260b
VS
10// For compilers that support precompilation, includes "wx.h".
11#include "wx/wxprec.h"
12
13#ifdef __BORLANDC__
14 #pragma hdrstop
15#endif
16
17#include "wx/encconv.h"
18
19#include <stdlib.h>
20
94fc5183 21// conversion tables, generated by scripts in $(WXWIN)/misc/unictabl:
8f9c25cc 22#if defined( __BORLANDC__ ) || defined(__DARWIN__)
0f8d6461 23 #include "../common/unictabl.inc"
94fc5183 24#else
0f8d6461 25 #include "unictabl.inc"
94fc5183 26#endif
c958260b 27
// Element type of the conversion tables built by wxEncodingConverter::Init():
// wchar_t when the library is built with wchar_t support, plain char otherwise.
#if !wxUSE_WCHAR_T
    typedef char tchar;
#else
    typedef wchar_t tchar;
#endif
33
#ifdef __WXMAC__
    #include "wx/mac/corefoundation/cfstring.h"
    #include <CoreFoundation/CFStringEncodingExt.h>

    // Per-encoding caches for the Mac font encodings, indexed by
    // (encoding - wxFONTENCODING_MACMIN). GetEncTable() fills a row of
    // gMacEncodings the first time that encoding is requested and then sets
    // the matching gMacEncodingsInited flag.
    bool gMacEncodingsInited[wxFONTENCODING_MACMAX-wxFONTENCODING_MACMIN+1] ;
    wxUint16 gMacEncodings[wxFONTENCODING_MACMAX-wxFONTENCODING_MACMIN+1][128] ;
#endif
788a28b4 41
0f8d6461
VZ
42#ifdef __WXWINCE__
43 #include "wx/msw/wince/missing.h" // for bsearch()
788a28b4
SC
44#endif
45
cd1a22d1 46static const wxUint16* GetEncTable(wxFontEncoding enc)
c958260b 47{
788a28b4 48#ifdef __WXMAC__
3af5821c 49 if( enc >= wxFONTENCODING_MACMIN && enc <= wxFONTENCODING_MACMAX )
788a28b4 50 {
3af5821c
SC
51 int i = enc-wxFONTENCODING_MACMIN ;
52 if ( gMacEncodingsInited[i] == false )
788a28b4 53 {
e84f7290
SC
54 // create
55 CFStringEncoding cfencoding = wxMacGetSystemEncFromFontEnc( enc ) ;
56 if( !CFStringIsEncodingAvailable( cfencoding ) )
57 return NULL;
58
59 memset( gMacEncodings[i] , 0 , 128 * 2 );
60 char s[2] = { 0 , 0 };
61 CFRange firstchar = CFRangeMake( 0, 1 );
1a18887b
WS
62 for( unsigned char c = 255 ; c >= 128 ; --c )
63 {
64 s[0] = c ;
e84f7290
SC
65 wxCFStringRef cfref( CFStringCreateWithCStringNoCopy( NULL, s, cfencoding , kCFAllocatorNull ) );
66 CFStringGetCharacters( cfref, firstchar, (UniChar*) &gMacEncodings[i][c-128] );
1a18887b 67 }
1a18887b 68 gMacEncodingsInited[i]=true;
788a28b4 69 }
3af5821c 70 return gMacEncodings[i] ;
788a28b4
SC
71 }
72#endif
73
c958260b
VS
74 for (int i = 0; encodings_list[i].table != NULL; i++)
75 {
03424b1b 76 if (encodings_list[i].encoding == enc)
c958260b
VS
77 return encodings_list[i].table;
78 }
79 return NULL;
80}
81
82typedef struct {
83 wxUint16 u;
84 wxUint8 c;
85} CharsetItem;
86
0f8d6461
VZ
87extern "C" int wxCMPFUNC_CONV
88CompareCharsetItems(const void *i1, const void *i2)
c958260b
VS
89{
90 return ( ((CharsetItem*)i1) -> u - ((CharsetItem*)i2) -> u );
91}
92
93
cd1a22d1 94static CharsetItem* BuildReverseTable(const wxUint16 *tbl)
c958260b
VS
95{
96 CharsetItem *rev = new CharsetItem[128];
03424b1b 97
c958260b 98 for (int i = 0; i < 128; i++)
0203c8cd 99 rev[i].c = wxUint8(128 + i), rev[i].u = tbl[i];
c958260b
VS
100
101 qsort(rev, 128, sizeof(CharsetItem), CompareCharsetItems);
03424b1b 102
c958260b
VS
103 return rev;
104}
105
106
107
108wxEncodingConverter::wxEncodingConverter()
109{
110 m_Table = NULL;
1a18887b
WS
111 m_UnicodeInput = m_UnicodeOutput = false;
112 m_JustCopy = false;
c958260b
VS
113}
114
115
116
117bool wxEncodingConverter::Init(wxFontEncoding input_enc, wxFontEncoding output_enc, int method)
118{
119 unsigned i;
cd1a22d1
MR
120 const wxUint16 *in_tbl;
121 const wxUint16 *out_tbl = NULL;
c958260b
VS
122
123 if (m_Table) {delete[] m_Table; m_Table = NULL;}
124
f6bcfd97 125#if !wxUSE_WCHAR_T
1a18887b 126 if (input_enc == wxFONTENCODING_UNICODE || output_enc == wxFONTENCODING_UNICODE) return false;
c958260b
VS
127#endif
128
1a18887b 129 if (input_enc == output_enc) {m_JustCopy = true; return true;}
03424b1b 130
47e55c2f 131 m_UnicodeOutput = (output_enc == wxFONTENCODING_UNICODE);
1a18887b 132 m_JustCopy = false;
03424b1b 133
c958260b
VS
134 if (input_enc == wxFONTENCODING_UNICODE)
135 {
1a18887b 136 if ((out_tbl = GetEncTable(output_enc)) == NULL) return false;
c958260b 137
f6bcfd97
BP
138 m_Table = new tchar[65536];
139 for (i = 0; i < 128; i++) m_Table[i] = (tchar)i; // 7bit ASCII
02c92ad9 140 for (i = 128; i < 65536; i++) m_Table[i] = (tchar)0;
c958260b
VS
141
142 if (method == wxCONVERT_SUBSTITUTE)
143 {
144 for (i = 0; i < encoding_unicode_fallback_count; i++)
f6bcfd97 145 m_Table[encoding_unicode_fallback[i].c] = (tchar) encoding_unicode_fallback[i].s;
c958260b
VS
146 }
147
148 for (i = 0; i < 128; i++)
f6bcfd97 149 m_Table[out_tbl[i]] = (tchar)(128 + i);
c958260b 150
1a18887b 151 m_UnicodeInput = true;
c958260b 152 }
b8c253ec 153 else // input !Unicode
c958260b 154 {
1a18887b 155 if ((in_tbl = GetEncTable(input_enc)) == NULL) return false;
c958260b 156 if (output_enc != wxFONTENCODING_UNICODE)
1a18887b 157 if ((out_tbl = GetEncTable(output_enc)) == NULL) return false;
c958260b 158
1a18887b 159 m_UnicodeInput = false;
03424b1b 160
f6bcfd97
BP
161 m_Table = new tchar[256];
162 for (i = 0; i < 128; i++) m_Table[i] = (tchar)i; // 7bit ASCII
03424b1b 163
c958260b
VS
164 if (output_enc == wxFONTENCODING_UNICODE)
165 {
f6bcfd97 166 for (i = 0; i < 128; i++) m_Table[128 + i] = (tchar)in_tbl[i];
1a18887b 167 return true;
c958260b 168 }
b8c253ec 169 else // output !Unicode
c958260b
VS
170 {
171 CharsetItem *rev = BuildReverseTable(out_tbl);
33ac7e6f
KB
172 CharsetItem *item;
173 CharsetItem key;
03424b1b
VZ
174
175 for (i = 0; i < 128; i++)
c958260b
VS
176 {
177 key.u = in_tbl[i];
178 item = (CharsetItem*) bsearch(&key, rev, 128, sizeof(CharsetItem), CompareCharsetItems);
179 if (item == NULL && method == wxCONVERT_SUBSTITUTE)
03424b1b 180 item = (CharsetItem*) bsearch(&key, encoding_unicode_fallback,
c958260b
VS
181 encoding_unicode_fallback_count, sizeof(CharsetItem), CompareCharsetItems);
182 if (item)
f6bcfd97 183 m_Table[128 + i] = (tchar)item -> c;
c958260b 184 else
33ac7e6f
KB
185#if wxUSE_WCHAR_T
186 m_Table[128 + i] = (wchar_t)(128 + i);
187#else
188 m_Table[128 + i] = (char)(128 + i);
307fd956 189#endif
c958260b 190 }
03424b1b 191
c958260b 192 delete[] rev;
c958260b
VS
193 }
194 }
b8c253ec 195
1a18887b 196 return true;
c958260b
VS
197}
198
199
682f3d02 200#define REPLACEMENT_CHAR ((tchar)'?')
47e55c2f 201
02c92ad9
VS
202inline tchar GetTableValue(const tchar *table, tchar value, bool& repl)
203{
204 tchar r = table[value];
205 if (r == 0 && value != 0)
206 {
207 r = REPLACEMENT_CHAR;
208 repl = true;
209 }
210 return r;
211}
212
213
214bool wxEncodingConverter::Convert(const char* input, char* output) const
c958260b 215{
f6bcfd97
BP
216 wxASSERT_MSG(!m_UnicodeOutput, wxT("You cannot convert to unicode if output is const char*!"));
217 wxASSERT_MSG(!m_UnicodeInput, wxT("You cannot convert from unicode if input is const char*!"));
218
219 const char *i;
220 char *o;
221
c958260b
VS
222 if (m_JustCopy)
223 {
f6bcfd97 224 strcpy(output, input);
02c92ad9 225 return true;
c958260b 226 }
03424b1b 227
02c92ad9
VS
228 wxCHECK_MSG(m_Table != NULL, false,
229 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
230
231 bool replaced = false;
03424b1b 232
f6bcfd97 233 for (i = input, o = output; *i != 0;)
02c92ad9 234 *(o++) = (char)(GetTableValue(m_Table, (wxUint8)*(i++), replaced));
c958260b 235 *o = 0;
02c92ad9
VS
236
237 return !replaced;
c958260b
VS
238}
239
240
f6bcfd97 241#if wxUSE_WCHAR_T
47e55c2f 242
02c92ad9 243bool wxEncodingConverter::Convert(const char* input, wchar_t* output) const
47e55c2f 244{
f6bcfd97 245 wxASSERT_MSG(m_UnicodeOutput, wxT("You cannot convert to 8-bit if output is const wchar_t*!"));
47e55c2f
VS
246 wxASSERT_MSG(!m_UnicodeInput, wxT("You cannot convert from unicode if input is const char*!"));
247
248 const char *i;
f6bcfd97 249 wchar_t *o;
47e55c2f
VS
250
251 if (m_JustCopy)
252 {
253 for (i = input, o = output; *i != 0;)
f6bcfd97 254 *(o++) = (wchar_t)(*(i++));
47e55c2f 255 *o = 0;
02c92ad9 256 return true;
47e55c2f 257 }
03424b1b 258
02c92ad9
VS
259 wxCHECK_MSG(m_Table != NULL, false,
260 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
261
262 bool replaced = false;
03424b1b 263
47e55c2f 264 for (i = input, o = output; *i != 0;)
02c92ad9 265 *(o++) = (wchar_t)(GetTableValue(m_Table, (wxUint8)*(i++), replaced));
47e55c2f 266 *o = 0;
02c92ad9
VS
267
268 return !replaced;
47e55c2f
VS
269}
270
271
272
02c92ad9 273bool wxEncodingConverter::Convert(const wchar_t* input, char* output) const
47e55c2f
VS
274{
275 wxASSERT_MSG(!m_UnicodeOutput, wxT("You cannot convert to unicode if output is const char*!"));
f6bcfd97 276 wxASSERT_MSG(m_UnicodeInput, wxT("You cannot convert from 8-bit if input is const wchar_t*!"));
47e55c2f 277
f6bcfd97 278 const wchar_t *i;
47e55c2f
VS
279 char *o;
280
281 if (m_JustCopy)
282 {
283 for (i = input, o = output; *i != 0;)
284 *(o++) = (char)(*(i++));
285 *o = 0;
02c92ad9 286 return true;
47e55c2f 287 }
03424b1b 288
02c92ad9
VS
289 wxCHECK_MSG(m_Table != NULL, false,
290 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
291
292 bool replaced = false;
03424b1b 293
f6bcfd97 294 for (i = input, o = output; *i != 0;)
02c92ad9 295 *(o++) = (char)(GetTableValue(m_Table, (wxUint16)*(i++), replaced));
47e55c2f 296 *o = 0;
02c92ad9
VS
297
298 return !replaced;
47e55c2f
VS
299}
300
301
302
02c92ad9 303bool wxEncodingConverter::Convert(const wchar_t* input, wchar_t* output) const
47e55c2f 304{
f6bcfd97
BP
305 wxASSERT_MSG(m_UnicodeOutput, wxT("You cannot convert to 8-bit if output is const wchar_t*!"));
306 wxASSERT_MSG(m_UnicodeInput, wxT("You cannot convert from 8-bit if input is const wchar_t*!"));
47e55c2f 307
f6bcfd97
BP
308 const wchar_t *i;
309 wchar_t *o;
47e55c2f
VS
310
311 if (m_JustCopy)
312 {
f6bcfd97
BP
313 // wcscpy() is not guaranteed to exist
314 for (i = input, o = output; *i != 0;)
315 *(o++) = (*(i++));
316 *o = 0;
02c92ad9 317 return true;
47e55c2f 318 }
03424b1b 319
02c92ad9
VS
320 wxCHECK_MSG(m_Table != NULL, false,
321 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
03424b1b 322
02c92ad9
VS
323 bool replaced = false;
324
47e55c2f 325 for (i = input, o = output; *i != 0;)
02c92ad9 326 *(o++) = (wchar_t)(GetTableValue(m_Table, (wxUint8)*(i++), replaced));
47e55c2f 327 *o = 0;
02c92ad9
VS
328
329 return !replaced;
47e55c2f
VS
330}
331
f6bcfd97 332#endif // wxUSE_WCHAR_T
47e55c2f 333
c958260b 334
57c5293e 335wxString wxEncodingConverter::Convert(const wxString& input) const
c958260b
VS
336{
337 if (m_JustCopy) return input;
03424b1b 338
c958260b
VS
339 wxString s;
340 const wxChar *i;
03424b1b 341
4ccae30a
VZ
342 wxCHECK_MSG(m_Table != NULL, s,
343 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
03424b1b 344
c958260b 345 if (m_UnicodeInput)
307fd956 346 {
c958260b
VS
347 for (i = input.c_str(); *i != 0; i++)
348 s << (wxChar)(m_Table[(wxUint16)*i]);
307fd956 349 }
c958260b 350 else
307fd956 351 {
c958260b
VS
352 for (i = input.c_str(); *i != 0; i++)
353 s << (wxChar)(m_Table[(wxUint8)*i]);
307fd956
VZ
354 }
355
c958260b
VS
356 return s;
357}
358
359
360
361
47e55c2f
VS
362
363
364
c958260b 365// Following tables describe classes of encoding equivalence.
03424b1b 366//
c958260b
VS
367
368#define STOP wxFONTENCODING_SYSTEM
369
370#define NUM_OF_PLATFORMS 4 /*must conform to enum wxPLATFORM_XXXX !!!*/
8ddbb137 371#define ENC_PER_PLATFORM 3
cc845a61
MR
372 // max no. of encodings for one language used on one platform.
373 // Using maximum of everything at the current moment to not make the
374 // library larger than necessary. Make larger only if necessary - MR
8ddbb137
MR
375
376static const wxFontEncoding
c958260b
VS
377 EquivalentEncodings[][NUM_OF_PLATFORMS][ENC_PER_PLATFORM+1] = {
378
47e55c2f
VS
379 // *** Please put more common encodings as first! ***
380
03424b1b 381 // Western European
c958260b
VS
382 {
383 /* unix */ {wxFONTENCODING_ISO8859_1, wxFONTENCODING_ISO8859_15, STOP},
384 /* windows */ {wxFONTENCODING_CP1252, STOP},
385 /* os2 */ {STOP},
788a28b4 386 /* mac */ {wxFONTENCODING_MACROMAN, STOP}
c958260b
VS
387 },
388
47e55c2f 389 // Central European
c958260b
VS
390 {
391 /* unix */ {wxFONTENCODING_ISO8859_2, STOP},
392 /* windows */ {wxFONTENCODING_CP1250, STOP},
393 /* os2 */ {STOP},
788a28b4 394 /* mac */ {wxFONTENCODING_MACCENTRALEUR, STOP}
c958260b 395 },
03424b1b 396
47e55c2f
VS
397 // Baltic
398 {
03424b1b 399 /* unix */ {wxFONTENCODING_ISO8859_13, wxFONTENCODING_ISO8859_4, STOP},
47e55c2f
VS
400 /* windows */ {wxFONTENCODING_CP1257, STOP},
401 /* os2 */ {STOP},
03424b1b 402 /* mac */ {STOP}
47e55c2f
VS
403 },
404
405 // Hebrew
406 {
407 /* unix */ {wxFONTENCODING_ISO8859_8, STOP},
408 /* windows */ {wxFONTENCODING_CP1255, STOP},
409 /* os2 */ {STOP},
788a28b4 410 /* mac */ {wxFONTENCODING_MACHEBREW, STOP}
47e55c2f
VS
411 },
412
413 // Greek
414 {
415 /* unix */ {wxFONTENCODING_ISO8859_7, STOP},
416 /* windows */ {wxFONTENCODING_CP1253, STOP},
417 /* os2 */ {STOP},
788a28b4 418 /* mac */ {wxFONTENCODING_MACGREEK, STOP}
47e55c2f
VS
419 },
420
421 // Arabic
422 {
423 /* unix */ {wxFONTENCODING_ISO8859_6, STOP},
424 /* windows */ {wxFONTENCODING_CP1256, STOP},
425 /* os2 */ {STOP},
788a28b4 426 /* mac */ {wxFONTENCODING_MACARABIC, STOP}
47e55c2f
VS
427 },
428
429 // Turkish
430 {
431 /* unix */ {wxFONTENCODING_ISO8859_9, STOP},
432 /* windows */ {wxFONTENCODING_CP1254, STOP},
433 /* os2 */ {STOP},
788a28b4 434 /* mac */ {wxFONTENCODING_MACTURKISH, STOP}
47e55c2f
VS
435 },
436
437 // Cyrillic
3b61656e 438 {
15ad38c3 439 /* unix */ {wxFONTENCODING_KOI8, wxFONTENCODING_KOI8_U, wxFONTENCODING_ISO8859_5, STOP},
47e55c2f
VS
440 /* windows */ {wxFONTENCODING_CP1251, STOP},
441 /* os2 */ {STOP},
788a28b4 442 /* mac */ {wxFONTENCODING_MACCYRILLIC, STOP}
47e55c2f 443 },
c958260b
VS
444
445 {{STOP},{STOP},{STOP},{STOP}} /* Terminator */
446 /* no, _not_ Arnold! */
447};
448
449
df5168c4
MB
450static bool FindEncoding(const wxFontEncodingArray& arr, wxFontEncoding f)
451{
452 for (wxFontEncodingArray::const_iterator it = arr.begin(), en = arr.end();
453 it != en; ++it)
454 if (*it == f)
455 return true;
456 return false;
457}
c958260b
VS
458
459wxFontEncodingArray wxEncodingConverter::GetPlatformEquivalents(wxFontEncoding enc, int platform)
460{
461 if (platform == wxPLATFORM_CURRENT)
462 {
463#if defined(__WXMSW__)
464 platform = wxPLATFORM_WINDOWS;
465#elif defined(__WXGTK__) || defined(__WXMOTIF__)
466 platform = wxPLATFORM_UNIX;
554fdf48 467#elif defined(__WXPM__)
c958260b
VS
468 platform = wxPLATFORM_OS2;
469#elif defined(__WXMAC__)
470 platform = wxPLATFORM_MAC;
471#endif
472 }
03424b1b 473
c958260b 474 int i, clas, e ;
8ddbb137 475 const wxFontEncoding *f;
c958260b
VS
476 wxFontEncodingArray arr;
477
478 clas = 0;
479 while (EquivalentEncodings[clas][0][0] != STOP)
480 {
481 for (i = 0; i < NUM_OF_PLATFORMS; i++)
482 for (e = 0; EquivalentEncodings[clas][i][e] != STOP; e++)
483 if (EquivalentEncodings[clas][i][e] == enc)
484 {
47e55c2f 485 for (f = EquivalentEncodings[clas][platform]; *f != STOP; f++)
df5168c4 486 if (*f == enc) arr.push_back(enc);
47e55c2f 487 for (f = EquivalentEncodings[clas][platform]; *f != STOP; f++)
df5168c4 488 if (!FindEncoding(arr, *f)) arr.push_back(*f);
03424b1b 489 i = NUM_OF_PLATFORMS/*hack*/; break;
c958260b
VS
490 }
491 clas++;
492 }
03424b1b 493
c958260b
VS
494 return arr;
495}
496
497
498
499wxFontEncodingArray wxEncodingConverter::GetAllEquivalents(wxFontEncoding enc)
500{
501 int i, clas, e, j ;
8ddbb137 502 const wxFontEncoding *f;
c958260b 503 wxFontEncodingArray arr;
03424b1b 504
47e55c2f 505 arr = GetPlatformEquivalents(enc); // we want them to be first items in array
c958260b
VS
506
507 clas = 0;
508 while (EquivalentEncodings[clas][0][0] != STOP)
509 {
510 for (i = 0; i < NUM_OF_PLATFORMS; i++)
511 for (e = 0; EquivalentEncodings[clas][i][e] != STOP; e++)
512 if (EquivalentEncodings[clas][i][e] == enc)
513 {
514 for (j = 0; j < NUM_OF_PLATFORMS; j++)
03424b1b 515 for (f = EquivalentEncodings[clas][j]; *f != STOP; f++)
df5168c4 516 if (!FindEncoding(arr, *f)) arr.push_back(*f);
03424b1b 517 i = NUM_OF_PLATFORMS/*hack*/; break;
c958260b
VS
518 }
519 clas++;
520 }
03424b1b 521
c958260b
VS
522 return arr;
523}
1e6feb95 524