]> git.saurik.com Git - wxWidgets.git/blob - src/common/encconv.cpp
use ifdef, not if, to test for HAVE_WCSRTOMBS
[wxWidgets.git] / src / common / encconv.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: encconv.cpp
3 // Purpose: wxEncodingConverter class for converting between different
4 // font encodings
5 // Author: Vaclav Slavik
6 // Copyright: (c) 1999 Vaclav Slavik
7 // Licence: wxWindows licence
8 /////////////////////////////////////////////////////////////////////////////
9
10 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
11 #pragma implementation "encconv.h"
12 #endif
13
14 // For compilers that support precompilation, includes "wx.h".
15 #include "wx/wxprec.h"
16
17 #ifdef __BORLANDC__
18 #pragma hdrstop
19 #endif
20
21 #include "wx/encconv.h"
22
23 #include <stdlib.h>
24
25 // conversion tables, generated by scripts in $(WXWIN)/misc/unictabl:
26 #if defined( __BORLANDC__ ) || defined(__DARWIN__)
27 #include "../common/unictabl.inc"
28 #else
29 #include "unictabl.inc"
30 #endif
31
// Element type of the conversion table built by wxEncodingConverter::Init():
// wchar_t when the build has wchar_t support, plain char otherwise.
#if !wxUSE_WCHAR_T
typedef char tchar;
#else
typedef wchar_t tchar;
#endif
37
38 #ifdef __WXMAC__
39 #ifdef __DARWIN__
40 #include <Carbon/Carbon.h>
41 #else
42 #include <ATSUnicode.h>
43 #include <TextCommon.h>
44 #include <TextEncodingConverter.h>
45 #endif
46 #include "wx/fontutil.h"
47 #include "wx/mac/private.h" // includes mac headers
48
49 wxUint16 gMacEncodings[wxFONTENCODING_MACMAX-wxFONTENCODING_MACMIN+1][128] ;
50 bool gMacEncodingsInited[wxFONTENCODING_MACMAX-wxFONTENCODING_MACMIN+1] ;
51 #endif
52
53 #ifdef __WXWINCE__
54 #include "wx/msw/wince/missing.h" // for bsearch()
55 #endif
56
57 static wxUint16* GetEncTable(wxFontEncoding enc)
58 {
59 #ifdef __WXMAC__
60 if( enc >= wxFONTENCODING_MACMIN && enc <= wxFONTENCODING_MACMAX )
61 {
62 int i = enc-wxFONTENCODING_MACMIN ;
63 if ( gMacEncodingsInited[i] == false )
64 {
65 TECObjectRef converter ;
66 TextEncodingBase code = wxMacGetSystemEncFromFontEnc( enc ) ;
67 TextEncodingBase unicode = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
68 OSStatus status = TECCreateConverter(&converter,code,unicode);
69 char s[2] ;
70 s[1] = 0 ;
71 ByteCount byteInLen, byteOutLen ;
72 for( unsigned char c = 255 ; c >= 128 ; --c )
73 {
74 s[0] = c ;
75 status = TECConvertText(converter, (ConstTextPtr) &s , 1, &byteInLen,
76 (TextPtr) &gMacEncodings[i][c-128] , 2, &byteOutLen);
77 }
78 status = TECDisposeConverter(converter);
79 gMacEncodingsInited[i]=true;
80 }
81 return gMacEncodings[i] ;
82 }
83 #endif
84
85 for (int i = 0; encodings_list[i].table != NULL; i++)
86 {
87 if (encodings_list[i].encoding == enc)
88 return encodings_list[i].table;
89 }
90 return NULL;
91 }
92
93 typedef struct {
94 wxUint16 u;
95 wxUint8 c;
96 } CharsetItem;
97
98 extern "C" int wxCMPFUNC_CONV
99 CompareCharsetItems(const void *i1, const void *i2)
100 {
101 return ( ((CharsetItem*)i1) -> u - ((CharsetItem*)i2) -> u );
102 }
103
104
105 static CharsetItem* BuildReverseTable(wxUint16 *tbl)
106 {
107 CharsetItem *rev = new CharsetItem[128];
108
109 for (int i = 0; i < 128; i++)
110 rev[i].c = wxUint8(128 + i), rev[i].u = tbl[i];
111
112 qsort(rev, 128, sizeof(CharsetItem), CompareCharsetItems);
113
114 return rev;
115 }
116
117
118
119 wxEncodingConverter::wxEncodingConverter()
120 {
121 m_Table = NULL;
122 m_UnicodeInput = m_UnicodeOutput = false;
123 m_JustCopy = false;
124 }
125
126
127
128 bool wxEncodingConverter::Init(wxFontEncoding input_enc, wxFontEncoding output_enc, int method)
129 {
130 unsigned i;
131 wxUint16 *in_tbl, *out_tbl = NULL;
132
133 if (m_Table) {delete[] m_Table; m_Table = NULL;}
134
135 #if !wxUSE_WCHAR_T
136 if (input_enc == wxFONTENCODING_UNICODE || output_enc == wxFONTENCODING_UNICODE) return false;
137 #endif
138
139 if (input_enc == output_enc) {m_JustCopy = true; return true;}
140
141 m_UnicodeOutput = (output_enc == wxFONTENCODING_UNICODE);
142 m_JustCopy = false;
143
144 if (input_enc == wxFONTENCODING_UNICODE)
145 {
146 if ((out_tbl = GetEncTable(output_enc)) == NULL) return false;
147
148 m_Table = new tchar[65536];
149 for (i = 0; i < 128; i++) m_Table[i] = (tchar)i; // 7bit ASCII
150 for (i = 128; i < 65536; i++) m_Table[i] = (tchar)0;
151
152 if (method == wxCONVERT_SUBSTITUTE)
153 {
154 for (i = 0; i < encoding_unicode_fallback_count; i++)
155 m_Table[encoding_unicode_fallback[i].c] = (tchar) encoding_unicode_fallback[i].s;
156 }
157
158 for (i = 0; i < 128; i++)
159 m_Table[out_tbl[i]] = (tchar)(128 + i);
160
161 m_UnicodeInput = true;
162 }
163 else // input !Unicode
164 {
165 if ((in_tbl = GetEncTable(input_enc)) == NULL) return false;
166 if (output_enc != wxFONTENCODING_UNICODE)
167 if ((out_tbl = GetEncTable(output_enc)) == NULL) return false;
168
169 m_UnicodeInput = false;
170
171 m_Table = new tchar[256];
172 for (i = 0; i < 128; i++) m_Table[i] = (tchar)i; // 7bit ASCII
173
174 if (output_enc == wxFONTENCODING_UNICODE)
175 {
176 for (i = 0; i < 128; i++) m_Table[128 + i] = (tchar)in_tbl[i];
177 return true;
178 }
179 else // output !Unicode
180 {
181 CharsetItem *rev = BuildReverseTable(out_tbl);
182 CharsetItem *item;
183 CharsetItem key;
184
185 for (i = 0; i < 128; i++)
186 {
187 key.u = in_tbl[i];
188 item = (CharsetItem*) bsearch(&key, rev, 128, sizeof(CharsetItem), CompareCharsetItems);
189 if (item == NULL && method == wxCONVERT_SUBSTITUTE)
190 item = (CharsetItem*) bsearch(&key, encoding_unicode_fallback,
191 encoding_unicode_fallback_count, sizeof(CharsetItem), CompareCharsetItems);
192 if (item)
193 m_Table[128 + i] = (tchar)item -> c;
194 else
195 #if wxUSE_WCHAR_T
196 m_Table[128 + i] = (wchar_t)(128 + i);
197 #else
198 m_Table[128 + i] = (char)(128 + i);
199 #endif
200 }
201
202 delete[] rev;
203 }
204 }
205
206 return true;
207 }
208
209
210 #define REPLACEMENT_CHAR ((tchar)'?')
211
212 inline tchar GetTableValue(const tchar *table, tchar value, bool& repl)
213 {
214 tchar r = table[value];
215 if (r == 0 && value != 0)
216 {
217 r = REPLACEMENT_CHAR;
218 repl = true;
219 }
220 return r;
221 }
222
223
224 bool wxEncodingConverter::Convert(const char* input, char* output) const
225 {
226 wxASSERT_MSG(!m_UnicodeOutput, wxT("You cannot convert to unicode if output is const char*!"));
227 wxASSERT_MSG(!m_UnicodeInput, wxT("You cannot convert from unicode if input is const char*!"));
228
229 const char *i;
230 char *o;
231
232 if (m_JustCopy)
233 {
234 strcpy(output, input);
235 return true;
236 }
237
238 wxCHECK_MSG(m_Table != NULL, false,
239 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
240
241 bool replaced = false;
242
243 for (i = input, o = output; *i != 0;)
244 *(o++) = (char)(GetTableValue(m_Table, (wxUint8)*(i++), replaced));
245 *o = 0;
246
247 return !replaced;
248 }
249
250
251 #if wxUSE_WCHAR_T
252
253 bool wxEncodingConverter::Convert(const char* input, wchar_t* output) const
254 {
255 wxASSERT_MSG(m_UnicodeOutput, wxT("You cannot convert to 8-bit if output is const wchar_t*!"));
256 wxASSERT_MSG(!m_UnicodeInput, wxT("You cannot convert from unicode if input is const char*!"));
257
258 const char *i;
259 wchar_t *o;
260
261 if (m_JustCopy)
262 {
263 for (i = input, o = output; *i != 0;)
264 *(o++) = (wchar_t)(*(i++));
265 *o = 0;
266 return true;
267 }
268
269 wxCHECK_MSG(m_Table != NULL, false,
270 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
271
272 bool replaced = false;
273
274 for (i = input, o = output; *i != 0;)
275 *(o++) = (wchar_t)(GetTableValue(m_Table, (wxUint8)*(i++), replaced));
276 *o = 0;
277
278 return !replaced;
279 }
280
281
282
283 bool wxEncodingConverter::Convert(const wchar_t* input, char* output) const
284 {
285 wxASSERT_MSG(!m_UnicodeOutput, wxT("You cannot convert to unicode if output is const char*!"));
286 wxASSERT_MSG(m_UnicodeInput, wxT("You cannot convert from 8-bit if input is const wchar_t*!"));
287
288 const wchar_t *i;
289 char *o;
290
291 if (m_JustCopy)
292 {
293 for (i = input, o = output; *i != 0;)
294 *(o++) = (char)(*(i++));
295 *o = 0;
296 return true;
297 }
298
299 wxCHECK_MSG(m_Table != NULL, false,
300 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
301
302 bool replaced = false;
303
304 for (i = input, o = output; *i != 0;)
305 *(o++) = (char)(GetTableValue(m_Table, (wxUint16)*(i++), replaced));
306 *o = 0;
307
308 return !replaced;
309 }
310
311
312
313 bool wxEncodingConverter::Convert(const wchar_t* input, wchar_t* output) const
314 {
315 wxASSERT_MSG(m_UnicodeOutput, wxT("You cannot convert to 8-bit if output is const wchar_t*!"));
316 wxASSERT_MSG(m_UnicodeInput, wxT("You cannot convert from 8-bit if input is const wchar_t*!"));
317
318 const wchar_t *i;
319 wchar_t *o;
320
321 if (m_JustCopy)
322 {
323 // wcscpy() is not guaranteed to exist
324 for (i = input, o = output; *i != 0;)
325 *(o++) = (*(i++));
326 *o = 0;
327 return true;
328 }
329
330 wxCHECK_MSG(m_Table != NULL, false,
331 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
332
333 bool replaced = false;
334
335 for (i = input, o = output; *i != 0;)
336 *(o++) = (wchar_t)(GetTableValue(m_Table, (wxUint8)*(i++), replaced));
337 *o = 0;
338
339 return !replaced;
340 }
341
342 #endif // wxUSE_WCHAR_T
343
344
345 wxString wxEncodingConverter::Convert(const wxString& input) const
346 {
347 if (m_JustCopy) return input;
348
349 wxString s;
350 const wxChar *i;
351
352 wxCHECK_MSG(m_Table != NULL, s,
353 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
354
355 if (m_UnicodeInput)
356 {
357 for (i = input.c_str(); *i != 0; i++)
358 s << (wxChar)(m_Table[(wxUint16)*i]);
359 }
360 else
361 {
362 for (i = input.c_str(); *i != 0; i++)
363 s << (wxChar)(m_Table[(wxUint8)*i]);
364 }
365
366 return s;
367 }
368
369
370
371
372
373
374
375 // Following tables describe classes of encoding equivalence.
376 //
377
378 #define STOP wxFONTENCODING_SYSTEM
379
380 #define NUM_OF_PLATFORMS 4 /*must conform to enum wxPLATFORM_XXXX !!!*/
381 #define ENC_PER_PLATFORM 5
382 // max no. of encodings for one language used on one platform
383 // Anybody thinks 5 is not enough? ;-)
384
385 static wxFontEncoding
386 EquivalentEncodings[][NUM_OF_PLATFORMS][ENC_PER_PLATFORM+1] = {
387
388 // *** Please put more common encodings as first! ***
389
390 // Western European
391 {
392 /* unix */ {wxFONTENCODING_ISO8859_1, wxFONTENCODING_ISO8859_15, STOP},
393 /* windows */ {wxFONTENCODING_CP1252, STOP},
394 /* os2 */ {STOP},
395 /* mac */ {wxFONTENCODING_MACROMAN, STOP}
396 },
397
398 // Central European
399 {
400 /* unix */ {wxFONTENCODING_ISO8859_2, STOP},
401 /* windows */ {wxFONTENCODING_CP1250, STOP},
402 /* os2 */ {STOP},
403 /* mac */ {wxFONTENCODING_MACCENTRALEUR, STOP}
404 },
405
406 // Baltic
407 {
408 /* unix */ {wxFONTENCODING_ISO8859_13, wxFONTENCODING_ISO8859_4, STOP},
409 /* windows */ {wxFONTENCODING_CP1257, STOP},
410 /* os2 */ {STOP},
411 /* mac */ {STOP}
412 },
413
414 // Hebrew
415 {
416 /* unix */ {wxFONTENCODING_ISO8859_8, STOP},
417 /* windows */ {wxFONTENCODING_CP1255, STOP},
418 /* os2 */ {STOP},
419 /* mac */ {wxFONTENCODING_MACHEBREW, STOP}
420 },
421
422 // Greek
423 {
424 /* unix */ {wxFONTENCODING_ISO8859_7, STOP},
425 /* windows */ {wxFONTENCODING_CP1253, STOP},
426 /* os2 */ {STOP},
427 /* mac */ {wxFONTENCODING_MACGREEK, STOP}
428 },
429
430 // Arabic
431 {
432 /* unix */ {wxFONTENCODING_ISO8859_6, STOP},
433 /* windows */ {wxFONTENCODING_CP1256, STOP},
434 /* os2 */ {STOP},
435 /* mac */ {wxFONTENCODING_MACARABIC, STOP}
436 },
437
438 // Turkish
439 {
440 /* unix */ {wxFONTENCODING_ISO8859_9, STOP},
441 /* windows */ {wxFONTENCODING_CP1254, STOP},
442 /* os2 */ {STOP},
443 /* mac */ {wxFONTENCODING_MACTURKISH, STOP}
444 },
445
446 // Cyrillic
447 {
448 /* unix */ {wxFONTENCODING_KOI8, wxFONTENCODING_KOI8_U, wxFONTENCODING_ISO8859_5, STOP},
449 /* windows */ {wxFONTENCODING_CP1251, STOP},
450 /* os2 */ {STOP},
451 /* mac */ {wxFONTENCODING_MACCYRILLIC, STOP}
452 },
453
454 {{STOP},{STOP},{STOP},{STOP}} /* Terminator */
455 /* no, _not_ Arnold! */
456 };
457
458
459 static bool FindEncoding(const wxFontEncodingArray& arr, wxFontEncoding f)
460 {
461 for (wxFontEncodingArray::const_iterator it = arr.begin(), en = arr.end();
462 it != en; ++it)
463 if (*it == f)
464 return true;
465 return false;
466 }
467
468 wxFontEncodingArray wxEncodingConverter::GetPlatformEquivalents(wxFontEncoding enc, int platform)
469 {
470 if (platform == wxPLATFORM_CURRENT)
471 {
472 #if defined(__WXMSW__)
473 platform = wxPLATFORM_WINDOWS;
474 #elif defined(__WXGTK__) || defined(__WXMOTIF__)
475 platform = wxPLATFORM_UNIX;
476 #elif defined(__WXOS2__)
477 platform = wxPLATFORM_OS2;
478 #elif defined(__WXMAC__)
479 platform = wxPLATFORM_MAC;
480 #endif
481 }
482
483 int i, clas, e ;
484 wxFontEncoding *f;
485 wxFontEncodingArray arr;
486
487 clas = 0;
488 while (EquivalentEncodings[clas][0][0] != STOP)
489 {
490 for (i = 0; i < NUM_OF_PLATFORMS; i++)
491 for (e = 0; EquivalentEncodings[clas][i][e] != STOP; e++)
492 if (EquivalentEncodings[clas][i][e] == enc)
493 {
494 for (f = EquivalentEncodings[clas][platform]; *f != STOP; f++)
495 if (*f == enc) arr.push_back(enc);
496 for (f = EquivalentEncodings[clas][platform]; *f != STOP; f++)
497 if (!FindEncoding(arr, *f)) arr.push_back(*f);
498 i = NUM_OF_PLATFORMS/*hack*/; break;
499 }
500 clas++;
501 }
502
503 return arr;
504 }
505
506
507
508 wxFontEncodingArray wxEncodingConverter::GetAllEquivalents(wxFontEncoding enc)
509 {
510 int i, clas, e, j ;
511 wxFontEncoding *f;
512 wxFontEncodingArray arr;
513
514 arr = GetPlatformEquivalents(enc); // we want them to be first items in array
515
516 clas = 0;
517 while (EquivalentEncodings[clas][0][0] != STOP)
518 {
519 for (i = 0; i < NUM_OF_PLATFORMS; i++)
520 for (e = 0; EquivalentEncodings[clas][i][e] != STOP; e++)
521 if (EquivalentEncodings[clas][i][e] == enc)
522 {
523 for (j = 0; j < NUM_OF_PLATFORMS; j++)
524 for (f = EquivalentEncodings[clas][j]; *f != STOP; f++)
525 if (!FindEncoding(arr, *f)) arr.push_back(*f);
526 i = NUM_OF_PLATFORMS/*hack*/; break;
527 }
528 clas++;
529 }
530
531 return arr;
532 }
533