]> git.saurik.com Git - wxWidgets.git/blob - src/common/encconv.cpp
system encoding for mac
[wxWidgets.git] / src / common / encconv.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: encconv.cpp
3 // Purpose: wxEncodingConverter class for converting between different
4 // font encodings
5 // Author: Vaclav Slavik
6 // Copyright: (c) 1999 Vaclav Slavik
7 // Licence: wxWindows Licence
8 /////////////////////////////////////////////////////////////////////////////
9
10 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
11 #pragma implementation "encconv.h"
12 #endif
13
14 // For compilers that support precompilation, includes "wx.h".
15 #include "wx/wxprec.h"
16
17 #ifdef __BORLANDC__
18 #pragma hdrstop
19 #endif
20
21 #if wxUSE_FONTMAP
22
23 #include "wx/encconv.h"
24
25 #include <stdlib.h>
26
27 // conversion tables, generated by scripts in $(WXWIN)/misc/unictabl:
28 #ifdef __BORLANDC__
29 #include "../common/unictabl.inc"
30 #else
31 #include "unictabl.inc"
32 #endif
33
// Element type of the lookup table built by wxEncodingConverter::Init():
// wchar_t when wide character support is compiled in, plain char otherwise.
#if !wxUSE_WCHAR_T
    typedef char tchar;
#else
    typedef wchar_t tchar;
#endif
39
// Windows CE builds replace whatever LINKAGEMODE was defined as with __cdecl;
// the macro decorates the static helpers and the qsort()/bsearch() comparator
// defined further down in this file.
#if defined(__WXWINCE__)
    #undef LINKAGEMODE
    #define LINKAGEMODE __cdecl
#endif
44
45 #ifdef __WXMAC__
46
47 #include "ATSUnicode.h"
48 #include "TextCommon.h"
49 #include "TextEncodingConverter.h"
50
51 #include "wx/mac/private.h" // includes mac headers
52
53 typedef struct {
54 wxFontEncoding enc ;
55 TextEncodingBase mac ;
56 } MacCP ;
57
58 MacCP gMacCodePages[] =
59 {
60 wxFONTENCODING_MACROMAN, kTextEncodingMacRoman,
61 wxFONTENCODING_MACCENTRALEUR, kTextEncodingMacCentralEurRoman,
62 wxFONTENCODING_MACHEBREW, kTextEncodingMacHebrew,
63 wxFONTENCODING_MACGREEK, kTextEncodingMacGreek,
64 wxFONTENCODING_MACARABIC, kTextEncodingMacArabic,
65 wxFONTENCODING_MACTURKISH, kTextEncodingMacTurkish,
66 wxFONTENCODING_MACCYRILLIC, kTextEncodingMacCyrillic,
67 } ;
68
69 wxUint16 gMacEncodings[WXSIZEOF(gMacCodePages)][128] ;
70 bool gMacEncodingsInited[WXSIZEOF(gMacCodePages)] ;
71
72 #endif
73
74 static wxUint16* LINKAGEMODE GetEncTable(wxFontEncoding enc)
75 {
76 #ifdef __WXMAC__
77 for (int i = 0 ; i < WXSIZEOF(gMacCodePages) ; ++i )
78 {
79 if ( gMacCodePages[i].enc == enc )
80 {
81 if ( gMacEncodingsInited[i] == false )
82 {
83 TECObjectRef converter ;
84 TextEncodingBase code = gMacCodePages[i].mac ;
85 TextEncodingBase unicode = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
86 OSStatus status = TECCreateConverter(&converter,code,unicode);
87 char s[2] ;
88 s[1] = 0 ;
89 ByteCount byteInLen, byteOutLen ;
90 for( char c = 255 ; c >= 128 ; --c )
91 {
92 s[0] = c ;
93 status = TECConvertText(converter, (ConstTextPtr) &s , 1, &byteInLen,
94 (TextPtr) &gMacEncodings[i][c-128] , 2, &byteOutLen);
95 }
96 status = TECDisposeConverter(converter);
97 gMacEncodingsInited[i]=true;
98 }
99 return gMacEncodings[i] ;
100 }
101 }
102 #endif
103
104 for (int i = 0; encodings_list[i].table != NULL; i++)
105 {
106 if (encodings_list[i].encoding == enc)
107 return encodings_list[i].table;
108 }
109 return NULL;
110 }
111
112 typedef struct {
113 wxUint16 u;
114 wxUint8 c;
115 } CharsetItem;
116
117 extern "C" int LINKAGEMODE CompareCharsetItems(const void *i1, const void *i2)
118 {
119 return ( ((CharsetItem*)i1) -> u - ((CharsetItem*)i2) -> u );
120 }
121
122
123 static CharsetItem* LINKAGEMODE BuildReverseTable(wxUint16 *tbl)
124 {
125 CharsetItem *rev = new CharsetItem[128];
126
127 for (int i = 0; i < 128; i++)
128 rev[i].c = 128 + i, rev[i].u = tbl[i];
129
130 qsort(rev, 128, sizeof(CharsetItem), CompareCharsetItems);
131
132 return rev;
133 }
134
135
136
137 wxEncodingConverter::wxEncodingConverter()
138 {
139 m_Table = NULL;
140 m_UnicodeInput = m_UnicodeOutput = FALSE;
141 m_JustCopy = FALSE;
142 }
143
144
145
146 bool wxEncodingConverter::Init(wxFontEncoding input_enc, wxFontEncoding output_enc, int method)
147 {
148 unsigned i;
149 wxUint16 *in_tbl, *out_tbl = NULL;
150
151 if (m_Table) {delete[] m_Table; m_Table = NULL;}
152
153 #if !wxUSE_WCHAR_T
154 if (input_enc == wxFONTENCODING_UNICODE || output_enc == wxFONTENCODING_UNICODE) return FALSE;
155 #endif
156
157 if (input_enc == output_enc) {m_JustCopy = TRUE; return TRUE;}
158
159 m_UnicodeOutput = (output_enc == wxFONTENCODING_UNICODE);
160 m_JustCopy = FALSE;
161
162 if (input_enc == wxFONTENCODING_UNICODE)
163 {
164 if ((out_tbl = GetEncTable(output_enc)) == NULL) return FALSE;
165
166 m_Table = new tchar[65536];
167 for (i = 0; i < 128; i++) m_Table[i] = (tchar)i; // 7bit ASCII
168 for (i = 128; i < 65536; i++) m_Table[i] = (tchar)'?';
169 // FIXME - this should be character that means `unicode to charset' impossible, not '?'
170
171 if (method == wxCONVERT_SUBSTITUTE)
172 {
173 for (i = 0; i < encoding_unicode_fallback_count; i++)
174 m_Table[encoding_unicode_fallback[i].c] = (tchar) encoding_unicode_fallback[i].s;
175 }
176
177 for (i = 0; i < 128; i++)
178 m_Table[out_tbl[i]] = (tchar)(128 + i);
179
180 m_UnicodeInput = TRUE;
181 }
182 else // input !Unicode
183 {
184 if ((in_tbl = GetEncTable(input_enc)) == NULL) return FALSE;
185 if (output_enc != wxFONTENCODING_UNICODE)
186 if ((out_tbl = GetEncTable(output_enc)) == NULL) return FALSE;
187
188 m_UnicodeInput = FALSE;
189
190 m_Table = new tchar[256];
191 for (i = 0; i < 128; i++) m_Table[i] = (tchar)i; // 7bit ASCII
192
193 if (output_enc == wxFONTENCODING_UNICODE)
194 {
195 for (i = 0; i < 128; i++) m_Table[128 + i] = (tchar)in_tbl[i];
196 return TRUE;
197 }
198 // FIXME: write a substitute for bsearch
199 #ifndef __WXWINCE__
200 else // output !Unicode
201 {
202 CharsetItem *rev = BuildReverseTable(out_tbl);
203 CharsetItem *item;
204 CharsetItem key;
205
206 for (i = 0; i < 128; i++)
207 {
208 key.u = in_tbl[i];
209 item = (CharsetItem*) bsearch(&key, rev, 128, sizeof(CharsetItem), CompareCharsetItems);
210 if (item == NULL && method == wxCONVERT_SUBSTITUTE)
211 item = (CharsetItem*) bsearch(&key, encoding_unicode_fallback,
212 encoding_unicode_fallback_count, sizeof(CharsetItem), CompareCharsetItems);
213 if (item)
214 m_Table[128 + i] = (tchar)item -> c;
215 else
216 #if wxUSE_WCHAR_T
217 m_Table[128 + i] = (wchar_t)(128 + i);
218 #else
219 m_Table[128 + i] = (char)(128 + i);
220 #endif
221 }
222
223 delete[] rev;
224 }
225 #endif // !__WXWINCE__
226 }
227
228 return TRUE;
229 }
230
231
232
233 void wxEncodingConverter::Convert(const char* input, char* output) const
234 {
235 wxASSERT_MSG(!m_UnicodeOutput, wxT("You cannot convert to unicode if output is const char*!"));
236 wxASSERT_MSG(!m_UnicodeInput, wxT("You cannot convert from unicode if input is const char*!"));
237
238 const char *i;
239 char *o;
240
241 if (m_JustCopy)
242 {
243 strcpy(output, input);
244 return;
245 }
246
247 wxCHECK_RET(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
248
249 for (i = input, o = output; *i != 0;)
250 *(o++) = (char)(m_Table[(wxUint8)*(i++)]);
251 *o = 0;
252 }
253
254
255 #if wxUSE_WCHAR_T
256
257 void wxEncodingConverter::Convert(const char* input, wchar_t* output) const
258 {
259 wxASSERT_MSG(m_UnicodeOutput, wxT("You cannot convert to 8-bit if output is const wchar_t*!"));
260 wxASSERT_MSG(!m_UnicodeInput, wxT("You cannot convert from unicode if input is const char*!"));
261
262 const char *i;
263 wchar_t *o;
264
265 if (m_JustCopy)
266 {
267 for (i = input, o = output; *i != 0;)
268 *(o++) = (wchar_t)(*(i++));
269 *o = 0;
270 return;
271 }
272
273 wxCHECK_RET(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
274
275 for (i = input, o = output; *i != 0;)
276 *(o++) = (wchar_t)(m_Table[(wxUint8)*(i++)]);
277 *o = 0;
278 }
279
280
281
282 void wxEncodingConverter::Convert(const wchar_t* input, char* output) const
283 {
284 wxASSERT_MSG(!m_UnicodeOutput, wxT("You cannot convert to unicode if output is const char*!"));
285 wxASSERT_MSG(m_UnicodeInput, wxT("You cannot convert from 8-bit if input is const wchar_t*!"));
286
287 const wchar_t *i;
288 char *o;
289
290 if (m_JustCopy)
291 {
292 for (i = input, o = output; *i != 0;)
293 *(o++) = (char)(*(i++));
294 *o = 0;
295 return;
296 }
297
298 wxCHECK_RET(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
299
300 for (i = input, o = output; *i != 0;)
301 *(o++) = (char)(m_Table[(wxUint16)*(i++)]);
302 *o = 0;
303 }
304
305
306
307 void wxEncodingConverter::Convert(const wchar_t* input, wchar_t* output) const
308 {
309 wxASSERT_MSG(m_UnicodeOutput, wxT("You cannot convert to 8-bit if output is const wchar_t*!"));
310 wxASSERT_MSG(m_UnicodeInput, wxT("You cannot convert from 8-bit if input is const wchar_t*!"));
311
312 const wchar_t *i;
313 wchar_t *o;
314
315 if (m_JustCopy)
316 {
317 // wcscpy() is not guaranteed to exist
318 for (i = input, o = output; *i != 0;)
319 *(o++) = (*(i++));
320 *o = 0;
321 return;
322 }
323
324 wxCHECK_RET(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
325
326 for (i = input, o = output; *i != 0;)
327 *(o++) = (wchar_t)(m_Table[(wxUint8)*(i++)]);
328 *o = 0;
329 }
330
331 #endif // wxUSE_WCHAR_T
332
333
334 wxString wxEncodingConverter::Convert(const wxString& input) const
335 {
336 if (m_JustCopy) return input;
337
338 wxString s;
339 const wxChar *i;
340
341 wxCHECK_MSG(m_Table != NULL, s,
342 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
343
344 if (m_UnicodeInput)
345 {
346 for (i = input.c_str(); *i != 0; i++)
347 s << (wxChar)(m_Table[(wxUint16)*i]);
348 }
349 else
350 {
351 for (i = input.c_str(); *i != 0; i++)
352 s << (wxChar)(m_Table[(wxUint8)*i]);
353 }
354
355 return s;
356 }
357
358
359
360
361
362
363
364 // Following tables describe classes of encoding equivalence.
365 //
366
367 #define STOP wxFONTENCODING_SYSTEM
368
369 #define NUM_OF_PLATFORMS 4 /*must conform to enum wxPLATFORM_XXXX !!!*/
370 #define ENC_PER_PLATFORM 5
371 // max no. of encodings for one language used on one platform
372 // Anybody thinks 5 is not enough? ;-)
373
374 static wxFontEncoding
375 EquivalentEncodings[][NUM_OF_PLATFORMS][ENC_PER_PLATFORM+1] = {
376
377 // *** Please put more common encodings as first! ***
378
379 // Western European
380 {
381 /* unix */ {wxFONTENCODING_ISO8859_1, wxFONTENCODING_ISO8859_15, STOP},
382 /* windows */ {wxFONTENCODING_CP1252, STOP},
383 /* os2 */ {STOP},
384 /* mac */ {wxFONTENCODING_MACROMAN, STOP}
385 },
386
387 // Central European
388 {
389 /* unix */ {wxFONTENCODING_ISO8859_2, STOP},
390 /* windows */ {wxFONTENCODING_CP1250, STOP},
391 /* os2 */ {STOP},
392 /* mac */ {wxFONTENCODING_MACCENTRALEUR, STOP}
393 },
394
395 // Baltic
396 {
397 /* unix */ {wxFONTENCODING_ISO8859_13, wxFONTENCODING_ISO8859_4, STOP},
398 /* windows */ {wxFONTENCODING_CP1257, STOP},
399 /* os2 */ {STOP},
400 /* mac */ {STOP}
401 },
402
403 // Hebrew
404 {
405 /* unix */ {wxFONTENCODING_ISO8859_8, STOP},
406 /* windows */ {wxFONTENCODING_CP1255, STOP},
407 /* os2 */ {STOP},
408 /* mac */ {wxFONTENCODING_MACHEBREW, STOP}
409 },
410
411 // Greek
412 {
413 /* unix */ {wxFONTENCODING_ISO8859_7, STOP},
414 /* windows */ {wxFONTENCODING_CP1253, STOP},
415 /* os2 */ {STOP},
416 /* mac */ {wxFONTENCODING_MACGREEK, STOP}
417 },
418
419 // Arabic
420 {
421 /* unix */ {wxFONTENCODING_ISO8859_6, STOP},
422 /* windows */ {wxFONTENCODING_CP1256, STOP},
423 /* os2 */ {STOP},
424 /* mac */ {wxFONTENCODING_MACARABIC, STOP}
425 },
426
427 // Turkish
428 {
429 /* unix */ {wxFONTENCODING_ISO8859_9, STOP},
430 /* windows */ {wxFONTENCODING_CP1254, STOP},
431 /* os2 */ {STOP},
432 /* mac */ {wxFONTENCODING_MACTURKISH, STOP}
433 },
434
435 // Cyrillic
436 {
437 /* unix */ {wxFONTENCODING_KOI8, wxFONTENCODING_ISO8859_5, STOP},
438 /* windows */ {wxFONTENCODING_CP1251, STOP},
439 /* os2 */ {STOP},
440 /* mac */ {wxFONTENCODING_MACCYRILLIC, STOP}
441 },
442
443 {{STOP},{STOP},{STOP},{STOP}} /* Terminator */
444 /* no, _not_ Arnold! */
445 };
446
447
448 static bool FindEncoding(const wxFontEncodingArray& arr, wxFontEncoding f)
449 {
450 for (wxFontEncodingArray::const_iterator it = arr.begin(), en = arr.end();
451 it != en; ++it)
452 if (*it == f)
453 return true;
454 return false;
455 }
456
457 wxFontEncodingArray wxEncodingConverter::GetPlatformEquivalents(wxFontEncoding enc, int platform)
458 {
459 if (platform == wxPLATFORM_CURRENT)
460 {
461 #if defined(__WXMSW__)
462 platform = wxPLATFORM_WINDOWS;
463 #elif defined(__WXGTK__) || defined(__WXMOTIF__)
464 platform = wxPLATFORM_UNIX;
465 #elif defined(__WXOS2__)
466 platform = wxPLATFORM_OS2;
467 #elif defined(__WXMAC__)
468 platform = wxPLATFORM_MAC;
469 #endif
470 }
471
472 int i, clas, e ;
473 wxFontEncoding *f;
474 wxFontEncodingArray arr;
475
476 clas = 0;
477 while (EquivalentEncodings[clas][0][0] != STOP)
478 {
479 for (i = 0; i < NUM_OF_PLATFORMS; i++)
480 for (e = 0; EquivalentEncodings[clas][i][e] != STOP; e++)
481 if (EquivalentEncodings[clas][i][e] == enc)
482 {
483 for (f = EquivalentEncodings[clas][platform]; *f != STOP; f++)
484 if (*f == enc) arr.push_back(enc);
485 for (f = EquivalentEncodings[clas][platform]; *f != STOP; f++)
486 if (!FindEncoding(arr, *f)) arr.push_back(*f);
487 i = NUM_OF_PLATFORMS/*hack*/; break;
488 }
489 clas++;
490 }
491
492 return arr;
493 }
494
495
496
497 wxFontEncodingArray wxEncodingConverter::GetAllEquivalents(wxFontEncoding enc)
498 {
499 int i, clas, e, j ;
500 wxFontEncoding *f;
501 wxFontEncodingArray arr;
502
503 arr = GetPlatformEquivalents(enc); // we want them to be first items in array
504
505 clas = 0;
506 while (EquivalentEncodings[clas][0][0] != STOP)
507 {
508 for (i = 0; i < NUM_OF_PLATFORMS; i++)
509 for (e = 0; EquivalentEncodings[clas][i][e] != STOP; e++)
510 if (EquivalentEncodings[clas][i][e] == enc)
511 {
512 for (j = 0; j < NUM_OF_PLATFORMS; j++)
513 for (f = EquivalentEncodings[clas][j]; *f != STOP; f++)
514 if (!FindEncoding(arr, *f)) arr.push_back(*f);
515 i = NUM_OF_PLATFORMS/*hack*/; break;
516 }
517 clas++;
518 }
519
520 return arr;
521 }
522
523 #endif // wxUSE_FONTMAP