]> git.saurik.com Git - wxWidgets.git/blob - src/common/encconv.cpp
Don't document wxSortedArrayString as deriving from wxArrayString.
[wxWidgets.git] / src / common / encconv.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/encconv.cpp
3 // Purpose: wxEncodingConverter class for converting between different
4 // font encodings
5 // Author: Vaclav Slavik
6 // Copyright: (c) 1999 Vaclav Slavik
7 // Licence: wxWindows licence
8 /////////////////////////////////////////////////////////////////////////////
9
10 // For compilers that support precompilation, includes "wx.h".
11 #include "wx/wxprec.h"
12
13 #ifdef __BORLANDC__
14 #pragma hdrstop
15 #endif
16
17 #include "wx/encconv.h"
18
19 #include <stdlib.h>
20
21 // conversion tables, generated by scripts in $(WXWIN)/misc/unictabl:
22 #if defined( __BORLANDC__ ) || defined(__DARWIN__)
23 #include "../common/unictabl.inc"
24 #else
25 #include "unictabl.inc"
26 #endif
27
28 #ifdef __WXMAC__
29 #include "wx/osx/core/cfstring.h"
30 #include <CoreFoundation/CFStringEncodingExt.h>
31
32 wxUint16 gMacEncodings[wxFONTENCODING_MACMAX-wxFONTENCODING_MACMIN+1][128] ;
33 bool gMacEncodingsInited[wxFONTENCODING_MACMAX-wxFONTENCODING_MACMIN+1] ;
34 #endif
35
36 #ifdef __WXWINCE__
37 #include "wx/msw/wince/missing.h" // for bsearch()
38 #endif
39
40 static const wxUint16* GetEncTable(wxFontEncoding enc)
41 {
42 #ifdef __WXMAC__
43 if( enc >= wxFONTENCODING_MACMIN && enc <= wxFONTENCODING_MACMAX )
44 {
45 int i = enc-wxFONTENCODING_MACMIN ;
46 if ( gMacEncodingsInited[i] == false )
47 {
48 // create
49 CFStringEncoding cfencoding = wxMacGetSystemEncFromFontEnc( enc ) ;
50 if( !CFStringIsEncodingAvailable( cfencoding ) )
51 return NULL;
52
53 memset( gMacEncodings[i] , 0 , 128 * 2 );
54 char s[2] = { 0 , 0 };
55 CFRange firstchar = CFRangeMake( 0, 1 );
56 for( unsigned char c = 255 ; c >= 128 ; --c )
57 {
58 s[0] = c ;
59 wxCFStringRef cfref( CFStringCreateWithCStringNoCopy( NULL, s, cfencoding , kCFAllocatorNull ) );
60 CFStringGetCharacters( cfref, firstchar, (UniChar*) &gMacEncodings[i][c-128] );
61 }
62 gMacEncodingsInited[i]=true;
63 }
64 return gMacEncodings[i] ;
65 }
66 #endif
67
68 for (int i = 0; encodings_list[i].table != NULL; i++)
69 {
70 if (encodings_list[i].encoding == enc)
71 return encodings_list[i].table;
72 }
73 return NULL;
74 }
75
76 typedef struct {
77 wxUint16 u;
78 wxUint8 c;
79 } CharsetItem;
80
81 extern "C"
82 {
83 static int wxCMPFUNC_CONV
84 CompareCharsetItems(const void *i1, const void *i2)
85 {
86 return ( ((CharsetItem*)i1) -> u - ((CharsetItem*)i2) -> u );
87 }
88 }
89
90 static CharsetItem* BuildReverseTable(const wxUint16 *tbl)
91 {
92 CharsetItem *rev = new CharsetItem[128];
93
94 for (int i = 0; i < 128; i++)
95 rev[i].c = wxUint8(128 + i), rev[i].u = tbl[i];
96
97 qsort(rev, 128, sizeof(CharsetItem), CompareCharsetItems);
98
99 return rev;
100 }
101
102
103
104 wxEncodingConverter::wxEncodingConverter()
105 {
106 m_Table = NULL;
107 m_UnicodeInput = m_UnicodeOutput = false;
108 m_JustCopy = false;
109 }
110
111
112
113 bool wxEncodingConverter::Init(wxFontEncoding input_enc, wxFontEncoding output_enc, int method)
114 {
115 unsigned i;
116 const wxUint16 *in_tbl;
117 const wxUint16 *out_tbl = NULL;
118
119 wxDELETEA(m_Table);
120
121 if (input_enc == output_enc) {m_JustCopy = true; return true;}
122
123 m_UnicodeOutput = (output_enc == wxFONTENCODING_UNICODE);
124 m_JustCopy = false;
125
126 if (input_enc == wxFONTENCODING_UNICODE)
127 {
128 if ((out_tbl = GetEncTable(output_enc)) == NULL) return false;
129
130 m_Table = new wchar_t[65536];
131 for (i = 0; i < 128; i++) m_Table[i] = (wchar_t)i; // 7bit ASCII
132 for (i = 128; i < 65536; i++) m_Table[i] = (wchar_t)0;
133
134 if (method == wxCONVERT_SUBSTITUTE)
135 {
136 for (i = 0; i < encoding_unicode_fallback_count; i++)
137 m_Table[encoding_unicode_fallback[i].c] = (wchar_t) encoding_unicode_fallback[i].s;
138 }
139
140 for (i = 0; i < 128; i++)
141 m_Table[out_tbl[i]] = (wchar_t)(128 + i);
142
143 m_UnicodeInput = true;
144 }
145 else // input !Unicode
146 {
147 if ((in_tbl = GetEncTable(input_enc)) == NULL) return false;
148 if (output_enc != wxFONTENCODING_UNICODE)
149 if ((out_tbl = GetEncTable(output_enc)) == NULL) return false;
150
151 m_UnicodeInput = false;
152
153 m_Table = new wchar_t[256];
154 for (i = 0; i < 128; i++) m_Table[i] = (wchar_t)i; // 7bit ASCII
155
156 if (output_enc == wxFONTENCODING_UNICODE)
157 {
158 for (i = 0; i < 128; i++) m_Table[128 + i] = (wchar_t)in_tbl[i];
159 return true;
160 }
161 else // output !Unicode
162 {
163 CharsetItem *rev = BuildReverseTable(out_tbl);
164 CharsetItem *item;
165 CharsetItem key;
166
167 for (i = 0; i < 128; i++)
168 {
169 key.u = in_tbl[i];
170 item = (CharsetItem*) bsearch(&key, rev, 128, sizeof(CharsetItem), CompareCharsetItems);
171 if (item == NULL && method == wxCONVERT_SUBSTITUTE)
172 item = (CharsetItem*) bsearch(&key, encoding_unicode_fallback,
173 encoding_unicode_fallback_count, sizeof(CharsetItem), CompareCharsetItems);
174 if (item)
175 m_Table[128 + i] = (wchar_t)item -> c;
176 else
177 m_Table[128 + i] = (wchar_t)(128 + i);
178 }
179
180 delete[] rev;
181 }
182 }
183
184 return true;
185 }
186
187
#define REPLACEMENT_CHAR (L'?')

// Returns table[value]. If that entry is 0 although value itself is not 0,
// REPLACEMENT_CHAR is returned instead and repl is set to true. repl is never
// reset to false here, so it accumulates over a sequence of calls.
inline wchar_t GetTableValue(const wchar_t *table, wchar_t value, bool& repl)
{
    const wchar_t mapped = table[value];
    if (mapped != 0 || value == 0)
        return mapped;

    repl = true;
    return REPLACEMENT_CHAR;
}
200
201
202 bool wxEncodingConverter::Convert(const char* input, char* output) const
203 {
204 wxASSERT_MSG(!m_UnicodeOutput, wxT("You cannot convert to unicode if output is const char*!"));
205 wxASSERT_MSG(!m_UnicodeInput, wxT("You cannot convert from unicode if input is const char*!"));
206
207 const char *i;
208 char *o;
209
210 if (m_JustCopy)
211 {
212 strcpy(output, input);
213 return true;
214 }
215
216 wxCHECK_MSG(m_Table != NULL, false,
217 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
218
219 bool replaced = false;
220
221 for (i = input, o = output; *i != 0;)
222 *(o++) = (char)(GetTableValue(m_Table, (wxUint8)*(i++), replaced));
223 *o = 0;
224
225 return !replaced;
226 }
227
228
229 bool wxEncodingConverter::Convert(const char* input, wchar_t* output) const
230 {
231 wxASSERT_MSG(m_UnicodeOutput, wxT("You cannot convert to 8-bit if output is const wchar_t*!"));
232 wxASSERT_MSG(!m_UnicodeInput, wxT("You cannot convert from unicode if input is const char*!"));
233
234 const char *i;
235 wchar_t *o;
236
237 if (m_JustCopy)
238 {
239 for (i = input, o = output; *i != 0;)
240 *(o++) = (wchar_t)(*(i++));
241 *o = 0;
242 return true;
243 }
244
245 wxCHECK_MSG(m_Table != NULL, false,
246 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
247
248 bool replaced = false;
249
250 for (i = input, o = output; *i != 0;)
251 *(o++) = (wchar_t)(GetTableValue(m_Table, (wxUint8)*(i++), replaced));
252 *o = 0;
253
254 return !replaced;
255 }
256
257
258
259 bool wxEncodingConverter::Convert(const wchar_t* input, char* output) const
260 {
261 wxASSERT_MSG(!m_UnicodeOutput, wxT("You cannot convert to unicode if output is const char*!"));
262 wxASSERT_MSG(m_UnicodeInput, wxT("You cannot convert from 8-bit if input is const wchar_t*!"));
263
264 const wchar_t *i;
265 char *o;
266
267 if (m_JustCopy)
268 {
269 for (i = input, o = output; *i != 0;)
270 *(o++) = (char)(*(i++));
271 *o = 0;
272 return true;
273 }
274
275 wxCHECK_MSG(m_Table != NULL, false,
276 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
277
278 bool replaced = false;
279
280 for (i = input, o = output; *i != 0;)
281 *(o++) = (char)(GetTableValue(m_Table, (wxUint16)*(i++), replaced));
282 *o = 0;
283
284 return !replaced;
285 }
286
287
288
289 bool wxEncodingConverter::Convert(const wchar_t* input, wchar_t* output) const
290 {
291 wxASSERT_MSG(m_UnicodeOutput, wxT("You cannot convert to 8-bit if output is const wchar_t*!"));
292 wxASSERT_MSG(m_UnicodeInput, wxT("You cannot convert from 8-bit if input is const wchar_t*!"));
293
294 const wchar_t *i;
295 wchar_t *o;
296
297 if (m_JustCopy)
298 {
299 // wcscpy() is not guaranteed to exist
300 for (i = input, o = output; *i != 0;)
301 *(o++) = (*(i++));
302 *o = 0;
303 return true;
304 }
305
306 wxCHECK_MSG(m_Table != NULL, false,
307 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
308
309 bool replaced = false;
310
311 for (i = input, o = output; *i != 0;)
312 *(o++) = (wchar_t)(GetTableValue(m_Table, (wxUint8)*(i++), replaced));
313 *o = 0;
314
315 return !replaced;
316 }
317
318
319 wxString wxEncodingConverter::Convert(const wxString& input) const
320 {
321 if (m_JustCopy) return input;
322
323 wxString s;
324 const wxChar *i;
325
326 wxCHECK_MSG(m_Table != NULL, s,
327 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
328
329 if (m_UnicodeInput)
330 {
331 for (i = input.c_str(); *i != 0; i++)
332 s << (wxChar)(m_Table[(wxUint16)*i]);
333 }
334 else
335 {
336 for (i = input.c_str(); *i != 0; i++)
337 s << (wxChar)(m_Table[(wxUint8)*i]);
338 }
339
340 return s;
341 }
342
343
344
345
346
347
348
349 // Following tables describe classes of encoding equivalence.
350 //
351
352 #define STOP wxFONTENCODING_SYSTEM
353
354 #define NUM_OF_PLATFORMS 4 /*must conform to enum wxPLATFORM_XXXX !!!*/
355 #define ENC_PER_PLATFORM 3
356 // max no. of encodings for one language used on one platform.
357 // Using maximum of everything at the current moment to not make the
358 // library larger than necessary. Make larger only if necessary - MR
359
360 static const wxFontEncoding
361 EquivalentEncodings[][NUM_OF_PLATFORMS][ENC_PER_PLATFORM+1] = {
362
363 // *** Please put more common encodings as first! ***
364
365 // Western European
366 {
367 /* unix */ {wxFONTENCODING_ISO8859_1, wxFONTENCODING_ISO8859_15, STOP},
368 /* windows */ {wxFONTENCODING_CP1252, STOP},
369 /* os2 */ {STOP},
370 /* mac */ {wxFONTENCODING_MACROMAN, STOP}
371 },
372
373 // Central European
374 {
375 /* unix */ {wxFONTENCODING_ISO8859_2, STOP},
376 /* windows */ {wxFONTENCODING_CP1250, STOP},
377 /* os2 */ {STOP},
378 /* mac */ {wxFONTENCODING_MACCENTRALEUR, STOP}
379 },
380
381 // Baltic
382 {
383 /* unix */ {wxFONTENCODING_ISO8859_13, wxFONTENCODING_ISO8859_4, STOP},
384 /* windows */ {wxFONTENCODING_CP1257, STOP},
385 /* os2 */ {STOP},
386 /* mac */ {STOP}
387 },
388
389 // Hebrew
390 {
391 /* unix */ {wxFONTENCODING_ISO8859_8, STOP},
392 /* windows */ {wxFONTENCODING_CP1255, STOP},
393 /* os2 */ {STOP},
394 /* mac */ {wxFONTENCODING_MACHEBREW, STOP}
395 },
396
397 // Greek
398 {
399 /* unix */ {wxFONTENCODING_ISO8859_7, STOP},
400 /* windows */ {wxFONTENCODING_CP1253, STOP},
401 /* os2 */ {STOP},
402 /* mac */ {wxFONTENCODING_MACGREEK, STOP}
403 },
404
405 // Arabic
406 {
407 /* unix */ {wxFONTENCODING_ISO8859_6, STOP},
408 /* windows */ {wxFONTENCODING_CP1256, STOP},
409 /* os2 */ {STOP},
410 /* mac */ {wxFONTENCODING_MACARABIC, STOP}
411 },
412
413 // Turkish
414 {
415 /* unix */ {wxFONTENCODING_ISO8859_9, STOP},
416 /* windows */ {wxFONTENCODING_CP1254, STOP},
417 /* os2 */ {STOP},
418 /* mac */ {wxFONTENCODING_MACTURKISH, STOP}
419 },
420
421 // Cyrillic
422 {
423 /* unix */ {wxFONTENCODING_KOI8, wxFONTENCODING_KOI8_U, wxFONTENCODING_ISO8859_5, STOP},
424 /* windows */ {wxFONTENCODING_CP1251, STOP},
425 /* os2 */ {STOP},
426 /* mac */ {wxFONTENCODING_MACCYRILLIC, STOP}
427 },
428
429 {{STOP},{STOP},{STOP},{STOP}} /* Terminator */
430 /* no, _not_ Arnold! */
431 };
432
433
434 static bool FindEncoding(const wxFontEncodingArray& arr, wxFontEncoding f)
435 {
436 for (wxFontEncodingArray::const_iterator it = arr.begin(), en = arr.end();
437 it != en; ++it)
438 if (*it == f)
439 return true;
440 return false;
441 }
442
443 wxFontEncodingArray wxEncodingConverter::GetPlatformEquivalents(wxFontEncoding enc, int platform)
444 {
445 if (platform == wxPLATFORM_CURRENT)
446 {
447 #if defined(__WINDOWS__)
448 platform = wxPLATFORM_WINDOWS;
449 #elif defined(__WXGTK__) || defined(__WXMOTIF__)
450 platform = wxPLATFORM_UNIX;
451 #elif defined(__WXPM__)
452 platform = wxPLATFORM_OS2;
453 #elif defined(__WXMAC__)
454 platform = wxPLATFORM_MAC;
455 #endif
456 }
457
458 int i, clas, e ;
459 const wxFontEncoding *f;
460 wxFontEncodingArray arr;
461
462 clas = 0;
463 while (EquivalentEncodings[clas][0][0] != STOP)
464 {
465 for (i = 0; i < NUM_OF_PLATFORMS; i++)
466 for (e = 0; EquivalentEncodings[clas][i][e] != STOP; e++)
467 if (EquivalentEncodings[clas][i][e] == enc)
468 {
469 for (f = EquivalentEncodings[clas][platform]; *f != STOP; f++)
470 if (*f == enc) arr.push_back(enc);
471 for (f = EquivalentEncodings[clas][platform]; *f != STOP; f++)
472 if (!FindEncoding(arr, *f)) arr.push_back(*f);
473 i = NUM_OF_PLATFORMS/*hack*/; break;
474 }
475 clas++;
476 }
477
478 return arr;
479 }
480
481
482
483 wxFontEncodingArray wxEncodingConverter::GetAllEquivalents(wxFontEncoding enc)
484 {
485 int i, clas, e, j ;
486 const wxFontEncoding *f;
487 wxFontEncodingArray arr;
488
489 arr = GetPlatformEquivalents(enc); // we want them to be first items in array
490
491 clas = 0;
492 while (EquivalentEncodings[clas][0][0] != STOP)
493 {
494 for (i = 0; i < NUM_OF_PLATFORMS; i++)
495 for (e = 0; EquivalentEncodings[clas][i][e] != STOP; e++)
496 if (EquivalentEncodings[clas][i][e] == enc)
497 {
498 for (j = 0; j < NUM_OF_PLATFORMS; j++)
499 for (f = EquivalentEncodings[clas][j]; *f != STOP; f++)
500 if (!FindEncoding(arr, *f)) arr.push_back(*f);
501 i = NUM_OF_PLATFORMS/*hack*/; break;
502 }
503 clas++;
504 }
505
506 return arr;
507 }
508