]> git.saurik.com Git - wxWidgets.git/blob - src/common/encconv.cpp
wxTextCtrl::GetRange() shouldn't crash on an out-of-range request
[wxWidgets.git] / src / common / encconv.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: encconv.cpp
3 // Purpose: wxEncodingConverter class for converting between different
4 // font encodings
5 // Author: Vaclav Slavik
6 // Copyright: (c) 1999 Vaclav Slavik
7 // Licence: wxWindows licence
8 /////////////////////////////////////////////////////////////////////////////
9
10 // For compilers that support precompilation, includes "wx.h".
11 #include "wx/wxprec.h"
12
13 #ifdef __BORLANDC__
14 #pragma hdrstop
15 #endif
16
17 #include "wx/encconv.h"
18
19 #include <stdlib.h>
20
21 // conversion tables, generated by scripts in $(WXWIN)/misc/unictabl:
22 #if defined( __BORLANDC__ ) || defined(__DARWIN__)
23 #include "../common/unictabl.inc"
24 #else
25 #include "unictabl.inc"
26 #endif
27
// Element type of the conversion table (m_Table): wide characters when the
// build supports wchar_t, plain chars otherwise.
#if !wxUSE_WCHAR_T
    typedef char tchar;
#else
    typedef wchar_t tchar;
#endif
33
34 #ifdef __WXMAC__
35 #include "wx/mac/corefoundation/cfstring.h"
36 #include <CoreFoundation/CFStringEncodingExt.h>
37
38 wxUint16 gMacEncodings[wxFONTENCODING_MACMAX-wxFONTENCODING_MACMIN+1][128] ;
39 bool gMacEncodingsInited[wxFONTENCODING_MACMAX-wxFONTENCODING_MACMIN+1] ;
40 #endif
41
42 #ifdef __WXWINCE__
43 #include "wx/msw/wince/missing.h" // for bsearch()
44 #endif
45
46 static const wxUint16* GetEncTable(wxFontEncoding enc)
47 {
48 #ifdef __WXMAC__
49 if( enc >= wxFONTENCODING_MACMIN && enc <= wxFONTENCODING_MACMAX )
50 {
51 int i = enc-wxFONTENCODING_MACMIN ;
52 if ( gMacEncodingsInited[i] == false )
53 {
54 // create
55 CFStringEncoding cfencoding = wxMacGetSystemEncFromFontEnc( enc ) ;
56 if( !CFStringIsEncodingAvailable( cfencoding ) )
57 return NULL;
58
59 memset( gMacEncodings[i] , 0 , 128 * 2 );
60 char s[2] = { 0 , 0 };
61 CFRange firstchar = CFRangeMake( 0, 1 );
62 for( unsigned char c = 255 ; c >= 128 ; --c )
63 {
64 s[0] = c ;
65 wxCFStringRef cfref( CFStringCreateWithCStringNoCopy( NULL, s, cfencoding , kCFAllocatorNull ) );
66 CFStringGetCharacters( cfref, firstchar, (UniChar*) &gMacEncodings[i][c-128] );
67 }
68 gMacEncodingsInited[i]=true;
69 }
70 return gMacEncodings[i] ;
71 }
72 #endif
73
74 for (int i = 0; encodings_list[i].table != NULL; i++)
75 {
76 if (encodings_list[i].encoding == enc)
77 return encodings_list[i].table;
78 }
79 return NULL;
80 }
81
82 typedef struct {
83 wxUint16 u;
84 wxUint8 c;
85 } CharsetItem;
86
87 extern "C" int wxCMPFUNC_CONV
88 CompareCharsetItems(const void *i1, const void *i2)
89 {
90 return ( ((CharsetItem*)i1) -> u - ((CharsetItem*)i2) -> u );
91 }
92
93
94 static CharsetItem* BuildReverseTable(const wxUint16 *tbl)
95 {
96 CharsetItem *rev = new CharsetItem[128];
97
98 for (int i = 0; i < 128; i++)
99 rev[i].c = wxUint8(128 + i), rev[i].u = tbl[i];
100
101 qsort(rev, 128, sizeof(CharsetItem), CompareCharsetItems);
102
103 return rev;
104 }
105
106
107
108 wxEncodingConverter::wxEncodingConverter()
109 {
110 m_Table = NULL;
111 m_UnicodeInput = m_UnicodeOutput = false;
112 m_JustCopy = false;
113 }
114
115
116
117 bool wxEncodingConverter::Init(wxFontEncoding input_enc, wxFontEncoding output_enc, int method)
118 {
119 unsigned i;
120 const wxUint16 *in_tbl;
121 const wxUint16 *out_tbl = NULL;
122
123 if (m_Table) {delete[] m_Table; m_Table = NULL;}
124
125 #if !wxUSE_WCHAR_T
126 if (input_enc == wxFONTENCODING_UNICODE || output_enc == wxFONTENCODING_UNICODE) return false;
127 #endif
128
129 if (input_enc == output_enc) {m_JustCopy = true; return true;}
130
131 m_UnicodeOutput = (output_enc == wxFONTENCODING_UNICODE);
132 m_JustCopy = false;
133
134 if (input_enc == wxFONTENCODING_UNICODE)
135 {
136 if ((out_tbl = GetEncTable(output_enc)) == NULL) return false;
137
138 m_Table = new tchar[65536];
139 for (i = 0; i < 128; i++) m_Table[i] = (tchar)i; // 7bit ASCII
140 for (i = 128; i < 65536; i++) m_Table[i] = (tchar)0;
141
142 if (method == wxCONVERT_SUBSTITUTE)
143 {
144 for (i = 0; i < encoding_unicode_fallback_count; i++)
145 m_Table[encoding_unicode_fallback[i].c] = (tchar) encoding_unicode_fallback[i].s;
146 }
147
148 for (i = 0; i < 128; i++)
149 m_Table[out_tbl[i]] = (tchar)(128 + i);
150
151 m_UnicodeInput = true;
152 }
153 else // input !Unicode
154 {
155 if ((in_tbl = GetEncTable(input_enc)) == NULL) return false;
156 if (output_enc != wxFONTENCODING_UNICODE)
157 if ((out_tbl = GetEncTable(output_enc)) == NULL) return false;
158
159 m_UnicodeInput = false;
160
161 m_Table = new tchar[256];
162 for (i = 0; i < 128; i++) m_Table[i] = (tchar)i; // 7bit ASCII
163
164 if (output_enc == wxFONTENCODING_UNICODE)
165 {
166 for (i = 0; i < 128; i++) m_Table[128 + i] = (tchar)in_tbl[i];
167 return true;
168 }
169 else // output !Unicode
170 {
171 CharsetItem *rev = BuildReverseTable(out_tbl);
172 CharsetItem *item;
173 CharsetItem key;
174
175 for (i = 0; i < 128; i++)
176 {
177 key.u = in_tbl[i];
178 item = (CharsetItem*) bsearch(&key, rev, 128, sizeof(CharsetItem), CompareCharsetItems);
179 if (item == NULL && method == wxCONVERT_SUBSTITUTE)
180 item = (CharsetItem*) bsearch(&key, encoding_unicode_fallback,
181 encoding_unicode_fallback_count, sizeof(CharsetItem), CompareCharsetItems);
182 if (item)
183 m_Table[128 + i] = (tchar)item -> c;
184 else
185 #if wxUSE_WCHAR_T
186 m_Table[128 + i] = (wchar_t)(128 + i);
187 #else
188 m_Table[128 + i] = (char)(128 + i);
189 #endif
190 }
191
192 delete[] rev;
193 }
194 }
195
196 return true;
197 }
198
199
200 #define REPLACEMENT_CHAR ((tchar)'?')
201
202 inline tchar GetTableValue(const tchar *table, tchar value, bool& repl)
203 {
204 tchar r = table[value];
205 if (r == 0 && value != 0)
206 {
207 r = REPLACEMENT_CHAR;
208 repl = true;
209 }
210 return r;
211 }
212
213
214 bool wxEncodingConverter::Convert(const char* input, char* output) const
215 {
216 wxASSERT_MSG(!m_UnicodeOutput, wxT("You cannot convert to unicode if output is const char*!"));
217 wxASSERT_MSG(!m_UnicodeInput, wxT("You cannot convert from unicode if input is const char*!"));
218
219 const char *i;
220 char *o;
221
222 if (m_JustCopy)
223 {
224 strcpy(output, input);
225 return true;
226 }
227
228 wxCHECK_MSG(m_Table != NULL, false,
229 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
230
231 bool replaced = false;
232
233 for (i = input, o = output; *i != 0;)
234 *(o++) = (char)(GetTableValue(m_Table, (wxUint8)*(i++), replaced));
235 *o = 0;
236
237 return !replaced;
238 }
239
240
241 #if wxUSE_WCHAR_T
242
243 bool wxEncodingConverter::Convert(const char* input, wchar_t* output) const
244 {
245 wxASSERT_MSG(m_UnicodeOutput, wxT("You cannot convert to 8-bit if output is const wchar_t*!"));
246 wxASSERT_MSG(!m_UnicodeInput, wxT("You cannot convert from unicode if input is const char*!"));
247
248 const char *i;
249 wchar_t *o;
250
251 if (m_JustCopy)
252 {
253 for (i = input, o = output; *i != 0;)
254 *(o++) = (wchar_t)(*(i++));
255 *o = 0;
256 return true;
257 }
258
259 wxCHECK_MSG(m_Table != NULL, false,
260 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
261
262 bool replaced = false;
263
264 for (i = input, o = output; *i != 0;)
265 *(o++) = (wchar_t)(GetTableValue(m_Table, (wxUint8)*(i++), replaced));
266 *o = 0;
267
268 return !replaced;
269 }
270
271
272
273 bool wxEncodingConverter::Convert(const wchar_t* input, char* output) const
274 {
275 wxASSERT_MSG(!m_UnicodeOutput, wxT("You cannot convert to unicode if output is const char*!"));
276 wxASSERT_MSG(m_UnicodeInput, wxT("You cannot convert from 8-bit if input is const wchar_t*!"));
277
278 const wchar_t *i;
279 char *o;
280
281 if (m_JustCopy)
282 {
283 for (i = input, o = output; *i != 0;)
284 *(o++) = (char)(*(i++));
285 *o = 0;
286 return true;
287 }
288
289 wxCHECK_MSG(m_Table != NULL, false,
290 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
291
292 bool replaced = false;
293
294 for (i = input, o = output; *i != 0;)
295 *(o++) = (char)(GetTableValue(m_Table, (wxUint16)*(i++), replaced));
296 *o = 0;
297
298 return !replaced;
299 }
300
301
302
303 bool wxEncodingConverter::Convert(const wchar_t* input, wchar_t* output) const
304 {
305 wxASSERT_MSG(m_UnicodeOutput, wxT("You cannot convert to 8-bit if output is const wchar_t*!"));
306 wxASSERT_MSG(m_UnicodeInput, wxT("You cannot convert from 8-bit if input is const wchar_t*!"));
307
308 const wchar_t *i;
309 wchar_t *o;
310
311 if (m_JustCopy)
312 {
313 // wcscpy() is not guaranteed to exist
314 for (i = input, o = output; *i != 0;)
315 *(o++) = (*(i++));
316 *o = 0;
317 return true;
318 }
319
320 wxCHECK_MSG(m_Table != NULL, false,
321 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
322
323 bool replaced = false;
324
325 for (i = input, o = output; *i != 0;)
326 *(o++) = (wchar_t)(GetTableValue(m_Table, (wxUint8)*(i++), replaced));
327 *o = 0;
328
329 return !replaced;
330 }
331
332 #endif // wxUSE_WCHAR_T
333
334
335 wxString wxEncodingConverter::Convert(const wxString& input) const
336 {
337 if (m_JustCopy) return input;
338
339 wxString s;
340 const wxChar *i;
341
342 wxCHECK_MSG(m_Table != NULL, s,
343 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
344
345 if (m_UnicodeInput)
346 {
347 for (i = input.c_str(); *i != 0; i++)
348 s << (wxChar)(m_Table[(wxUint16)*i]);
349 }
350 else
351 {
352 for (i = input.c_str(); *i != 0; i++)
353 s << (wxChar)(m_Table[(wxUint8)*i]);
354 }
355
356 return s;
357 }
358
359
360
361
362
363
364
365 // Following tables describe classes of encoding equivalence.
366 //
367
368 #define STOP wxFONTENCODING_SYSTEM
369
370 #define NUM_OF_PLATFORMS 4 /*must conform to enum wxPLATFORM_XXXX !!!*/
371 #define ENC_PER_PLATFORM 3
372 // max no. of encodings for one language used on one platform.
373 // Using maximum of everything at the current moment to not make the
374 // library larger than necessary. Make larger only if necessary - MR
375
376 static const wxFontEncoding
377 EquivalentEncodings[][NUM_OF_PLATFORMS][ENC_PER_PLATFORM+1] = {
378
379 // *** Please put more common encodings as first! ***
380
381 // Western European
382 {
383 /* unix */ {wxFONTENCODING_ISO8859_1, wxFONTENCODING_ISO8859_15, STOP},
384 /* windows */ {wxFONTENCODING_CP1252, STOP},
385 /* os2 */ {STOP},
386 /* mac */ {wxFONTENCODING_MACROMAN, STOP}
387 },
388
389 // Central European
390 {
391 /* unix */ {wxFONTENCODING_ISO8859_2, STOP},
392 /* windows */ {wxFONTENCODING_CP1250, STOP},
393 /* os2 */ {STOP},
394 /* mac */ {wxFONTENCODING_MACCENTRALEUR, STOP}
395 },
396
397 // Baltic
398 {
399 /* unix */ {wxFONTENCODING_ISO8859_13, wxFONTENCODING_ISO8859_4, STOP},
400 /* windows */ {wxFONTENCODING_CP1257, STOP},
401 /* os2 */ {STOP},
402 /* mac */ {STOP}
403 },
404
405 // Hebrew
406 {
407 /* unix */ {wxFONTENCODING_ISO8859_8, STOP},
408 /* windows */ {wxFONTENCODING_CP1255, STOP},
409 /* os2 */ {STOP},
410 /* mac */ {wxFONTENCODING_MACHEBREW, STOP}
411 },
412
413 // Greek
414 {
415 /* unix */ {wxFONTENCODING_ISO8859_7, STOP},
416 /* windows */ {wxFONTENCODING_CP1253, STOP},
417 /* os2 */ {STOP},
418 /* mac */ {wxFONTENCODING_MACGREEK, STOP}
419 },
420
421 // Arabic
422 {
423 /* unix */ {wxFONTENCODING_ISO8859_6, STOP},
424 /* windows */ {wxFONTENCODING_CP1256, STOP},
425 /* os2 */ {STOP},
426 /* mac */ {wxFONTENCODING_MACARABIC, STOP}
427 },
428
429 // Turkish
430 {
431 /* unix */ {wxFONTENCODING_ISO8859_9, STOP},
432 /* windows */ {wxFONTENCODING_CP1254, STOP},
433 /* os2 */ {STOP},
434 /* mac */ {wxFONTENCODING_MACTURKISH, STOP}
435 },
436
437 // Cyrillic
438 {
439 /* unix */ {wxFONTENCODING_KOI8, wxFONTENCODING_KOI8_U, wxFONTENCODING_ISO8859_5, STOP},
440 /* windows */ {wxFONTENCODING_CP1251, STOP},
441 /* os2 */ {STOP},
442 /* mac */ {wxFONTENCODING_MACCYRILLIC, STOP}
443 },
444
445 {{STOP},{STOP},{STOP},{STOP}} /* Terminator */
446 /* no, _not_ Arnold! */
447 };
448
449
450 static bool FindEncoding(const wxFontEncodingArray& arr, wxFontEncoding f)
451 {
452 for (wxFontEncodingArray::const_iterator it = arr.begin(), en = arr.end();
453 it != en; ++it)
454 if (*it == f)
455 return true;
456 return false;
457 }
458
459 wxFontEncodingArray wxEncodingConverter::GetPlatformEquivalents(wxFontEncoding enc, int platform)
460 {
461 if (platform == wxPLATFORM_CURRENT)
462 {
463 #if defined(__WXMSW__)
464 platform = wxPLATFORM_WINDOWS;
465 #elif defined(__WXGTK__) || defined(__WXMOTIF__)
466 platform = wxPLATFORM_UNIX;
467 #elif defined(__WXPM__)
468 platform = wxPLATFORM_OS2;
469 #elif defined(__WXMAC__)
470 platform = wxPLATFORM_MAC;
471 #endif
472 }
473
474 int i, clas, e ;
475 const wxFontEncoding *f;
476 wxFontEncodingArray arr;
477
478 clas = 0;
479 while (EquivalentEncodings[clas][0][0] != STOP)
480 {
481 for (i = 0; i < NUM_OF_PLATFORMS; i++)
482 for (e = 0; EquivalentEncodings[clas][i][e] != STOP; e++)
483 if (EquivalentEncodings[clas][i][e] == enc)
484 {
485 for (f = EquivalentEncodings[clas][platform]; *f != STOP; f++)
486 if (*f == enc) arr.push_back(enc);
487 for (f = EquivalentEncodings[clas][platform]; *f != STOP; f++)
488 if (!FindEncoding(arr, *f)) arr.push_back(*f);
489 i = NUM_OF_PLATFORMS/*hack*/; break;
490 }
491 clas++;
492 }
493
494 return arr;
495 }
496
497
498
499 wxFontEncodingArray wxEncodingConverter::GetAllEquivalents(wxFontEncoding enc)
500 {
501 int i, clas, e, j ;
502 const wxFontEncoding *f;
503 wxFontEncodingArray arr;
504
505 arr = GetPlatformEquivalents(enc); // we want them to be first items in array
506
507 clas = 0;
508 while (EquivalentEncodings[clas][0][0] != STOP)
509 {
510 for (i = 0; i < NUM_OF_PLATFORMS; i++)
511 for (e = 0; EquivalentEncodings[clas][i][e] != STOP; e++)
512 if (EquivalentEncodings[clas][i][e] == enc)
513 {
514 for (j = 0; j < NUM_OF_PLATFORMS; j++)
515 for (f = EquivalentEncodings[clas][j]; *f != STOP; f++)
516 if (!FindEncoding(arr, *f)) arr.push_back(*f);
517 i = NUM_OF_PLATFORMS/*hack*/; break;
518 }
519 clas++;
520 }
521
522 return arr;
523 }
524