]> git.saurik.com Git - wxWidgets.git/blob - src/common/encconv.cpp
applied correction from Marc Newsam in calculations of linesize
[wxWidgets.git] / src / common / encconv.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: encconv.cpp
3 // Purpose: wxEncodingConverter class for converting between different
4 // font encodings
5 // Author: Vaclav Slavik
6 // Copyright: (c) 1999 Vaclav Slavik
7 // Licence: wxWindows Licence
8 /////////////////////////////////////////////////////////////////////////////
9
10 #ifdef __GNUG__
11 #pragma implementation "encconv.h"
12 #endif
13
14 // For compilers that support precompilation, includes "wx.h".
15 #include "wx/wxprec.h"
16
17 #ifdef __BORLANDC__
18 #pragma hdrstop
19 #endif
20
21 #include "wx/encconv.h"
22
23 #include <stdlib.h>
24
25 // conversion tables, generated by scripts in $(WXWIN)/misc/unictabl:
26 #ifdef __BORLANDC__
27 #include "../common/unictabl.inc"
28 #else
29 #include "unictabl.inc"
30 #endif
31
// Element type of the conversion table (m_Table): wide characters when the
// build has wchar_t support, plain chars otherwise.
#if !wxUSE_WCHAR_T
typedef char tchar;
#else
typedef wchar_t tchar;
#endif
37
38 static wxUint16* LINKAGEMODE GetEncTable(wxFontEncoding enc)
39 {
40 for (int i = 0; encodings_list[i].table != NULL; i++)
41 {
42 if (encodings_list[i].encoding == enc)
43 return encodings_list[i].table;
44 }
45 return NULL;
46 }
47
48 typedef struct {
49 wxUint16 u;
50 wxUint8 c;
51 } CharsetItem;
52
53
54
55 static int LINKAGEMODE CompareCharsetItems(const void *i1, const void *i2)
56 {
57 return ( ((CharsetItem*)i1) -> u - ((CharsetItem*)i2) -> u );
58 }
59
60
61 static CharsetItem* LINKAGEMODE BuildReverseTable(wxUint16 *tbl)
62 {
63 CharsetItem *rev = new CharsetItem[128];
64
65 for (int i = 0; i < 128; i++)
66 rev[i].c = 128 + i, rev[i].u = tbl[i];
67
68 qsort(rev, 128, sizeof(CharsetItem), CompareCharsetItems);
69
70 return rev;
71 }
72
73
74
75 wxEncodingConverter::wxEncodingConverter()
76 {
77 m_Table = NULL;
78 m_UnicodeInput = m_UnicodeOutput = FALSE;
79 m_JustCopy = FALSE;
80 }
81
82
83
84 bool wxEncodingConverter::Init(wxFontEncoding input_enc, wxFontEncoding output_enc, int method)
85 {
86 unsigned i;
87 wxUint16 *in_tbl = NULL, *out_tbl = NULL;
88
89 if (m_Table) {delete[] m_Table; m_Table = NULL;}
90
91 #if !wxUSE_WCHAR_T
92 if (input_enc == wxFONTENCODING_UNICODE || output_enc == wxFONTENCODING_UNICODE) return FALSE;
93 #endif
94
95 if (input_enc == output_enc) {m_JustCopy = TRUE; return TRUE;}
96
97 m_UnicodeOutput = (output_enc == wxFONTENCODING_UNICODE);
98 m_JustCopy = FALSE;
99
100 if (input_enc == wxFONTENCODING_UNICODE)
101 {
102 if ((out_tbl = GetEncTable(output_enc)) == NULL) return FALSE;
103
104 m_Table = new tchar[65536];
105 for (i = 0; i < 128; i++) m_Table[i] = (tchar)i; // 7bit ASCII
106 for (i = 128; i < 65536; i++) m_Table[i] = (tchar)'?';
107 // FIXME - this should be character that means `unicode to charset' impossible, not '?'
108
109 if (method == wxCONVERT_SUBSTITUTE)
110 {
111 for (i = 0; i < encoding_unicode_fallback_count; i++)
112 m_Table[encoding_unicode_fallback[i].c] = (tchar) encoding_unicode_fallback[i].s;
113 }
114
115 for (i = 0; i < 128; i++)
116 m_Table[out_tbl[i]] = (tchar)(128 + i);
117
118 m_UnicodeInput = TRUE;
119 return TRUE;
120 }
121
122 else
123 {
124 if ((in_tbl = GetEncTable(input_enc)) == NULL) return FALSE;
125 if (output_enc != wxFONTENCODING_UNICODE)
126 if ((out_tbl = GetEncTable(output_enc)) == NULL) return FALSE;
127
128 m_UnicodeInput = FALSE;
129
130 m_Table = new tchar[256];
131 for (i = 0; i < 128; i++) m_Table[i] = (tchar)i; // 7bit ASCII
132
133 if (output_enc == wxFONTENCODING_UNICODE)
134 {
135 for (i = 0; i < 128; i++) m_Table[128 + i] = (tchar)in_tbl[i];
136 return TRUE;
137 }
138 else
139 {
140 CharsetItem *rev = BuildReverseTable(out_tbl);
141 CharsetItem *item;
142 CharsetItem key;
143
144 for (i = 0; i < 128; i++)
145 {
146 key.u = in_tbl[i];
147 item = (CharsetItem*) bsearch(&key, rev, 128, sizeof(CharsetItem), CompareCharsetItems);
148 if (item == NULL && method == wxCONVERT_SUBSTITUTE)
149 item = (CharsetItem*) bsearch(&key, encoding_unicode_fallback,
150 encoding_unicode_fallback_count, sizeof(CharsetItem), CompareCharsetItems);
151 if (item)
152 m_Table[128 + i] = (tchar)item -> c;
153 else
154 #if wxUSE_WCHAR_T
155 m_Table[128 + i] = (wchar_t)(128 + i);
156 #else
157 m_Table[128 + i] = (char)(128 + i);
158 #endif
159 }
160
161 delete[] rev;
162 return TRUE;
163 }
164 }
165 }
166
167
168
169 void wxEncodingConverter::Convert(const char* input, char* output)
170 {
171 wxASSERT_MSG(!m_UnicodeOutput, wxT("You cannot convert to unicode if output is const char*!"));
172 wxASSERT_MSG(!m_UnicodeInput, wxT("You cannot convert from unicode if input is const char*!"));
173
174 const char *i;
175 char *o;
176
177 if (m_JustCopy)
178 {
179 strcpy(output, input);
180 return;
181 }
182
183 wxASSERT_MSG(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
184
185 for (i = input, o = output; *i != 0;)
186 *(o++) = (char)(m_Table[(wxUint8)*(i++)]);
187 *o = 0;
188 }
189
190
191 #if wxUSE_WCHAR_T
192
193 void wxEncodingConverter::Convert(const char* input, wchar_t* output)
194 {
195 wxASSERT_MSG(m_UnicodeOutput, wxT("You cannot convert to 8-bit if output is const wchar_t*!"));
196 wxASSERT_MSG(!m_UnicodeInput, wxT("You cannot convert from unicode if input is const char*!"));
197
198 const char *i;
199 wchar_t *o;
200
201 if (m_JustCopy)
202 {
203 for (i = input, o = output; *i != 0;)
204 *(o++) = (wchar_t)(*(i++));
205 *o = 0;
206 return;
207 }
208
209 wxASSERT_MSG(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
210
211 for (i = input, o = output; *i != 0;)
212 *(o++) = (wchar_t)(m_Table[(wxUint8)*(i++)]);
213 *o = 0;
214 }
215
216
217
218 void wxEncodingConverter::Convert(const wchar_t* input, char* output)
219 {
220 wxASSERT_MSG(!m_UnicodeOutput, wxT("You cannot convert to unicode if output is const char*!"));
221 wxASSERT_MSG(m_UnicodeInput, wxT("You cannot convert from 8-bit if input is const wchar_t*!"));
222
223 const wchar_t *i;
224 char *o;
225
226 if (m_JustCopy)
227 {
228 for (i = input, o = output; *i != 0;)
229 *(o++) = (char)(*(i++));
230 *o = 0;
231 return;
232 }
233
234 wxASSERT_MSG(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
235
236 for (i = input, o = output; *i != 0;)
237 *(o++) = (char)(m_Table[(wxUint16)*(i++)]);
238 *o = 0;
239 }
240
241
242
243 void wxEncodingConverter::Convert(const wchar_t* input, wchar_t* output)
244 {
245 wxASSERT_MSG(m_UnicodeOutput, wxT("You cannot convert to 8-bit if output is const wchar_t*!"));
246 wxASSERT_MSG(m_UnicodeInput, wxT("You cannot convert from 8-bit if input is const wchar_t*!"));
247
248 const wchar_t *i;
249 wchar_t *o;
250
251 if (m_JustCopy)
252 {
253 // wcscpy() is not guaranteed to exist
254 for (i = input, o = output; *i != 0;)
255 *(o++) = (*(i++));
256 *o = 0;
257 return;
258 }
259
260 wxASSERT_MSG(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
261
262 for (i = input, o = output; *i != 0;)
263 *(o++) = (wchar_t)(m_Table[(wxUint8)*(i++)]);
264 *o = 0;
265 }
266
267 #endif // wxUSE_WCHAR_T
268
269
270 wxString wxEncodingConverter::Convert(const wxString& input)
271 {
272 if (m_JustCopy) return input;
273
274 wxString s;
275 const wxChar *i;
276
277 wxASSERT_MSG(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
278
279 if (m_UnicodeInput)
280 for (i = input.c_str(); *i != 0; i++)
281 s << (wxChar)(m_Table[(wxUint16)*i]);
282 else
283 for (i = input.c_str(); *i != 0; i++)
284 s << (wxChar)(m_Table[(wxUint8)*i]);
285 return s;
286 }
287
288
289
290
291
292
293
294 // Following tables describe classes of encoding equivalence.
295 //
296
297 #define STOP wxFONTENCODING_SYSTEM
298
299 #define NUM_OF_PLATFORMS 4 /*must conform to enum wxPLATFORM_XXXX !!!*/
300 #define ENC_PER_PLATFORM 5
301 // max no. of encodings for one language used on one platform
302 // Anybody thinks 5 is not enough? ;-)
303
304 static wxFontEncoding
305 EquivalentEncodings[][NUM_OF_PLATFORMS][ENC_PER_PLATFORM+1] = {
306
307 // *** Please put more common encodings as first! ***
308
309 // Western European
310 {
311 /* unix */ {wxFONTENCODING_ISO8859_1, wxFONTENCODING_ISO8859_15, STOP},
312 /* windows */ {wxFONTENCODING_CP1252, STOP},
313 /* os2 */ {STOP},
314 /* mac */ {STOP}
315 },
316
317 // Central European
318 {
319 /* unix */ {wxFONTENCODING_ISO8859_2, STOP},
320 /* windows */ {wxFONTENCODING_CP1250, STOP},
321 /* os2 */ {STOP},
322 /* mac */ {STOP}
323 },
324
325 // Baltic
326 {
327 /* unix */ {wxFONTENCODING_ISO8859_13, wxFONTENCODING_ISO8859_4, STOP},
328 /* windows */ {wxFONTENCODING_CP1257, STOP},
329 /* os2 */ {STOP},
330 /* mac */ {STOP}
331 },
332
333 // Hebrew
334 {
335 /* unix */ {wxFONTENCODING_ISO8859_8, STOP},
336 /* windows */ {wxFONTENCODING_CP1255, STOP},
337 /* os2 */ {STOP},
338 /* mac */ {STOP}
339 },
340
341 // Greek
342 {
343 /* unix */ {wxFONTENCODING_ISO8859_7, STOP},
344 /* windows */ {wxFONTENCODING_CP1253, STOP},
345 /* os2 */ {STOP},
346 /* mac */ {STOP}
347 },
348
349 // Arabic
350 {
351 /* unix */ {wxFONTENCODING_ISO8859_6, STOP},
352 /* windows */ {wxFONTENCODING_CP1256, STOP},
353 /* os2 */ {STOP},
354 /* mac */ {STOP}
355 },
356
357 // Turkish
358 {
359 /* unix */ {wxFONTENCODING_ISO8859_9, STOP},
360 /* windows */ {wxFONTENCODING_CP1254, STOP},
361 /* os2 */ {STOP},
362 /* mac */ {STOP}
363 },
364
365 // Cyrillic
366 {
367 /* unix */ {wxFONTENCODING_KOI8, wxFONTENCODING_ISO8859_5, STOP},
368 /* windows */ {wxFONTENCODING_CP1251, STOP},
369 /* os2 */ {STOP},
370 /* mac */ {STOP}
371 },
372
373 {{STOP},{STOP},{STOP},{STOP}} /* Terminator */
374 /* no, _not_ Arnold! */
375 };
376
377
378
379
380 wxFontEncodingArray wxEncodingConverter::GetPlatformEquivalents(wxFontEncoding enc, int platform)
381 {
382 if (platform == wxPLATFORM_CURRENT)
383 {
384 #if defined(__WXMSW__)
385 platform = wxPLATFORM_WINDOWS;
386 #elif defined(__WXGTK__) || defined(__WXMOTIF__)
387 platform = wxPLATFORM_UNIX;
388 #elif defined(__WXOS2__)
389 platform = wxPLATFORM_OS2;
390 #elif defined(__WXMAC__)
391 platform = wxPLATFORM_MAC;
392 #endif
393 }
394
395 int i, clas, e ;
396 wxFontEncoding *f;
397 wxFontEncodingArray arr;
398
399 clas = 0;
400 while (EquivalentEncodings[clas][0][0] != STOP)
401 {
402 for (i = 0; i < NUM_OF_PLATFORMS; i++)
403 for (e = 0; EquivalentEncodings[clas][i][e] != STOP; e++)
404 if (EquivalentEncodings[clas][i][e] == enc)
405 {
406 for (f = EquivalentEncodings[clas][platform]; *f != STOP; f++)
407 if (*f == enc) arr.Add(enc);
408 for (f = EquivalentEncodings[clas][platform]; *f != STOP; f++)
409 if (arr.Index(*f) == wxNOT_FOUND) arr.Add(*f);
410 i = NUM_OF_PLATFORMS/*hack*/; break;
411 }
412 clas++;
413 }
414
415 return arr;
416 }
417
418
419
420 wxFontEncodingArray wxEncodingConverter::GetAllEquivalents(wxFontEncoding enc)
421 {
422 int i, clas, e, j ;
423 wxFontEncoding *f;
424 wxFontEncodingArray arr;
425
426 arr = GetPlatformEquivalents(enc); // we want them to be first items in array
427
428 clas = 0;
429 while (EquivalentEncodings[clas][0][0] != STOP)
430 {
431 for (i = 0; i < NUM_OF_PLATFORMS; i++)
432 for (e = 0; EquivalentEncodings[clas][i][e] != STOP; e++)
433 if (EquivalentEncodings[clas][i][e] == enc)
434 {
435 for (j = 0; j < NUM_OF_PLATFORMS; j++)
436 for (f = EquivalentEncodings[clas][j]; *f != STOP; f++)
437 if (arr.Index(*f) == wxNOT_FOUND) arr.Add(*f);
438 i = NUM_OF_PLATFORMS/*hack*/; break;
439 }
440 clas++;
441 }
442
443 return arr;
444 }