]> git.saurik.com Git - wxWidgets.git/blob - src/common/encconv.cpp
fixed Cyrillic encodings
[wxWidgets.git] / src / common / encconv.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: encconv.cpp
3 // Purpose: wxEncodingConverter class for converting between different
4 // font encodings
5 // Author: Vaclav Slavik
6 // Copyright: (c) 1999 Vaclav Slavik
7 // Licence: wxWindows Licence
8 /////////////////////////////////////////////////////////////////////////////
9
10 #ifdef __GNUG__
11 #pragma implementation "encconv.h"
12 #endif
13
14 // For compilers that support precompilation, includes "wx.h".
15 #include "wx/wxprec.h"
16
17 #ifdef __BORLANDC__
18 #pragma hdrstop
19 #endif
20
21 #include "wx/encconv.h"
22
23 #include <stdlib.h>
24
25 // conversion tables, generated by scripts in $(WXWIN)/misc/unictabl:
26 #ifdef __BORLANDC__
27 #include "../common/unictabl.inc"
28 #else
29 #include "unictabl.inc"
30 #endif
31
// Element type of the conversion table allocated in Init(): plain char when
// wchar_t support is compiled out, wchar_t otherwise.
#if !wxUSE_WCHAR_T
typedef char tchar;
#else
typedef wchar_t tchar;
#endif
37
38 static wxUint16* LINKAGEMODE GetEncTable(wxFontEncoding enc)
39 {
40 for (int i = 0; encodings_list[i].table != NULL; i++)
41 {
42 if (encodings_list[i].encoding == enc)
43 return encodings_list[i].table;
44 }
45 return NULL;
46 }
47
48 typedef struct {
49 wxUint16 u;
50 wxUint8 c;
51 } CharsetItem;
52
53
54
55 static int LINKAGEMODE CompareCharsetItems(const void *i1, const void *i2)
56 {
57 return ( ((CharsetItem*)i1) -> u - ((CharsetItem*)i2) -> u );
58 }
59
60
61 static CharsetItem* LINKAGEMODE BuildReverseTable(wxUint16 *tbl)
62 {
63 CharsetItem *rev = new CharsetItem[128];
64
65 for (int i = 0; i < 128; i++)
66 rev[i].c = 128 + i, rev[i].u = tbl[i];
67
68 qsort(rev, 128, sizeof(CharsetItem), CompareCharsetItems);
69
70 return rev;
71 }
72
73
74
75 wxEncodingConverter::wxEncodingConverter()
76 {
77 m_Table = NULL;
78 m_UnicodeInput = m_UnicodeOutput = FALSE;
79 m_JustCopy = FALSE;
80 }
81
82
83
84 bool wxEncodingConverter::Init(wxFontEncoding input_enc, wxFontEncoding output_enc, int method)
85 {
86 unsigned i;
87 wxUint16 *in_tbl = NULL, *out_tbl = NULL;
88
89 if (m_Table) {delete[] m_Table; m_Table = NULL;}
90
91 #if !wxUSE_WCHAR_T
92 if (input_enc == wxFONTENCODING_UNICODE || output_enc == wxFONTENCODING_UNICODE) return FALSE;
93 #endif
94
95 if (input_enc == output_enc) {m_JustCopy = TRUE; return TRUE;}
96
97 m_UnicodeOutput = (output_enc == wxFONTENCODING_UNICODE);
98 m_JustCopy = FALSE;
99
100 if (input_enc == wxFONTENCODING_UNICODE)
101 {
102 if ((out_tbl = GetEncTable(output_enc)) == NULL) return FALSE;
103
104 m_Table = new tchar[65536];
105 for (i = 0; i < 128; i++) m_Table[i] = (tchar)i; // 7bit ASCII
106 for (i = 128; i < 65536; i++) m_Table[i] = (tchar)'?';
107 // FIXME - this should be character that means `unicode to charset' impossible, not '?'
108
109 if (method == wxCONVERT_SUBSTITUTE)
110 {
111 for (i = 0; i < encoding_unicode_fallback_count; i++)
112 m_Table[encoding_unicode_fallback[i].c] = (tchar) encoding_unicode_fallback[i].s;
113 }
114
115 for (i = 0; i < 128; i++)
116 m_Table[out_tbl[i]] = (tchar)(128 + i);
117
118 m_UnicodeInput = TRUE;
119 return TRUE;
120 }
121
122 else
123 {
124 if ((in_tbl = GetEncTable(input_enc)) == NULL) return FALSE;
125 if (output_enc != wxFONTENCODING_UNICODE)
126 if ((out_tbl = GetEncTable(output_enc)) == NULL) return FALSE;
127
128 m_UnicodeInput = FALSE;
129
130 m_Table = new tchar[256];
131 for (i = 0; i < 128; i++) m_Table[i] = (tchar)i; // 7bit ASCII
132
133 if (output_enc == wxFONTENCODING_UNICODE)
134 {
135 for (i = 0; i < 128; i++) m_Table[128 + i] = (tchar)in_tbl[i];
136 return TRUE;
137 }
138 else
139 {
140 CharsetItem *rev = BuildReverseTable(out_tbl);
141 CharsetItem *item, key;
142
143 for (i = 0; i < 128; i++)
144 {
145 key.u = in_tbl[i];
146 item = (CharsetItem*) bsearch(&key, rev, 128, sizeof(CharsetItem), CompareCharsetItems);
147 if (item == NULL && method == wxCONVERT_SUBSTITUTE)
148 item = (CharsetItem*) bsearch(&key, encoding_unicode_fallback,
149 encoding_unicode_fallback_count, sizeof(CharsetItem), CompareCharsetItems);
150 if (item)
151 m_Table[128 + i] = (tchar)item -> c;
152 else
153 m_Table[128 + i] = 128 + i; // don't know => don't touch
154 }
155
156 delete[] rev;
157 return TRUE;
158 }
159 }
160 }
161
162
163
164 void wxEncodingConverter::Convert(const char* input, char* output)
165 {
166 wxASSERT_MSG(!m_UnicodeOutput, wxT("You cannot convert to unicode if output is const char*!"));
167 wxASSERT_MSG(!m_UnicodeInput, wxT("You cannot convert from unicode if input is const char*!"));
168
169 const char *i;
170 char *o;
171
172 if (m_JustCopy)
173 {
174 strcpy(output, input);
175 return;
176 }
177
178 wxASSERT_MSG(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
179
180 for (i = input, o = output; *i != 0;)
181 *(o++) = (char)(m_Table[(wxUint8)*(i++)]);
182 *o = 0;
183 }
184
185
186 #if wxUSE_WCHAR_T
187
188 void wxEncodingConverter::Convert(const char* input, wchar_t* output)
189 {
190 wxASSERT_MSG(m_UnicodeOutput, wxT("You cannot convert to 8-bit if output is const wchar_t*!"));
191 wxASSERT_MSG(!m_UnicodeInput, wxT("You cannot convert from unicode if input is const char*!"));
192
193 const char *i;
194 wchar_t *o;
195
196 if (m_JustCopy)
197 {
198 for (i = input, o = output; *i != 0;)
199 *(o++) = (wchar_t)(*(i++));
200 *o = 0;
201 return;
202 }
203
204 wxASSERT_MSG(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
205
206 for (i = input, o = output; *i != 0;)
207 *(o++) = (wchar_t)(m_Table[(wxUint8)*(i++)]);
208 *o = 0;
209 }
210
211
212
213 void wxEncodingConverter::Convert(const wchar_t* input, char* output)
214 {
215 wxASSERT_MSG(!m_UnicodeOutput, wxT("You cannot convert to unicode if output is const char*!"));
216 wxASSERT_MSG(m_UnicodeInput, wxT("You cannot convert from 8-bit if input is const wchar_t*!"));
217
218 const wchar_t *i;
219 char *o;
220
221 if (m_JustCopy)
222 {
223 for (i = input, o = output; *i != 0;)
224 *(o++) = (char)(*(i++));
225 *o = 0;
226 return;
227 }
228
229 wxASSERT_MSG(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
230
231 for (i = input, o = output; *i != 0;)
232 *(o++) = (char)(m_Table[(wxUint16)*(i++)]);
233 *o = 0;
234 }
235
236
237
238 void wxEncodingConverter::Convert(const wchar_t* input, wchar_t* output)
239 {
240 wxASSERT_MSG(m_UnicodeOutput, wxT("You cannot convert to 8-bit if output is const wchar_t*!"));
241 wxASSERT_MSG(m_UnicodeInput, wxT("You cannot convert from 8-bit if input is const wchar_t*!"));
242
243 const wchar_t *i;
244 wchar_t *o;
245
246 if (m_JustCopy)
247 {
248 // wcscpy() is not guaranteed to exist
249 for (i = input, o = output; *i != 0;)
250 *(o++) = (*(i++));
251 *o = 0;
252 return;
253 }
254
255 wxASSERT_MSG(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
256
257 for (i = input, o = output; *i != 0;)
258 *(o++) = (wchar_t)(m_Table[(wxUint8)*(i++)]);
259 *o = 0;
260 }
261
262 #endif // wxUSE_WCHAR_T
263
264
265 wxString wxEncodingConverter::Convert(const wxString& input)
266 {
267 if (m_JustCopy) return input;
268
269 wxString s;
270 const wxChar *i;
271
272 wxASSERT_MSG(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
273
274 if (m_UnicodeInput)
275 for (i = input.c_str(); *i != 0; i++)
276 s << (wxChar)(m_Table[(wxUint16)*i]);
277 else
278 for (i = input.c_str(); *i != 0; i++)
279 s << (wxChar)(m_Table[(wxUint8)*i]);
280 return s;
281 }
282
283
284
285
286
287
288
289 // Following tables describe classes of encoding equivalence.
290 //
291
292 #define STOP wxFONTENCODING_SYSTEM
293
294 #define NUM_OF_PLATFORMS 4 /*must conform to enum wxPLATFORM_XXXX !!!*/
295 #define ENC_PER_PLATFORM 5
296 // max no. of encodings for one language used on one platform
297 // Anybody thinks 5 is not enough? ;-)
298
299 static wxFontEncoding
300 EquivalentEncodings[][NUM_OF_PLATFORMS][ENC_PER_PLATFORM+1] = {
301
302 // *** Please put more common encodings as first! ***
303
304 // West European
305 {
306 /* unix */ {wxFONTENCODING_ISO8859_1, wxFONTENCODING_ISO8859_15, STOP},
307 /* windows */ {wxFONTENCODING_CP1252, STOP},
308 /* os2 */ {STOP},
309 /* mac */ {STOP}
310 },
311
312 // Central European
313 {
314 /* unix */ {wxFONTENCODING_ISO8859_2, STOP},
315 /* windows */ {wxFONTENCODING_CP1250, STOP},
316 /* os2 */ {STOP},
317 /* mac */ {STOP}
318 },
319
320 // Baltic
321 {
322 /* unix */ {wxFONTENCODING_ISO8859_13, STOP},
323 /* windows */ {wxFONTENCODING_CP1257, STOP},
324 /* os2 */ {STOP},
325 /* mac */ {STOP}
326 },
327
328 // Hebrew
329 {
330 /* unix */ {wxFONTENCODING_ISO8859_8, STOP},
331 /* windows */ {wxFONTENCODING_CP1255, STOP},
332 /* os2 */ {STOP},
333 /* mac */ {STOP}
334 },
335
336 // Greek
337 {
338 /* unix */ {wxFONTENCODING_ISO8859_7, STOP},
339 /* windows */ {wxFONTENCODING_CP1253, STOP},
340 /* os2 */ {STOP},
341 /* mac */ {STOP}
342 },
343
344 // Arabic
345 {
346 /* unix */ {wxFONTENCODING_ISO8859_6, STOP},
347 /* windows */ {wxFONTENCODING_CP1256, STOP},
348 /* os2 */ {STOP},
349 /* mac */ {STOP}
350 },
351
352 // Turkish
353 {
354 /* unix */ {wxFONTENCODING_ISO8859_9, STOP},
355 /* windows */ {wxFONTENCODING_CP1254, STOP},
356 /* os2 */ {STOP},
357 /* mac */ {STOP}
358 },
359
360 // Cyrillic
361 {
362 /* unix */ {wxFONTENCODING_KOI8, wxFONTENCODING_ISO8859_5, STOP},
363 /* windows */ {wxFONTENCODING_CP1251, STOP},
364 /* os2 */ {STOP},
365 /* mac */ {STOP}
366 },
367
368 {{STOP},{STOP},{STOP},{STOP}} /* Terminator */
369 /* no, _not_ Arnold! */
370 };
371
372
373
374
375 wxFontEncodingArray wxEncodingConverter::GetPlatformEquivalents(wxFontEncoding enc, int platform)
376 {
377 if (platform == wxPLATFORM_CURRENT)
378 {
379 #if defined(__WXMSW__)
380 platform = wxPLATFORM_WINDOWS;
381 #elif defined(__WXGTK__) || defined(__WXMOTIF__)
382 platform = wxPLATFORM_UNIX;
383 #elif defined(__WXOS2__)
384 platform = wxPLATFORM_OS2;
385 #elif defined(__WXMAC__)
386 platform = wxPLATFORM_MAC;
387 #endif
388 }
389
390 int i, clas, e ;
391 wxFontEncoding *f;
392 wxFontEncodingArray arr;
393
394 clas = 0;
395 while (EquivalentEncodings[clas][0][0] != STOP)
396 {
397 for (i = 0; i < NUM_OF_PLATFORMS; i++)
398 for (e = 0; EquivalentEncodings[clas][i][e] != STOP; e++)
399 if (EquivalentEncodings[clas][i][e] == enc)
400 {
401 for (f = EquivalentEncodings[clas][platform]; *f != STOP; f++)
402 if (*f == enc) arr.Add(enc);
403 for (f = EquivalentEncodings[clas][platform]; *f != STOP; f++)
404 if (arr.Index(*f) == wxNOT_FOUND) arr.Add(*f);
405 i = NUM_OF_PLATFORMS/*hack*/; break;
406 }
407 clas++;
408 }
409
410 return arr;
411 }
412
413
414
415 wxFontEncodingArray wxEncodingConverter::GetAllEquivalents(wxFontEncoding enc)
416 {
417 int i, clas, e, j ;
418 wxFontEncoding *f;
419 wxFontEncodingArray arr;
420
421 arr = GetPlatformEquivalents(enc); // we want them to be first items in array
422
423 clas = 0;
424 while (EquivalentEncodings[clas][0][0] != STOP)
425 {
426 for (i = 0; i < NUM_OF_PLATFORMS; i++)
427 for (e = 0; EquivalentEncodings[clas][i][e] != STOP; e++)
428 if (EquivalentEncodings[clas][i][e] == enc)
429 {
430 for (j = 0; j < NUM_OF_PLATFORMS; j++)
431 for (f = EquivalentEncodings[clas][j]; *f != STOP; f++)
432 if (arr.Index(*f) == wxNOT_FOUND) arr.Add(*f);
433 i = NUM_OF_PLATFORMS/*hack*/; break;
434 }
435 clas++;
436 }
437
438 return arr;
439 }