]> git.saurik.com Git - wxWidgets.git/blame - src/common/encconv.cpp
ParseDate() understands tomorrow and yesterday as well as today
[wxWidgets.git] / src / common / encconv.cpp
CommitLineData
c958260b
VS
1/////////////////////////////////////////////////////////////////////////////
2// Name: encconv.cpp
3// Purpose: wxEncodingConverter class for converting between different
4// font encodings
5// Author: Vaclav Slavik
6// Copyright: (c) 1999 Vaclav Slavik
7// Licence: wxWindows Licence
8/////////////////////////////////////////////////////////////////////////////
9
10#ifdef __GNUG__
11#pragma implementation "encconv.h"
12#endif
13
14// For compilers that support precompilation, includes "wx.h".
15#include "wx/wxprec.h"
16
17#ifdef __BORLANDC__
18 #pragma hdrstop
19#endif
20
21#include "wx/encconv.h"
22
23#include <stdlib.h>
24
94fc5183
VS
25// conversion tables, generated by scripts in $(WXWIN)/misc/unictabl:
26#ifdef __BORLANDC__
27#include "../common/unictabl.inc"
28#else
c958260b 29#include "unictabl.inc"
94fc5183 30#endif
c958260b
VS
31
32
eda22ec3 33static wxUint16* LINKAGEMODE GetEncTable(wxFontEncoding enc)
c958260b
VS
34{
35 for (int i = 0; encodings_list[i].table != NULL; i++)
36 {
37 if (encodings_list[i].encoding == enc)
38 return encodings_list[i].table;
39 }
40 return NULL;
41}
42
43typedef struct {
44 wxUint16 u;
45 wxUint8 c;
46} CharsetItem;
47
48
49
eda22ec3 50static int LINKAGEMODE CompareCharsetItems(const void *i1, const void *i2)
c958260b
VS
51{
52 return ( ((CharsetItem*)i1) -> u - ((CharsetItem*)i2) -> u );
53}
54
55
eda22ec3 56static CharsetItem* LINKAGEMODE BuildReverseTable(wxUint16 *tbl)
c958260b
VS
57{
58 CharsetItem *rev = new CharsetItem[128];
59
60 for (int i = 0; i < 128; i++)
61 rev[i].c = 128 + i, rev[i].u = tbl[i];
62
63 qsort(rev, 128, sizeof(CharsetItem), CompareCharsetItems);
64
65 return rev;
66}
67
68
69
70wxEncodingConverter::wxEncodingConverter()
71{
72 m_Table = NULL;
47e55c2f 73 m_UnicodeInput = m_UnicodeOutput = FALSE;
c958260b
VS
74 m_JustCopy = FALSE;
75}
76
77
78
79bool wxEncodingConverter::Init(wxFontEncoding input_enc, wxFontEncoding output_enc, int method)
80{
81 unsigned i;
82 wxUint16 *in_tbl = NULL, *out_tbl = NULL;
83
84 if (m_Table) {delete[] m_Table; m_Table = NULL;}
85
86#if !wxUSE_UNICODE
87 if (input_enc == wxFONTENCODING_UNICODE || output_enc == wxFONTENCODING_UNICODE) return FALSE;
88#endif
89
90 if (input_enc == output_enc) {m_JustCopy = TRUE; return TRUE;}
91
47e55c2f 92 m_UnicodeOutput = (output_enc == wxFONTENCODING_UNICODE);
c958260b
VS
93 m_JustCopy = FALSE;
94
95 if (input_enc == wxFONTENCODING_UNICODE)
96 {
97 if ((out_tbl = GetEncTable(output_enc)) == NULL) return FALSE;
98
99 m_Table = new wxChar[65536];
100 for (i = 0; i < 128; i++) m_Table[i] = (wxChar)i; // 7bit ASCII
101 for (i = 128; i < 65536; i++) m_Table[i] = (wxChar)'?';
102 // FIXME - this should be character that means `unicode to charset' impossible, not '?'
103
104 if (method == wxCONVERT_SUBSTITUTE)
105 {
106 for (i = 0; i < encoding_unicode_fallback_count; i++)
107 m_Table[encoding_unicode_fallback[i].c] = (wxChar) encoding_unicode_fallback[i].s;
108 }
109
110 for (i = 0; i < 128; i++)
111 m_Table[out_tbl[i]] = (wxChar)(128 + i);
112
113 m_UnicodeInput = TRUE;
114 return TRUE;
115 }
116
117 else
118 {
119 if ((in_tbl = GetEncTable(input_enc)) == NULL) return FALSE;
120 if (output_enc != wxFONTENCODING_UNICODE)
121 if ((out_tbl = GetEncTable(output_enc)) == NULL) return FALSE;
122
123 m_UnicodeInput = FALSE;
124
125 m_Table = new wxChar[256];
126 for (i = 0; i < 128; i++) m_Table[i] = (wxChar)i; // 7bit ASCII
127
128 if (output_enc == wxFONTENCODING_UNICODE)
129 {
130 for (i = 0; i < 128; i++) m_Table[128 + i] = (wxChar)in_tbl[i]; // wxChar is 2byte now
131 return TRUE;
132 }
133 else
134 {
135 CharsetItem *rev = BuildReverseTable(out_tbl);
136 CharsetItem *item, key;
137
138 for (i = 0; i < 128; i++)
139 {
140 key.u = in_tbl[i];
141 item = (CharsetItem*) bsearch(&key, rev, 128, sizeof(CharsetItem), CompareCharsetItems);
142 if (item == NULL && method == wxCONVERT_SUBSTITUTE)
143 item = (CharsetItem*) bsearch(&key, encoding_unicode_fallback,
144 encoding_unicode_fallback_count, sizeof(CharsetItem), CompareCharsetItems);
145 if (item)
146 m_Table[128 + i] = (wxChar)item -> c;
147 else
148 m_Table[128 + i] = 128 + i; // don't know => don't touch
149 }
150
151 delete[] rev;
152 return TRUE;
153 }
154 }
155}
156
157
47e55c2f 158
c958260b
VS
159void wxEncodingConverter::Convert(const wxChar* input, wxChar* output)
160{
161 if (m_JustCopy)
162 {
163 wxStrcpy(output, input);
164 return;
165 }
166
167 wxASSERT_MSG(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
168
169 const wxChar *i;
170 wxChar *o;
171
172 if (m_UnicodeInput)
173 for (i = input, o = output; *i != 0; i++, o++)
174 *o = (wxChar)(m_Table[(wxUint16)*i]);
175 else
176 for (i = input, o = output; *i != 0; i++, o++)
177 *o = (wxChar)(m_Table[(wxUint8)*i]);
178 *o = 0;
179}
180
181
47e55c2f
VS
182#if wxUSE_UNICODE // otherwise wxChar === char
183
184void wxEncodingConverter::Convert(const char* input, wxChar* output)
185{
186 wxASSERT_MSG(!m_UnicodeInput, wxT("You cannot convert from unicode if input is const char*!"));
187
188 const char *i;
189 wxChar *o;
190
191 if (m_JustCopy)
192 {
193 for (i = input, o = output; *i != 0;)
194 *(o++) = (wxChar)(*(i++));
195 *o = 0;
196 return;
197 }
198
199 wxASSERT_MSG(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
200
201 for (i = input, o = output; *i != 0;)
202 *(o++) = (wxChar)(m_Table[(wxUint8)*(i++)]);
203 *o = 0;
204}
205
206
207
208void wxEncodingConverter::Convert(const wxChar* input, char* output)
209{
210 wxASSERT_MSG(!m_UnicodeOutput, wxT("You cannot convert to unicode if output is const char*!"));
211
212 const wxChar *i;
213 char *o;
214
215 if (m_JustCopy)
216 {
217 for (i = input, o = output; *i != 0;)
218 *(o++) = (char)(*(i++));
219 *o = 0;
220 return;
221 }
222
223 wxASSERT_MSG(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
224
225 if (m_UnicodeInput)
226 for (i = input, o = output; *i != 0; i++, o++)
227 *o = (char)(m_Table[(wxUint16)*i]);
228 else
229 for (i = input, o = output; *i != 0; i++, o++)
230 *o = (char)(m_Table[(wxUint8)*i]);
231 *o = 0;
232}
233
234
235
236void wxEncodingConverter::Convert(const char* input, char* output)
237{
238 wxASSERT_MSG(!m_UnicodeOutput, wxT("You cannot convert to unicode if output is const char*!"));
239 wxASSERT_MSG(!m_UnicodeInput, wxT("You cannot convert from unicode if input is const char*!"));
240
241 const char *i;
242 char *o;
243
244 if (m_JustCopy)
245 {
246 strcpy(output, input);
247 return;
248 }
249
250 wxASSERT_MSG(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
251
252 for (i = input, o = output; *i != 0;)
253 *(o++) = (char)(m_Table[(wxUint8)*(i++)]);
254 *o = 0;
255}
256
257#endif // wxUSE_UNICODE
258
c958260b
VS
259
260wxString wxEncodingConverter::Convert(const wxString& input)
261{
262 if (m_JustCopy) return input;
263
264 wxString s;
265 const wxChar *i;
266
267 wxASSERT_MSG(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
268
269 if (m_UnicodeInput)
270 for (i = input.c_str(); *i != 0; i++)
271 s << (wxChar)(m_Table[(wxUint16)*i]);
272 else
273 for (i = input.c_str(); *i != 0; i++)
274 s << (wxChar)(m_Table[(wxUint8)*i]);
275 return s;
276}
277
278
279
280
47e55c2f
VS
281
282
283
c958260b
VS
284// Following tables describe classes of encoding equivalence.
285//
286
287#define STOP wxFONTENCODING_SYSTEM
288
289#define NUM_OF_PLATFORMS 4 /*must conform to enum wxPLATFORM_XXXX !!!*/
290#define ENC_PER_PLATFORM 3
291 // max no. of encodings for one language used on one platform
292 // Anybody thinks 3 is not enough? ;-)
293
294static wxFontEncoding
295 EquivalentEncodings[][NUM_OF_PLATFORMS][ENC_PER_PLATFORM+1] = {
296
47e55c2f
VS
297 // *** Please put more common encodings as first! ***
298
299 // West European
c958260b
VS
300 {
301 /* unix */ {wxFONTENCODING_ISO8859_1, wxFONTENCODING_ISO8859_15, STOP},
302 /* windows */ {wxFONTENCODING_CP1252, STOP},
303 /* os2 */ {STOP},
304 /* mac */ {STOP}
305 },
306
47e55c2f 307 // Central European
c958260b
VS
308 {
309 /* unix */ {wxFONTENCODING_ISO8859_2, STOP},
310 /* windows */ {wxFONTENCODING_CP1250, STOP},
311 /* os2 */ {STOP},
312 /* mac */ {STOP}
313 },
47e55c2f
VS
314
315 // Baltic
316 {
317 /* unix */ {wxFONTENCODING_ISO8859_13, STOP},
318 /* windows */ {wxFONTENCODING_CP1257, STOP},
319 /* os2 */ {STOP},
320 /* mac */ {STOP}
321 },
322
323 // Hebrew
324 {
325 /* unix */ {wxFONTENCODING_ISO8859_8, STOP},
326 /* windows */ {wxFONTENCODING_CP1255, STOP},
327 /* os2 */ {STOP},
328 /* mac */ {STOP}
329 },
330
331 // Greek
332 {
333 /* unix */ {wxFONTENCODING_ISO8859_7, STOP},
334 /* windows */ {wxFONTENCODING_CP1253, STOP},
335 /* os2 */ {STOP},
336 /* mac */ {STOP}
337 },
338
339 // Arabic
340 {
341 /* unix */ {wxFONTENCODING_ISO8859_6, STOP},
342 /* windows */ {wxFONTENCODING_CP1256, STOP},
343 /* os2 */ {STOP},
344 /* mac */ {STOP}
345 },
346
347 // Turkish
348 {
349 /* unix */ {wxFONTENCODING_ISO8859_9, STOP},
350 /* windows */ {wxFONTENCODING_CP1254, STOP},
351 /* os2 */ {STOP},
352 /* mac */ {STOP}
353 },
354
355 // Cyrillic
356 {
357 /* unix */ {wxFONTENCODING_ISO8859_5, STOP},
358 /* windows */ {wxFONTENCODING_CP1251, STOP},
359 /* os2 */ {STOP},
360 /* mac */ {STOP}
361 },
c958260b
VS
362
363 {{STOP},{STOP},{STOP},{STOP}} /* Terminator */
364 /* no, _not_ Arnold! */
365};
366
367
368
369
370wxFontEncodingArray wxEncodingConverter::GetPlatformEquivalents(wxFontEncoding enc, int platform)
371{
372 if (platform == wxPLATFORM_CURRENT)
373 {
374#if defined(__WXMSW__)
375 platform = wxPLATFORM_WINDOWS;
376#elif defined(__WXGTK__) || defined(__WXMOTIF__)
377 platform = wxPLATFORM_UNIX;
378#elif defined(__WXOS2__)
379 platform = wxPLATFORM_OS2;
380#elif defined(__WXMAC__)
381 platform = wxPLATFORM_MAC;
382#endif
383 }
384
385 int i, clas, e ;
386 wxFontEncoding *f;
387 wxFontEncodingArray arr;
388
389 clas = 0;
390 while (EquivalentEncodings[clas][0][0] != STOP)
391 {
392 for (i = 0; i < NUM_OF_PLATFORMS; i++)
393 for (e = 0; EquivalentEncodings[clas][i][e] != STOP; e++)
394 if (EquivalentEncodings[clas][i][e] == enc)
395 {
47e55c2f
VS
396 for (f = EquivalentEncodings[clas][platform]; *f != STOP; f++)
397 if (*f == enc) arr.Add(enc);
398 for (f = EquivalentEncodings[clas][platform]; *f != STOP; f++)
c958260b
VS
399 if (arr.Index(*f) == wxNOT_FOUND) arr.Add(*f);
400 i = NUM_OF_PLATFORMS/*hack*/; break;
401 }
402 clas++;
403 }
404
405 return arr;
406}
407
408
409
410wxFontEncodingArray wxEncodingConverter::GetAllEquivalents(wxFontEncoding enc)
411{
412 int i, clas, e, j ;
413 wxFontEncoding *f;
414 wxFontEncodingArray arr;
47e55c2f
VS
415
416 arr = GetPlatformEquivalents(enc); // we want them to be first items in array
c958260b
VS
417
418 clas = 0;
419 while (EquivalentEncodings[clas][0][0] != STOP)
420 {
421 for (i = 0; i < NUM_OF_PLATFORMS; i++)
422 for (e = 0; EquivalentEncodings[clas][i][e] != STOP; e++)
423 if (EquivalentEncodings[clas][i][e] == enc)
424 {
425 for (j = 0; j < NUM_OF_PLATFORMS; j++)
426 for (f = EquivalentEncodings[clas][j]; *f != STOP; f++)
427 if (arr.Index(*f) == wxNOT_FOUND) arr.Add(*f);
428 i = NUM_OF_PLATFORMS/*hack*/; break;
429 }
430 clas++;
431 }
432
433 return arr;
434}