]> git.saurik.com Git - wxWidgets.git/blob - src/common/encconv.cpp
added wxEncodingConverter
[wxWidgets.git] / src / common / encconv.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: encconv.cpp
3 // Purpose: wxEncodingConverter class for converting between different
4 // font encodings
5 // Author: Vaclav Slavik
6 // Copyright: (c) 1999 Vaclav Slavik
7 // Licence: wxWindows Licence
8 /////////////////////////////////////////////////////////////////////////////
9
10 #ifdef __GNUG__
11 #pragma implementation "encconv.h"
12 #endif
13
14 // For compilers that support precompilation, includes "wx.h".
15 #include "wx/wxprec.h"
16
17 #ifdef __BORLANDC__
18 #pragma hdrstop
19 #endif
20
21 #include "wx/encconv.h"
22
23 #include <stdlib.h>
24
25 #include "unictabl.inc"
26 // conversion tables, generated by scripts in $(WXWIN)/misc/unictabl
27
28
29 static wxUint16 *GetEncTable(wxFontEncoding enc)
30 {
31 for (int i = 0; encodings_list[i].table != NULL; i++)
32 {
33 if (encodings_list[i].encoding == enc)
34 return encodings_list[i].table;
35 }
36 return NULL;
37 }
38
39 typedef struct {
40 wxUint16 u;
41 wxUint8 c;
42 } CharsetItem;
43
44
45
46 static int CompareCharsetItems(const void *i1, const void *i2)
47 {
48 return ( ((CharsetItem*)i1) -> u - ((CharsetItem*)i2) -> u );
49 }
50
51
52 static CharsetItem* BuildReverseTable(wxUint16 *tbl)
53 {
54 CharsetItem *rev = new CharsetItem[128];
55
56 for (int i = 0; i < 128; i++)
57 rev[i].c = 128 + i, rev[i].u = tbl[i];
58
59 qsort(rev, 128, sizeof(CharsetItem), CompareCharsetItems);
60
61 return rev;
62 }
63
64
65
66 wxEncodingConverter::wxEncodingConverter()
67 {
68 m_Table = NULL;
69 m_UnicodeInput = FALSE;
70 m_JustCopy = FALSE;
71 }
72
73
74
75 bool wxEncodingConverter::Init(wxFontEncoding input_enc, wxFontEncoding output_enc, int method)
76 {
77 unsigned i;
78 wxUint16 *in_tbl = NULL, *out_tbl = NULL;
79
80 if (m_Table) {delete[] m_Table; m_Table = NULL;}
81
82 #if !wxUSE_UNICODE
83 if (input_enc == wxFONTENCODING_UNICODE || output_enc == wxFONTENCODING_UNICODE) return FALSE;
84 #endif
85
86 if (input_enc == output_enc) {m_JustCopy = TRUE; return TRUE;}
87
88 m_JustCopy = FALSE;
89
90 if (input_enc == wxFONTENCODING_UNICODE)
91 {
92 if ((out_tbl = GetEncTable(output_enc)) == NULL) return FALSE;
93
94 m_Table = new wxChar[65536];
95 for (i = 0; i < 128; i++) m_Table[i] = (wxChar)i; // 7bit ASCII
96 for (i = 128; i < 65536; i++) m_Table[i] = (wxChar)'?';
97 // FIXME - this should be character that means `unicode to charset' impossible, not '?'
98
99 if (method == wxCONVERT_SUBSTITUTE)
100 {
101 for (i = 0; i < encoding_unicode_fallback_count; i++)
102 m_Table[encoding_unicode_fallback[i].c] = (wxChar) encoding_unicode_fallback[i].s;
103 }
104
105 for (i = 0; i < 128; i++)
106 m_Table[out_tbl[i]] = (wxChar)(128 + i);
107
108 m_UnicodeInput = TRUE;
109 return TRUE;
110 }
111
112 else
113 {
114 if ((in_tbl = GetEncTable(input_enc)) == NULL) return FALSE;
115 if (output_enc != wxFONTENCODING_UNICODE)
116 if ((out_tbl = GetEncTable(output_enc)) == NULL) return FALSE;
117
118 m_UnicodeInput = FALSE;
119
120 m_Table = new wxChar[256];
121 for (i = 0; i < 128; i++) m_Table[i] = (wxChar)i; // 7bit ASCII
122
123 if (output_enc == wxFONTENCODING_UNICODE)
124 {
125 for (i = 0; i < 128; i++) m_Table[128 + i] = (wxChar)in_tbl[i]; // wxChar is 2byte now
126 return TRUE;
127 }
128 else
129 {
130 CharsetItem *rev = BuildReverseTable(out_tbl);
131 CharsetItem *item, key;
132
133 for (i = 0; i < 128; i++)
134 {
135 key.u = in_tbl[i];
136 item = (CharsetItem*) bsearch(&key, rev, 128, sizeof(CharsetItem), CompareCharsetItems);
137 if (item == NULL && method == wxCONVERT_SUBSTITUTE)
138 item = (CharsetItem*) bsearch(&key, encoding_unicode_fallback,
139 encoding_unicode_fallback_count, sizeof(CharsetItem), CompareCharsetItems);
140 if (item)
141 m_Table[128 + i] = (wxChar)item -> c;
142 else
143 m_Table[128 + i] = 128 + i; // don't know => don't touch
144 }
145
146 delete[] rev;
147 return TRUE;
148 }
149 }
150 }
151
152
153 void wxEncodingConverter::Convert(const wxChar* input, wxChar* output)
154 {
155 if (m_JustCopy)
156 {
157 wxStrcpy(output, input);
158 return;
159 }
160
161 wxASSERT_MSG(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
162
163 const wxChar *i;
164 wxChar *o;
165
166 if (m_UnicodeInput)
167 for (i = input, o = output; *i != 0; i++, o++)
168 *o = (wxChar)(m_Table[(wxUint16)*i]);
169 else
170 for (i = input, o = output; *i != 0; i++, o++)
171 *o = (wxChar)(m_Table[(wxUint8)*i]);
172 *o = 0;
173 }
174
175
176
177 wxString wxEncodingConverter::Convert(const wxString& input)
178 {
179 if (m_JustCopy) return input;
180
181 wxString s;
182 const wxChar *i;
183
184 wxASSERT_MSG(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
185
186 if (m_UnicodeInput)
187 for (i = input.c_str(); *i != 0; i++)
188 s << (wxChar)(m_Table[(wxUint16)*i]);
189 else
190 for (i = input.c_str(); *i != 0; i++)
191 s << (wxChar)(m_Table[(wxUint8)*i]);
192 return s;
193 }
194
195
196
197
198 // Following tables describe classes of encoding equivalence.
199 //
200
201 #define STOP wxFONTENCODING_SYSTEM
202
203 #define NUM_OF_PLATFORMS 4 /*must conform to enum wxPLATFORM_XXXX !!!*/
204 #define ENC_PER_PLATFORM 3
205 // max no. of encodings for one language used on one platform
206 // Anybody thinks 3 is not enough? ;-)
207
208 static wxFontEncoding
209 EquivalentEncodings[][NUM_OF_PLATFORMS][ENC_PER_PLATFORM+1] = {
210
211 // West European (Latin1)
212 {
213 /* unix */ {wxFONTENCODING_ISO8859_1, wxFONTENCODING_ISO8859_15, STOP},
214 /* windows */ {wxFONTENCODING_CP1252, STOP},
215 /* os2 */ {STOP},
216 /* mac */ {STOP}
217 },
218
219 // Central European (Latin2)
220 {
221 /* unix */ {wxFONTENCODING_ISO8859_2, STOP},
222 /* windows */ {wxFONTENCODING_CP1250, STOP},
223 /* os2 */ {STOP},
224 /* mac */ {STOP}
225 },
226
227 {{STOP},{STOP},{STOP},{STOP}} /* Terminator */
228 /* no, _not_ Arnold! */
229 };
230
231
232
233
234 wxFontEncodingArray wxEncodingConverter::GetPlatformEquivalents(wxFontEncoding enc, int platform)
235 {
236 if (platform == wxPLATFORM_CURRENT)
237 {
238 #if defined(__WXMSW__)
239 platform = wxPLATFORM_WINDOWS;
240 #elif defined(__WXGTK__) || defined(__WXMOTIF__)
241 platform = wxPLATFORM_UNIX;
242 #elif defined(__WXOS2__)
243 platform = wxPLATFORM_OS2;
244 #elif defined(__WXMAC__)
245 platform = wxPLATFORM_MAC;
246 #endif
247 }
248
249 int i, clas, e ;
250 wxFontEncoding *f;
251 wxFontEncodingArray arr;
252
253 clas = 0;
254 while (EquivalentEncodings[clas][0][0] != STOP)
255 {
256 for (i = 0; i < NUM_OF_PLATFORMS; i++)
257 for (e = 0; EquivalentEncodings[clas][i][e] != STOP; e++)
258 if (EquivalentEncodings[clas][i][e] == enc)
259 {
260 for (f = EquivalentEncodings[clas][platform]; *f != STOP; f++)
261 if (arr.Index(*f) == wxNOT_FOUND) arr.Add(*f);
262 i = NUM_OF_PLATFORMS/*hack*/; break;
263 }
264 clas++;
265 }
266
267 return arr;
268 }
269
270
271
272 wxFontEncodingArray wxEncodingConverter::GetAllEquivalents(wxFontEncoding enc)
273 {
274 int i, clas, e, j ;
275 wxFontEncoding *f;
276 wxFontEncodingArray arr;
277
278 clas = 0;
279 while (EquivalentEncodings[clas][0][0] != STOP)
280 {
281 for (i = 0; i < NUM_OF_PLATFORMS; i++)
282 for (e = 0; EquivalentEncodings[clas][i][e] != STOP; e++)
283 if (EquivalentEncodings[clas][i][e] == enc)
284 {
285 for (j = 0; j < NUM_OF_PLATFORMS; j++)
286 for (f = EquivalentEncodings[clas][j]; *f != STOP; f++)
287 if (arr.Index(*f) == wxNOT_FOUND) arr.Add(*f);
288 i = NUM_OF_PLATFORMS/*hack*/; break;
289 }
290 clas++;
291 }
292
293 return arr;
294 }