1 /////////////////////////////////////////////////////////////////////////////
// Purpose:     wxEncodingConverter class for converting between different
//              font encodings
5 // Author: Vaclav Slavik
6 // Copyright: (c) 1999 Vaclav Slavik
7 // Licence: wxWindows Licence
8 /////////////////////////////////////////////////////////////////////////////
11 #pragma implementation "encconv.h"
14 // For compilers that support precompilation, includes "wx.h".
15 #include "wx/wxprec.h"
21 #include "wx/encconv.h"
25 #include "unictabl.inc"
26 // conversion tables, generated by scripts in $(WXWIN)/misc/unictabl
29 static wxUint16
*GetEncTable(wxFontEncoding enc
)
31 for (int i
= 0; encodings_list
[i
].table
!= NULL
; i
++)
33 if (encodings_list
[i
].encoding
== enc
)
34 return encodings_list
[i
].table
;
46 static int CompareCharsetItems(const void *i1
, const void *i2
)
48 return ( ((CharsetItem
*)i1
) -> u
- ((CharsetItem
*)i2
) -> u
);
52 static CharsetItem
* BuildReverseTable(wxUint16
*tbl
)
54 CharsetItem
*rev
= new CharsetItem
[128];
56 for (int i
= 0; i
< 128; i
++)
57 rev
[i
].c
= 128 + i
, rev
[i
].u
= tbl
[i
];
59 qsort(rev
, 128, sizeof(CharsetItem
), CompareCharsetItems
);
66 wxEncodingConverter::wxEncodingConverter()
69 m_UnicodeInput
= m_UnicodeOutput
= FALSE
;
75 bool wxEncodingConverter::Init(wxFontEncoding input_enc
, wxFontEncoding output_enc
, int method
)
78 wxUint16
*in_tbl
= NULL
, *out_tbl
= NULL
;
80 if (m_Table
) {delete[] m_Table
; m_Table
= NULL
;}
83 if (input_enc
== wxFONTENCODING_UNICODE
|| output_enc
== wxFONTENCODING_UNICODE
) return FALSE
;
86 if (input_enc
== output_enc
) {m_JustCopy
= TRUE
; return TRUE
;}
88 m_UnicodeOutput
= (output_enc
== wxFONTENCODING_UNICODE
);
91 if (input_enc
== wxFONTENCODING_UNICODE
)
93 if ((out_tbl
= GetEncTable(output_enc
)) == NULL
) return FALSE
;
95 m_Table
= new wxChar
[65536];
96 for (i
= 0; i
< 128; i
++) m_Table
[i
] = (wxChar
)i
; // 7bit ASCII
97 for (i
= 128; i
< 65536; i
++) m_Table
[i
] = (wxChar
)'?';
98 // FIXME - this should be character that means `unicode to charset' impossible, not '?'
100 if (method
== wxCONVERT_SUBSTITUTE
)
102 for (i
= 0; i
< encoding_unicode_fallback_count
; i
++)
103 m_Table
[encoding_unicode_fallback
[i
].c
] = (wxChar
) encoding_unicode_fallback
[i
].s
;
106 for (i
= 0; i
< 128; i
++)
107 m_Table
[out_tbl
[i
]] = (wxChar
)(128 + i
);
109 m_UnicodeInput
= TRUE
;
115 if ((in_tbl
= GetEncTable(input_enc
)) == NULL
) return FALSE
;
116 if (output_enc
!= wxFONTENCODING_UNICODE
)
117 if ((out_tbl
= GetEncTable(output_enc
)) == NULL
) return FALSE
;
119 m_UnicodeInput
= FALSE
;
121 m_Table
= new wxChar
[256];
122 for (i
= 0; i
< 128; i
++) m_Table
[i
] = (wxChar
)i
; // 7bit ASCII
124 if (output_enc
== wxFONTENCODING_UNICODE
)
126 for (i
= 0; i
< 128; i
++) m_Table
[128 + i
] = (wxChar
)in_tbl
[i
]; // wxChar is 2byte now
131 CharsetItem
*rev
= BuildReverseTable(out_tbl
);
132 CharsetItem
*item
, key
;
134 for (i
= 0; i
< 128; i
++)
137 item
= (CharsetItem
*) bsearch(&key
, rev
, 128, sizeof(CharsetItem
), CompareCharsetItems
);
138 if (item
== NULL
&& method
== wxCONVERT_SUBSTITUTE
)
139 item
= (CharsetItem
*) bsearch(&key
, encoding_unicode_fallback
,
140 encoding_unicode_fallback_count
, sizeof(CharsetItem
), CompareCharsetItems
);
142 m_Table
[128 + i
] = (wxChar
)item
-> c
;
144 m_Table
[128 + i
] = 128 + i
; // don't know => don't touch
155 void wxEncodingConverter::Convert(const wxChar
* input
, wxChar
* output
)
159 wxStrcpy(output
, input
);
163 wxASSERT_MSG(m_Table
!= NULL
, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
169 for (i
= input
, o
= output
; *i
!= 0; i
++, o
++)
170 *o
= (wxChar
)(m_Table
[(wxUint16
)*i
]);
172 for (i
= input
, o
= output
; *i
!= 0; i
++, o
++)
173 *o
= (wxChar
)(m_Table
[(wxUint8
)*i
]);
178 #if wxUSE_UNICODE // otherwise wxChar === char
180 void wxEncodingConverter::Convert(const char* input
, wxChar
* output
)
182 wxASSERT_MSG(!m_UnicodeInput
, wxT("You cannot convert from unicode if input is const char*!"));
189 for (i
= input
, o
= output
; *i
!= 0;)
190 *(o
++) = (wxChar
)(*(i
++));
195 wxASSERT_MSG(m_Table
!= NULL
, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
197 for (i
= input
, o
= output
; *i
!= 0;)
198 *(o
++) = (wxChar
)(m_Table
[(wxUint8
)*(i
++)]);
204 void wxEncodingConverter::Convert(const wxChar
* input
, char* output
)
206 wxASSERT_MSG(!m_UnicodeOutput
, wxT("You cannot convert to unicode if output is const char*!"));
213 for (i
= input
, o
= output
; *i
!= 0;)
214 *(o
++) = (char)(*(i
++));
219 wxASSERT_MSG(m_Table
!= NULL
, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
222 for (i
= input
, o
= output
; *i
!= 0; i
++, o
++)
223 *o
= (char)(m_Table
[(wxUint16
)*i
]);
225 for (i
= input
, o
= output
; *i
!= 0; i
++, o
++)
226 *o
= (char)(m_Table
[(wxUint8
)*i
]);
232 void wxEncodingConverter::Convert(const char* input
, char* output
)
234 wxASSERT_MSG(!m_UnicodeOutput
, wxT("You cannot convert to unicode if output is const char*!"));
235 wxASSERT_MSG(!m_UnicodeInput
, wxT("You cannot convert from unicode if input is const char*!"));
242 strcpy(output
, input
);
246 wxASSERT_MSG(m_Table
!= NULL
, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
248 for (i
= input
, o
= output
; *i
!= 0;)
249 *(o
++) = (char)(m_Table
[(wxUint8
)*(i
++)]);
253 #endif // wxUSE_UNICODE
256 wxString
wxEncodingConverter::Convert(const wxString
& input
)
258 if (m_JustCopy
) return input
;
263 wxASSERT_MSG(m_Table
!= NULL
, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
266 for (i
= input
.c_str(); *i
!= 0; i
++)
267 s
<< (wxChar
)(m_Table
[(wxUint16
)*i
]);
269 for (i
= input
.c_str(); *i
!= 0; i
++)
270 s
<< (wxChar
)(m_Table
[(wxUint8
)*i
]);
280 // Following tables describe classes of encoding equivalence.
283 #define STOP wxFONTENCODING_SYSTEM
285 #define NUM_OF_PLATFORMS 4 /*must conform to enum wxPLATFORM_XXXX !!!*/
286 #define ENC_PER_PLATFORM 3
287 // max no. of encodings for one language used on one platform
288 // Anybody thinks 3 is not enough? ;-)
290 static wxFontEncoding
291 EquivalentEncodings
[][NUM_OF_PLATFORMS
][ENC_PER_PLATFORM
+1] = {
293 // *** Please put more common encodings as first! ***
297 /* unix */ {wxFONTENCODING_ISO8859_1
, wxFONTENCODING_ISO8859_15
, STOP
},
298 /* windows */ {wxFONTENCODING_CP1252
, STOP
},
305 /* unix */ {wxFONTENCODING_ISO8859_2
, STOP
},
306 /* windows */ {wxFONTENCODING_CP1250
, STOP
},
313 /* unix */ {wxFONTENCODING_ISO8859_13
, STOP
},
314 /* windows */ {wxFONTENCODING_CP1257
, STOP
},
321 /* unix */ {wxFONTENCODING_ISO8859_8
, STOP
},
322 /* windows */ {wxFONTENCODING_CP1255
, STOP
},
329 /* unix */ {wxFONTENCODING_ISO8859_7
, STOP
},
330 /* windows */ {wxFONTENCODING_CP1253
, STOP
},
337 /* unix */ {wxFONTENCODING_ISO8859_6
, STOP
},
338 /* windows */ {wxFONTENCODING_CP1256
, STOP
},
345 /* unix */ {wxFONTENCODING_ISO8859_9
, STOP
},
346 /* windows */ {wxFONTENCODING_CP1254
, STOP
},
353 /* unix */ {wxFONTENCODING_ISO8859_5
, STOP
},
354 /* windows */ {wxFONTENCODING_CP1251
, STOP
},
359 {{STOP
},{STOP
},{STOP
},{STOP
}} /* Terminator */
360 /* no, _not_ Arnold! */
366 wxFontEncodingArray
wxEncodingConverter::GetPlatformEquivalents(wxFontEncoding enc
, int platform
)
368 if (platform
== wxPLATFORM_CURRENT
)
370 #if defined(__WXMSW__)
371 platform
= wxPLATFORM_WINDOWS
;
372 #elif defined(__WXGTK__) || defined(__WXMOTIF__)
373 platform
= wxPLATFORM_UNIX
;
374 #elif defined(__WXOS2__)
375 platform
= wxPLATFORM_OS2
;
376 #elif defined(__WXMAC__)
377 platform
= wxPLATFORM_MAC
;
383 wxFontEncodingArray arr
;
386 while (EquivalentEncodings
[clas
][0][0] != STOP
)
388 for (i
= 0; i
< NUM_OF_PLATFORMS
; i
++)
389 for (e
= 0; EquivalentEncodings
[clas
][i
][e
] != STOP
; e
++)
390 if (EquivalentEncodings
[clas
][i
][e
] == enc
)
392 for (f
= EquivalentEncodings
[clas
][platform
]; *f
!= STOP
; f
++)
393 if (*f
== enc
) arr
.Add(enc
);
394 for (f
= EquivalentEncodings
[clas
][platform
]; *f
!= STOP
; f
++)
395 if (arr
.Index(*f
) == wxNOT_FOUND
) arr
.Add(*f
);
396 i
= NUM_OF_PLATFORMS
/*hack*/; break;
406 wxFontEncodingArray
wxEncodingConverter::GetAllEquivalents(wxFontEncoding enc
)
410 wxFontEncodingArray arr
;
412 arr
= GetPlatformEquivalents(enc
); // we want them to be first items in array
415 while (EquivalentEncodings
[clas
][0][0] != STOP
)
417 for (i
= 0; i
< NUM_OF_PLATFORMS
; i
++)
418 for (e
= 0; EquivalentEncodings
[clas
][i
][e
] != STOP
; e
++)
419 if (EquivalentEncodings
[clas
][i
][e
] == enc
)
421 for (j
= 0; j
< NUM_OF_PLATFORMS
; j
++)
422 for (f
= EquivalentEncodings
[clas
][j
]; *f
!= STOP
; f
++)
423 if (arr
.Index(*f
) == wxNOT_FOUND
) arr
.Add(*f
);
424 i
= NUM_OF_PLATFORMS
/*hack*/; break;