| 1 | ///////////////////////////////////////////////////////////////////////////// |
| 2 | // Name: encconv.cpp |
| 3 | // Purpose: wxEncodingConverter class for converting between different |
| 4 | // font encodings |
| 5 | // Author: Vaclav Slavik |
| 6 | // Copyright: (c) 1999 Vaclav Slavik |
| 7 | // Licence: wxWindows Licence |
| 8 | ///////////////////////////////////////////////////////////////////////////// |
| 9 | |
| 10 | #ifdef __GNUG__ |
| 11 | #pragma implementation "encconv.h" |
| 12 | #endif |
| 13 | |
| 14 | // For compilers that support precompilation, includes "wx.h". |
| 15 | #include "wx/wxprec.h" |
| 16 | |
| 17 | #ifdef __BORLANDC__ |
| 18 | #pragma hdrstop |
| 19 | #endif |
| 20 | |
| 21 | #if wxUSE_FONTMAP |
| 22 | |
| 23 | #include "wx/encconv.h" |
| 24 | |
| 25 | #include <stdlib.h> |
| 26 | |
| 27 | // conversion tables, generated by scripts in $(WXWIN)/misc/unictabl: |
| 28 | #ifdef __BORLANDC__ |
| 29 | #include "../common/unictabl.inc" |
| 30 | #else |
| 31 | #include "unictabl.inc" |
| 32 | #endif |
| 33 | |
// Element type of the conversion table (m_Table): wchar_t when wide
// character support is compiled in, plain char otherwise.
#if !wxUSE_WCHAR_T
typedef char tchar;
#else
typedef wchar_t tchar;
#endif
| 39 | |
| 40 | static wxUint16* LINKAGEMODE GetEncTable(wxFontEncoding enc) |
| 41 | { |
| 42 | for (int i = 0; encodings_list[i].table != NULL; i++) |
| 43 | { |
| 44 | if (encodings_list[i].encoding == enc) |
| 45 | return encodings_list[i].table; |
| 46 | } |
| 47 | return NULL; |
| 48 | } |
| 49 | |
| 50 | typedef struct { |
| 51 | wxUint16 u; |
| 52 | wxUint8 c; |
| 53 | } CharsetItem; |
| 54 | |
| 55 | |
| 56 | |
| 57 | static int LINKAGEMODE CompareCharsetItems(const void *i1, const void *i2) |
| 58 | { |
| 59 | return ( ((CharsetItem*)i1) -> u - ((CharsetItem*)i2) -> u ); |
| 60 | } |
| 61 | |
| 62 | |
| 63 | static CharsetItem* LINKAGEMODE BuildReverseTable(wxUint16 *tbl) |
| 64 | { |
| 65 | CharsetItem *rev = new CharsetItem[128]; |
| 66 | |
| 67 | for (int i = 0; i < 128; i++) |
| 68 | rev[i].c = 128 + i, rev[i].u = tbl[i]; |
| 69 | |
| 70 | qsort(rev, 128, sizeof(CharsetItem), CompareCharsetItems); |
| 71 | |
| 72 | return rev; |
| 73 | } |
| 74 | |
| 75 | |
| 76 | |
| 77 | wxEncodingConverter::wxEncodingConverter() |
| 78 | { |
| 79 | m_Table = NULL; |
| 80 | m_UnicodeInput = m_UnicodeOutput = FALSE; |
| 81 | m_JustCopy = FALSE; |
| 82 | } |
| 83 | |
| 84 | |
| 85 | |
| 86 | bool wxEncodingConverter::Init(wxFontEncoding input_enc, wxFontEncoding output_enc, int method) |
| 87 | { |
| 88 | unsigned i; |
| 89 | wxUint16 *in_tbl = NULL, *out_tbl = NULL; |
| 90 | |
| 91 | if (m_Table) {delete[] m_Table; m_Table = NULL;} |
| 92 | |
| 93 | #if !wxUSE_WCHAR_T |
| 94 | if (input_enc == wxFONTENCODING_UNICODE || output_enc == wxFONTENCODING_UNICODE) return FALSE; |
| 95 | #endif |
| 96 | |
| 97 | if (input_enc == output_enc) {m_JustCopy = TRUE; return TRUE;} |
| 98 | |
| 99 | m_UnicodeOutput = (output_enc == wxFONTENCODING_UNICODE); |
| 100 | m_JustCopy = FALSE; |
| 101 | |
| 102 | if (input_enc == wxFONTENCODING_UNICODE) |
| 103 | { |
| 104 | if ((out_tbl = GetEncTable(output_enc)) == NULL) return FALSE; |
| 105 | |
| 106 | m_Table = new tchar[65536]; |
| 107 | for (i = 0; i < 128; i++) m_Table[i] = (tchar)i; // 7bit ASCII |
| 108 | for (i = 128; i < 65536; i++) m_Table[i] = (tchar)'?'; |
| 109 | // FIXME - this should be character that means `unicode to charset' impossible, not '?' |
| 110 | |
| 111 | if (method == wxCONVERT_SUBSTITUTE) |
| 112 | { |
| 113 | for (i = 0; i < encoding_unicode_fallback_count; i++) |
| 114 | m_Table[encoding_unicode_fallback[i].c] = (tchar) encoding_unicode_fallback[i].s; |
| 115 | } |
| 116 | |
| 117 | for (i = 0; i < 128; i++) |
| 118 | m_Table[out_tbl[i]] = (tchar)(128 + i); |
| 119 | |
| 120 | m_UnicodeInput = TRUE; |
| 121 | return TRUE; |
| 122 | } |
| 123 | |
| 124 | else |
| 125 | { |
| 126 | if ((in_tbl = GetEncTable(input_enc)) == NULL) return FALSE; |
| 127 | if (output_enc != wxFONTENCODING_UNICODE) |
| 128 | if ((out_tbl = GetEncTable(output_enc)) == NULL) return FALSE; |
| 129 | |
| 130 | m_UnicodeInput = FALSE; |
| 131 | |
| 132 | m_Table = new tchar[256]; |
| 133 | for (i = 0; i < 128; i++) m_Table[i] = (tchar)i; // 7bit ASCII |
| 134 | |
| 135 | if (output_enc == wxFONTENCODING_UNICODE) |
| 136 | { |
| 137 | for (i = 0; i < 128; i++) m_Table[128 + i] = (tchar)in_tbl[i]; |
| 138 | return TRUE; |
| 139 | } |
| 140 | else |
| 141 | { |
| 142 | CharsetItem *rev = BuildReverseTable(out_tbl); |
| 143 | CharsetItem *item; |
| 144 | CharsetItem key; |
| 145 | |
| 146 | for (i = 0; i < 128; i++) |
| 147 | { |
| 148 | key.u = in_tbl[i]; |
| 149 | item = (CharsetItem*) bsearch(&key, rev, 128, sizeof(CharsetItem), CompareCharsetItems); |
| 150 | if (item == NULL && method == wxCONVERT_SUBSTITUTE) |
| 151 | item = (CharsetItem*) bsearch(&key, encoding_unicode_fallback, |
| 152 | encoding_unicode_fallback_count, sizeof(CharsetItem), CompareCharsetItems); |
| 153 | if (item) |
| 154 | m_Table[128 + i] = (tchar)item -> c; |
| 155 | else |
| 156 | #if wxUSE_WCHAR_T |
| 157 | m_Table[128 + i] = (wchar_t)(128 + i); |
| 158 | #else |
| 159 | m_Table[128 + i] = (char)(128 + i); |
| 160 | #endif |
| 161 | } |
| 162 | |
| 163 | delete[] rev; |
| 164 | return TRUE; |
| 165 | } |
| 166 | } |
| 167 | } |
| 168 | |
| 169 | |
| 170 | |
| 171 | void wxEncodingConverter::Convert(const char* input, char* output) |
| 172 | { |
| 173 | wxASSERT_MSG(!m_UnicodeOutput, wxT("You cannot convert to unicode if output is const char*!")); |
| 174 | wxASSERT_MSG(!m_UnicodeInput, wxT("You cannot convert from unicode if input is const char*!")); |
| 175 | |
| 176 | const char *i; |
| 177 | char *o; |
| 178 | |
| 179 | if (m_JustCopy) |
| 180 | { |
| 181 | strcpy(output, input); |
| 182 | return; |
| 183 | } |
| 184 | |
| 185 | wxASSERT_MSG(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!")); |
| 186 | |
| 187 | for (i = input, o = output; *i != 0;) |
| 188 | *(o++) = (char)(m_Table[(wxUint8)*(i++)]); |
| 189 | *o = 0; |
| 190 | } |
| 191 | |
| 192 | |
| 193 | #if wxUSE_WCHAR_T |
| 194 | |
| 195 | void wxEncodingConverter::Convert(const char* input, wchar_t* output) |
| 196 | { |
| 197 | wxASSERT_MSG(m_UnicodeOutput, wxT("You cannot convert to 8-bit if output is const wchar_t*!")); |
| 198 | wxASSERT_MSG(!m_UnicodeInput, wxT("You cannot convert from unicode if input is const char*!")); |
| 199 | |
| 200 | const char *i; |
| 201 | wchar_t *o; |
| 202 | |
| 203 | if (m_JustCopy) |
| 204 | { |
| 205 | for (i = input, o = output; *i != 0;) |
| 206 | *(o++) = (wchar_t)(*(i++)); |
| 207 | *o = 0; |
| 208 | return; |
| 209 | } |
| 210 | |
| 211 | wxASSERT_MSG(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!")); |
| 212 | |
| 213 | for (i = input, o = output; *i != 0;) |
| 214 | *(o++) = (wchar_t)(m_Table[(wxUint8)*(i++)]); |
| 215 | *o = 0; |
| 216 | } |
| 217 | |
| 218 | |
| 219 | |
| 220 | void wxEncodingConverter::Convert(const wchar_t* input, char* output) |
| 221 | { |
| 222 | wxASSERT_MSG(!m_UnicodeOutput, wxT("You cannot convert to unicode if output is const char*!")); |
| 223 | wxASSERT_MSG(m_UnicodeInput, wxT("You cannot convert from 8-bit if input is const wchar_t*!")); |
| 224 | |
| 225 | const wchar_t *i; |
| 226 | char *o; |
| 227 | |
| 228 | if (m_JustCopy) |
| 229 | { |
| 230 | for (i = input, o = output; *i != 0;) |
| 231 | *(o++) = (char)(*(i++)); |
| 232 | *o = 0; |
| 233 | return; |
| 234 | } |
| 235 | |
| 236 | wxASSERT_MSG(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!")); |
| 237 | |
| 238 | for (i = input, o = output; *i != 0;) |
| 239 | *(o++) = (char)(m_Table[(wxUint16)*(i++)]); |
| 240 | *o = 0; |
| 241 | } |
| 242 | |
| 243 | |
| 244 | |
| 245 | void wxEncodingConverter::Convert(const wchar_t* input, wchar_t* output) |
| 246 | { |
| 247 | wxASSERT_MSG(m_UnicodeOutput, wxT("You cannot convert to 8-bit if output is const wchar_t*!")); |
| 248 | wxASSERT_MSG(m_UnicodeInput, wxT("You cannot convert from 8-bit if input is const wchar_t*!")); |
| 249 | |
| 250 | const wchar_t *i; |
| 251 | wchar_t *o; |
| 252 | |
| 253 | if (m_JustCopy) |
| 254 | { |
| 255 | // wcscpy() is not guaranteed to exist |
| 256 | for (i = input, o = output; *i != 0;) |
| 257 | *(o++) = (*(i++)); |
| 258 | *o = 0; |
| 259 | return; |
| 260 | } |
| 261 | |
| 262 | wxASSERT_MSG(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!")); |
| 263 | |
| 264 | for (i = input, o = output; *i != 0;) |
| 265 | *(o++) = (wchar_t)(m_Table[(wxUint8)*(i++)]); |
| 266 | *o = 0; |
| 267 | } |
| 268 | |
| 269 | #endif // wxUSE_WCHAR_T |
| 270 | |
| 271 | |
| 272 | wxString wxEncodingConverter::Convert(const wxString& input) |
| 273 | { |
| 274 | if (m_JustCopy) return input; |
| 275 | |
| 276 | wxString s; |
| 277 | const wxChar *i; |
| 278 | |
| 279 | wxASSERT_MSG(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!")); |
| 280 | |
| 281 | if (m_UnicodeInput) |
| 282 | for (i = input.c_str(); *i != 0; i++) |
| 283 | s << (wxChar)(m_Table[(wxUint16)*i]); |
| 284 | else |
| 285 | for (i = input.c_str(); *i != 0; i++) |
| 286 | s << (wxChar)(m_Table[(wxUint8)*i]); |
| 287 | return s; |
| 288 | } |
| 289 | |
| 290 | |
| 291 | |
| 292 | |
| 293 | |
| 294 | |
| 295 | |
| 296 | // Following tables describe classes of encoding equivalence. |
| 297 | // |
| 298 | |
| 299 | #define STOP wxFONTENCODING_SYSTEM |
| 300 | |
| 301 | #define NUM_OF_PLATFORMS 4 /*must conform to enum wxPLATFORM_XXXX !!!*/ |
| 302 | #define ENC_PER_PLATFORM 5 |
| 303 | // max no. of encodings for one language used on one platform |
| 304 | // Anybody thinks 5 is not enough? ;-) |
| 305 | |
| 306 | static wxFontEncoding |
| 307 | EquivalentEncodings[][NUM_OF_PLATFORMS][ENC_PER_PLATFORM+1] = { |
| 308 | |
| 309 | // *** Please put more common encodings as first! *** |
| 310 | |
| 311 | // Western European |
| 312 | { |
| 313 | /* unix */ {wxFONTENCODING_ISO8859_1, wxFONTENCODING_ISO8859_15, STOP}, |
| 314 | /* windows */ {wxFONTENCODING_CP1252, STOP}, |
| 315 | /* os2 */ {STOP}, |
| 316 | /* mac */ {STOP} |
| 317 | }, |
| 318 | |
| 319 | // Central European |
| 320 | { |
| 321 | /* unix */ {wxFONTENCODING_ISO8859_2, STOP}, |
| 322 | /* windows */ {wxFONTENCODING_CP1250, STOP}, |
| 323 | /* os2 */ {STOP}, |
| 324 | /* mac */ {STOP} |
| 325 | }, |
| 326 | |
| 327 | // Baltic |
| 328 | { |
| 329 | /* unix */ {wxFONTENCODING_ISO8859_13, wxFONTENCODING_ISO8859_4, STOP}, |
| 330 | /* windows */ {wxFONTENCODING_CP1257, STOP}, |
| 331 | /* os2 */ {STOP}, |
| 332 | /* mac */ {STOP} |
| 333 | }, |
| 334 | |
| 335 | // Hebrew |
| 336 | { |
| 337 | /* unix */ {wxFONTENCODING_ISO8859_8, STOP}, |
| 338 | /* windows */ {wxFONTENCODING_CP1255, STOP}, |
| 339 | /* os2 */ {STOP}, |
| 340 | /* mac */ {STOP} |
| 341 | }, |
| 342 | |
| 343 | // Greek |
| 344 | { |
| 345 | /* unix */ {wxFONTENCODING_ISO8859_7, STOP}, |
| 346 | /* windows */ {wxFONTENCODING_CP1253, STOP}, |
| 347 | /* os2 */ {STOP}, |
| 348 | /* mac */ {STOP} |
| 349 | }, |
| 350 | |
| 351 | // Arabic |
| 352 | { |
| 353 | /* unix */ {wxFONTENCODING_ISO8859_6, STOP}, |
| 354 | /* windows */ {wxFONTENCODING_CP1256, STOP}, |
| 355 | /* os2 */ {STOP}, |
| 356 | /* mac */ {STOP} |
| 357 | }, |
| 358 | |
| 359 | // Turkish |
| 360 | { |
| 361 | /* unix */ {wxFONTENCODING_ISO8859_9, STOP}, |
| 362 | /* windows */ {wxFONTENCODING_CP1254, STOP}, |
| 363 | /* os2 */ {STOP}, |
| 364 | /* mac */ {STOP} |
| 365 | }, |
| 366 | |
| 367 | // Cyrillic |
| 368 | { |
| 369 | /* unix */ {wxFONTENCODING_KOI8, wxFONTENCODING_ISO8859_5, STOP}, |
| 370 | /* windows */ {wxFONTENCODING_CP1251, STOP}, |
| 371 | /* os2 */ {STOP}, |
| 372 | /* mac */ {STOP} |
| 373 | }, |
| 374 | |
| 375 | {{STOP},{STOP},{STOP},{STOP}} /* Terminator */ |
| 376 | /* no, _not_ Arnold! */ |
| 377 | }; |
| 378 | |
| 379 | |
| 380 | |
| 381 | |
| 382 | wxFontEncodingArray wxEncodingConverter::GetPlatformEquivalents(wxFontEncoding enc, int platform) |
| 383 | { |
| 384 | if (platform == wxPLATFORM_CURRENT) |
| 385 | { |
| 386 | #if defined(__WXMSW__) |
| 387 | platform = wxPLATFORM_WINDOWS; |
| 388 | #elif defined(__WXGTK__) || defined(__WXMOTIF__) |
| 389 | platform = wxPLATFORM_UNIX; |
| 390 | #elif defined(__WXOS2__) |
| 391 | platform = wxPLATFORM_OS2; |
| 392 | #elif defined(__WXMAC__) |
| 393 | platform = wxPLATFORM_MAC; |
| 394 | #endif |
| 395 | } |
| 396 | |
| 397 | int i, clas, e ; |
| 398 | wxFontEncoding *f; |
| 399 | wxFontEncodingArray arr; |
| 400 | |
| 401 | clas = 0; |
| 402 | while (EquivalentEncodings[clas][0][0] != STOP) |
| 403 | { |
| 404 | for (i = 0; i < NUM_OF_PLATFORMS; i++) |
| 405 | for (e = 0; EquivalentEncodings[clas][i][e] != STOP; e++) |
| 406 | if (EquivalentEncodings[clas][i][e] == enc) |
| 407 | { |
| 408 | for (f = EquivalentEncodings[clas][platform]; *f != STOP; f++) |
| 409 | if (*f == enc) arr.Add(enc); |
| 410 | for (f = EquivalentEncodings[clas][platform]; *f != STOP; f++) |
| 411 | if (arr.Index(*f) == wxNOT_FOUND) arr.Add(*f); |
| 412 | i = NUM_OF_PLATFORMS/*hack*/; break; |
| 413 | } |
| 414 | clas++; |
| 415 | } |
| 416 | |
| 417 | return arr; |
| 418 | } |
| 419 | |
| 420 | |
| 421 | |
| 422 | wxFontEncodingArray wxEncodingConverter::GetAllEquivalents(wxFontEncoding enc) |
| 423 | { |
| 424 | int i, clas, e, j ; |
| 425 | wxFontEncoding *f; |
| 426 | wxFontEncodingArray arr; |
| 427 | |
| 428 | arr = GetPlatformEquivalents(enc); // we want them to be first items in array |
| 429 | |
| 430 | clas = 0; |
| 431 | while (EquivalentEncodings[clas][0][0] != STOP) |
| 432 | { |
| 433 | for (i = 0; i < NUM_OF_PLATFORMS; i++) |
| 434 | for (e = 0; EquivalentEncodings[clas][i][e] != STOP; e++) |
| 435 | if (EquivalentEncodings[clas][i][e] == enc) |
| 436 | { |
| 437 | for (j = 0; j < NUM_OF_PLATFORMS; j++) |
| 438 | for (f = EquivalentEncodings[clas][j]; *f != STOP; f++) |
| 439 | if (arr.Index(*f) == wxNOT_FOUND) arr.Add(*f); |
| 440 | i = NUM_OF_PLATFORMS/*hack*/; break; |
| 441 | } |
| 442 | clas++; |
| 443 | } |
| 444 | |
| 445 | return arr; |
| 446 | } |
| 447 | |
| 448 | #endif // wxUSE_FONTMAP |