| 1 | ///////////////////////////////////////////////////////////////////////////// |
| 2 | // Name: encconv.cpp |
| 3 | // Purpose: wxEncodingConverter class for converting between different |
| 4 | // font encodings |
| 5 | // Author: Vaclav Slavik |
| 6 | // Copyright: (c) 1999 Vaclav Slavik |
| 7 | // Licence: wxWindows licence |
| 8 | ///////////////////////////////////////////////////////////////////////////// |
| 9 | |
| 10 | // For compilers that support precompilation, includes "wx.h". |
| 11 | #include "wx/wxprec.h" |
| 12 | |
| 13 | #ifdef __BORLANDC__ |
| 14 | #pragma hdrstop |
| 15 | #endif |
| 16 | |
| 17 | #include "wx/encconv.h" |
| 18 | |
| 19 | #include <stdlib.h> |
| 20 | |
| 21 | // conversion tables, generated by scripts in $(WXWIN)/misc/unictabl: |
| 22 | #if defined( __BORLANDC__ ) || defined(__DARWIN__) |
| 23 | #include "../common/unictabl.inc" |
| 24 | #else |
| 25 | #include "unictabl.inc" |
| 26 | #endif |
| 27 | |
| 28 | #ifdef __WXMAC__ |
| 29 | #include "wx/osx/core/cfstring.h" |
| 30 | #include <CoreFoundation/CFStringEncodingExt.h> |
| 31 | |
// Cache of conversion tables for the Mac encodings: one row of 128 entries
// per encoding in [wxFONTENCODING_MACMIN, wxFONTENCODING_MACMAX], indexed by
// (byte - 128). Rows are filled on first use by GetEncTable().
wxUint16 gMacEncodings[wxFONTENCODING_MACMAX-wxFONTENCODING_MACMIN+1][128] ;
// gMacEncodingsInited[n] becomes true once row n of gMacEncodings is built.
bool gMacEncodingsInited[wxFONTENCODING_MACMAX-wxFONTENCODING_MACMIN+1] ;
| 34 | #endif |
| 35 | |
| 36 | #ifdef __WXWINCE__ |
| 37 | #include "wx/msw/wince/missing.h" // for bsearch() |
| 38 | #endif |
| 39 | |
| 40 | static const wxUint16* GetEncTable(wxFontEncoding enc) |
| 41 | { |
| 42 | #ifdef __WXMAC__ |
| 43 | if( enc >= wxFONTENCODING_MACMIN && enc <= wxFONTENCODING_MACMAX ) |
| 44 | { |
| 45 | int i = enc-wxFONTENCODING_MACMIN ; |
| 46 | if ( gMacEncodingsInited[i] == false ) |
| 47 | { |
| 48 | // create |
| 49 | CFStringEncoding cfencoding = wxMacGetSystemEncFromFontEnc( enc ) ; |
| 50 | if( !CFStringIsEncodingAvailable( cfencoding ) ) |
| 51 | return NULL; |
| 52 | |
| 53 | memset( gMacEncodings[i] , 0 , 128 * 2 ); |
| 54 | char s[2] = { 0 , 0 }; |
| 55 | CFRange firstchar = CFRangeMake( 0, 1 ); |
| 56 | for( unsigned char c = 255 ; c >= 128 ; --c ) |
| 57 | { |
| 58 | s[0] = c ; |
| 59 | wxCFStringRef cfref( CFStringCreateWithCStringNoCopy( NULL, s, cfencoding , kCFAllocatorNull ) ); |
| 60 | CFStringGetCharacters( cfref, firstchar, (UniChar*) &gMacEncodings[i][c-128] ); |
| 61 | } |
| 62 | gMacEncodingsInited[i]=true; |
| 63 | } |
| 64 | return gMacEncodings[i] ; |
| 65 | } |
| 66 | #endif |
| 67 | |
| 68 | for (int i = 0; encodings_list[i].table != NULL; i++) |
| 69 | { |
| 70 | if (encodings_list[i].encoding == enc) |
| 71 | return encodings_list[i].table; |
| 72 | } |
| 73 | return NULL; |
| 74 | } |
| 75 | |
| 76 | typedef struct { |
| 77 | wxUint16 u; |
| 78 | wxUint8 c; |
| 79 | } CharsetItem; |
| 80 | |
| 81 | extern "C" |
| 82 | { |
| 83 | static int wxCMPFUNC_CONV |
| 84 | CompareCharsetItems(const void *i1, const void *i2) |
| 85 | { |
| 86 | return ( ((CharsetItem*)i1) -> u - ((CharsetItem*)i2) -> u ); |
| 87 | } |
| 88 | } |
| 89 | |
| 90 | static CharsetItem* BuildReverseTable(const wxUint16 *tbl) |
| 91 | { |
| 92 | CharsetItem *rev = new CharsetItem[128]; |
| 93 | |
| 94 | for (int i = 0; i < 128; i++) |
| 95 | rev[i].c = wxUint8(128 + i), rev[i].u = tbl[i]; |
| 96 | |
| 97 | qsort(rev, 128, sizeof(CharsetItem), CompareCharsetItems); |
| 98 | |
| 99 | return rev; |
| 100 | } |
| 101 | |
| 102 | |
| 103 | |
| 104 | wxEncodingConverter::wxEncodingConverter() |
| 105 | { |
| 106 | m_Table = NULL; |
| 107 | m_UnicodeInput = m_UnicodeOutput = false; |
| 108 | m_JustCopy = false; |
| 109 | } |
| 110 | |
| 111 | |
| 112 | |
| 113 | bool wxEncodingConverter::Init(wxFontEncoding input_enc, wxFontEncoding output_enc, int method) |
| 114 | { |
| 115 | unsigned i; |
| 116 | const wxUint16 *in_tbl; |
| 117 | const wxUint16 *out_tbl = NULL; |
| 118 | |
| 119 | wxDELETEA(m_Table); |
| 120 | |
| 121 | if (input_enc == output_enc) {m_JustCopy = true; return true;} |
| 122 | |
| 123 | m_UnicodeOutput = (output_enc == wxFONTENCODING_UNICODE); |
| 124 | m_JustCopy = false; |
| 125 | |
| 126 | if (input_enc == wxFONTENCODING_UNICODE) |
| 127 | { |
| 128 | if ((out_tbl = GetEncTable(output_enc)) == NULL) return false; |
| 129 | |
| 130 | m_Table = new wchar_t[65536]; |
| 131 | for (i = 0; i < 128; i++) m_Table[i] = (wchar_t)i; // 7bit ASCII |
| 132 | for (i = 128; i < 65536; i++) m_Table[i] = (wchar_t)0; |
| 133 | |
| 134 | if (method == wxCONVERT_SUBSTITUTE) |
| 135 | { |
| 136 | for (i = 0; i < encoding_unicode_fallback_count; i++) |
| 137 | m_Table[encoding_unicode_fallback[i].c] = (wchar_t) encoding_unicode_fallback[i].s; |
| 138 | } |
| 139 | |
| 140 | for (i = 0; i < 128; i++) |
| 141 | m_Table[out_tbl[i]] = (wchar_t)(128 + i); |
| 142 | |
| 143 | m_UnicodeInput = true; |
| 144 | } |
| 145 | else // input !Unicode |
| 146 | { |
| 147 | if ((in_tbl = GetEncTable(input_enc)) == NULL) return false; |
| 148 | if (output_enc != wxFONTENCODING_UNICODE) |
| 149 | if ((out_tbl = GetEncTable(output_enc)) == NULL) return false; |
| 150 | |
| 151 | m_UnicodeInput = false; |
| 152 | |
| 153 | m_Table = new wchar_t[256]; |
| 154 | for (i = 0; i < 128; i++) m_Table[i] = (wchar_t)i; // 7bit ASCII |
| 155 | |
| 156 | if (output_enc == wxFONTENCODING_UNICODE) |
| 157 | { |
| 158 | for (i = 0; i < 128; i++) m_Table[128 + i] = (wchar_t)in_tbl[i]; |
| 159 | return true; |
| 160 | } |
| 161 | else // output !Unicode |
| 162 | { |
| 163 | CharsetItem *rev = BuildReverseTable(out_tbl); |
| 164 | CharsetItem *item; |
| 165 | CharsetItem key; |
| 166 | |
| 167 | for (i = 0; i < 128; i++) |
| 168 | { |
| 169 | key.u = in_tbl[i]; |
| 170 | item = (CharsetItem*) bsearch(&key, rev, 128, sizeof(CharsetItem), CompareCharsetItems); |
| 171 | if (item == NULL && method == wxCONVERT_SUBSTITUTE) |
| 172 | item = (CharsetItem*) bsearch(&key, encoding_unicode_fallback, |
| 173 | encoding_unicode_fallback_count, sizeof(CharsetItem), CompareCharsetItems); |
| 174 | if (item) |
| 175 | m_Table[128 + i] = (wchar_t)item -> c; |
| 176 | else |
| 177 | m_Table[128 + i] = (wchar_t)(128 + i); |
| 178 | } |
| 179 | |
| 180 | delete[] rev; |
| 181 | } |
| 182 | } |
| 183 | |
| 184 | return true; |
| 185 | } |
| 186 | |
| 187 | |
// Character emitted in place of input that has no mapping in the table.
#define REPLACEMENT_CHAR (L'?')

// Returns table[value]. A zero entry for a non-zero value means "no
// mapping": REPLACEMENT_CHAR is returned instead and repl is set to true.
// repl is never reset to false here, so it accumulates over several calls.
inline wchar_t GetTableValue(const wchar_t *table, wchar_t value, bool& repl)
{
    const wchar_t mapped = table[value];

    if (mapped != 0 || value == 0)
        return mapped;

    repl = true;
    return REPLACEMENT_CHAR;
}
| 200 | |
| 201 | |
| 202 | bool wxEncodingConverter::Convert(const char* input, char* output) const |
| 203 | { |
| 204 | wxASSERT_MSG(!m_UnicodeOutput, wxT("You cannot convert to unicode if output is const char*!")); |
| 205 | wxASSERT_MSG(!m_UnicodeInput, wxT("You cannot convert from unicode if input is const char*!")); |
| 206 | |
| 207 | const char *i; |
| 208 | char *o; |
| 209 | |
| 210 | if (m_JustCopy) |
| 211 | { |
| 212 | strcpy(output, input); |
| 213 | return true; |
| 214 | } |
| 215 | |
| 216 | wxCHECK_MSG(m_Table != NULL, false, |
| 217 | wxT("You must call wxEncodingConverter::Init() before actually converting!")); |
| 218 | |
| 219 | bool replaced = false; |
| 220 | |
| 221 | for (i = input, o = output; *i != 0;) |
| 222 | *(o++) = (char)(GetTableValue(m_Table, (wxUint8)*(i++), replaced)); |
| 223 | *o = 0; |
| 224 | |
| 225 | return !replaced; |
| 226 | } |
| 227 | |
| 228 | |
| 229 | bool wxEncodingConverter::Convert(const char* input, wchar_t* output) const |
| 230 | { |
| 231 | wxASSERT_MSG(m_UnicodeOutput, wxT("You cannot convert to 8-bit if output is const wchar_t*!")); |
| 232 | wxASSERT_MSG(!m_UnicodeInput, wxT("You cannot convert from unicode if input is const char*!")); |
| 233 | |
| 234 | const char *i; |
| 235 | wchar_t *o; |
| 236 | |
| 237 | if (m_JustCopy) |
| 238 | { |
| 239 | for (i = input, o = output; *i != 0;) |
| 240 | *(o++) = (wchar_t)(*(i++)); |
| 241 | *o = 0; |
| 242 | return true; |
| 243 | } |
| 244 | |
| 245 | wxCHECK_MSG(m_Table != NULL, false, |
| 246 | wxT("You must call wxEncodingConverter::Init() before actually converting!")); |
| 247 | |
| 248 | bool replaced = false; |
| 249 | |
| 250 | for (i = input, o = output; *i != 0;) |
| 251 | *(o++) = (wchar_t)(GetTableValue(m_Table, (wxUint8)*(i++), replaced)); |
| 252 | *o = 0; |
| 253 | |
| 254 | return !replaced; |
| 255 | } |
| 256 | |
| 257 | |
| 258 | |
| 259 | bool wxEncodingConverter::Convert(const wchar_t* input, char* output) const |
| 260 | { |
| 261 | wxASSERT_MSG(!m_UnicodeOutput, wxT("You cannot convert to unicode if output is const char*!")); |
| 262 | wxASSERT_MSG(m_UnicodeInput, wxT("You cannot convert from 8-bit if input is const wchar_t*!")); |
| 263 | |
| 264 | const wchar_t *i; |
| 265 | char *o; |
| 266 | |
| 267 | if (m_JustCopy) |
| 268 | { |
| 269 | for (i = input, o = output; *i != 0;) |
| 270 | *(o++) = (char)(*(i++)); |
| 271 | *o = 0; |
| 272 | return true; |
| 273 | } |
| 274 | |
| 275 | wxCHECK_MSG(m_Table != NULL, false, |
| 276 | wxT("You must call wxEncodingConverter::Init() before actually converting!")); |
| 277 | |
| 278 | bool replaced = false; |
| 279 | |
| 280 | for (i = input, o = output; *i != 0;) |
| 281 | *(o++) = (char)(GetTableValue(m_Table, (wxUint16)*(i++), replaced)); |
| 282 | *o = 0; |
| 283 | |
| 284 | return !replaced; |
| 285 | } |
| 286 | |
| 287 | |
| 288 | |
| 289 | bool wxEncodingConverter::Convert(const wchar_t* input, wchar_t* output) const |
| 290 | { |
| 291 | wxASSERT_MSG(m_UnicodeOutput, wxT("You cannot convert to 8-bit if output is const wchar_t*!")); |
| 292 | wxASSERT_MSG(m_UnicodeInput, wxT("You cannot convert from 8-bit if input is const wchar_t*!")); |
| 293 | |
| 294 | const wchar_t *i; |
| 295 | wchar_t *o; |
| 296 | |
| 297 | if (m_JustCopy) |
| 298 | { |
| 299 | // wcscpy() is not guaranteed to exist |
| 300 | for (i = input, o = output; *i != 0;) |
| 301 | *(o++) = (*(i++)); |
| 302 | *o = 0; |
| 303 | return true; |
| 304 | } |
| 305 | |
| 306 | wxCHECK_MSG(m_Table != NULL, false, |
| 307 | wxT("You must call wxEncodingConverter::Init() before actually converting!")); |
| 308 | |
| 309 | bool replaced = false; |
| 310 | |
| 311 | for (i = input, o = output; *i != 0;) |
| 312 | *(o++) = (wchar_t)(GetTableValue(m_Table, (wxUint8)*(i++), replaced)); |
| 313 | *o = 0; |
| 314 | |
| 315 | return !replaced; |
| 316 | } |
| 317 | |
| 318 | |
| 319 | wxString wxEncodingConverter::Convert(const wxString& input) const |
| 320 | { |
| 321 | if (m_JustCopy) return input; |
| 322 | |
| 323 | wxString s; |
| 324 | const wxChar *i; |
| 325 | |
| 326 | wxCHECK_MSG(m_Table != NULL, s, |
| 327 | wxT("You must call wxEncodingConverter::Init() before actually converting!")); |
| 328 | |
| 329 | if (m_UnicodeInput) |
| 330 | { |
| 331 | for (i = input.c_str(); *i != 0; i++) |
| 332 | s << (wxChar)(m_Table[(wxUint16)*i]); |
| 333 | } |
| 334 | else |
| 335 | { |
| 336 | for (i = input.c_str(); *i != 0; i++) |
| 337 | s << (wxChar)(m_Table[(wxUint8)*i]); |
| 338 | } |
| 339 | |
| 340 | return s; |
| 341 | } |
| 342 | |
| 343 | |
| 344 | |
| 345 | |
| 346 | |
| 347 | |
| 348 | |
// The following table describes classes of equivalent encodings: each
// top-level entry groups, per platform, the encodings used for one
// language/script.
//

// Marks the end of a per-platform encoding list (and, as the first entry of
// the first platform, the end of the whole table).
#define STOP wxFONTENCODING_SYSTEM

#define NUM_OF_PLATFORMS 4 /*must conform to enum wxPLATFORM_XXXX !!!*/
#define ENC_PER_PLATFORM 3
// Maximum number of encodings for one language used on one platform (each
// list has one extra slot for the STOP terminator). This is the largest
// count currently needed, so the library does not grow more than necessary;
// increase it only when a longer list is added. - MR

static const wxFontEncoding
    EquivalentEncodings[][NUM_OF_PLATFORMS][ENC_PER_PLATFORM+1] = {

    // *** Please put more common encodings first! ***

    // Western European
    {
        /* unix */ {wxFONTENCODING_ISO8859_1, wxFONTENCODING_ISO8859_15, STOP},
        /* windows */ {wxFONTENCODING_CP1252, STOP},
        /* os2 */ {STOP},
        /* mac */ {wxFONTENCODING_MACROMAN, STOP}
    },

    // Central European
    {
        /* unix */ {wxFONTENCODING_ISO8859_2, STOP},
        /* windows */ {wxFONTENCODING_CP1250, STOP},
        /* os2 */ {STOP},
        /* mac */ {wxFONTENCODING_MACCENTRALEUR, STOP}
    },

    // Baltic
    {
        /* unix */ {wxFONTENCODING_ISO8859_13, wxFONTENCODING_ISO8859_4, STOP},
        /* windows */ {wxFONTENCODING_CP1257, STOP},
        /* os2 */ {STOP},
        /* mac */ {STOP}
    },

    // Hebrew
    {
        /* unix */ {wxFONTENCODING_ISO8859_8, STOP},
        /* windows */ {wxFONTENCODING_CP1255, STOP},
        /* os2 */ {STOP},
        /* mac */ {wxFONTENCODING_MACHEBREW, STOP}
    },

    // Greek
    {
        /* unix */ {wxFONTENCODING_ISO8859_7, STOP},
        /* windows */ {wxFONTENCODING_CP1253, STOP},
        /* os2 */ {STOP},
        /* mac */ {wxFONTENCODING_MACGREEK, STOP}
    },

    // Arabic
    {
        /* unix */ {wxFONTENCODING_ISO8859_6, STOP},
        /* windows */ {wxFONTENCODING_CP1256, STOP},
        /* os2 */ {STOP},
        /* mac */ {wxFONTENCODING_MACARABIC, STOP}
    },

    // Turkish
    {
        /* unix */ {wxFONTENCODING_ISO8859_9, STOP},
        /* windows */ {wxFONTENCODING_CP1254, STOP},
        /* os2 */ {STOP},
        /* mac */ {wxFONTENCODING_MACTURKISH, STOP}
    },

    // Cyrillic
    {
        /* unix */ {wxFONTENCODING_KOI8, wxFONTENCODING_KOI8_U, wxFONTENCODING_ISO8859_5, STOP},
        /* windows */ {wxFONTENCODING_CP1251, STOP},
        /* os2 */ {STOP},
        /* mac */ {wxFONTENCODING_MACCYRILLIC, STOP}
    },

    // The lookup loops stop at the first class whose first platform list is
    // empty, so this entry must stay last.
    {{STOP},{STOP},{STOP},{STOP}} /* Terminator */
    /* no, _not_ Arnold! */
};
| 432 | |
| 433 | |
| 434 | static bool FindEncoding(const wxFontEncodingArray& arr, wxFontEncoding f) |
| 435 | { |
| 436 | for (wxFontEncodingArray::const_iterator it = arr.begin(), en = arr.end(); |
| 437 | it != en; ++it) |
| 438 | if (*it == f) |
| 439 | return true; |
| 440 | return false; |
| 441 | } |
| 442 | |
| 443 | wxFontEncodingArray wxEncodingConverter::GetPlatformEquivalents(wxFontEncoding enc, int platform) |
| 444 | { |
| 445 | if (platform == wxPLATFORM_CURRENT) |
| 446 | { |
| 447 | #if defined(__WXMSW__) |
| 448 | platform = wxPLATFORM_WINDOWS; |
| 449 | #elif defined(__WXGTK__) || defined(__WXMOTIF__) |
| 450 | platform = wxPLATFORM_UNIX; |
| 451 | #elif defined(__WXPM__) |
| 452 | platform = wxPLATFORM_OS2; |
| 453 | #elif defined(__WXMAC__) |
| 454 | platform = wxPLATFORM_MAC; |
| 455 | #endif |
| 456 | } |
| 457 | |
| 458 | int i, clas, e ; |
| 459 | const wxFontEncoding *f; |
| 460 | wxFontEncodingArray arr; |
| 461 | |
| 462 | clas = 0; |
| 463 | while (EquivalentEncodings[clas][0][0] != STOP) |
| 464 | { |
| 465 | for (i = 0; i < NUM_OF_PLATFORMS; i++) |
| 466 | for (e = 0; EquivalentEncodings[clas][i][e] != STOP; e++) |
| 467 | if (EquivalentEncodings[clas][i][e] == enc) |
| 468 | { |
| 469 | for (f = EquivalentEncodings[clas][platform]; *f != STOP; f++) |
| 470 | if (*f == enc) arr.push_back(enc); |
| 471 | for (f = EquivalentEncodings[clas][platform]; *f != STOP; f++) |
| 472 | if (!FindEncoding(arr, *f)) arr.push_back(*f); |
| 473 | i = NUM_OF_PLATFORMS/*hack*/; break; |
| 474 | } |
| 475 | clas++; |
| 476 | } |
| 477 | |
| 478 | return arr; |
| 479 | } |
| 480 | |
| 481 | |
| 482 | |
| 483 | wxFontEncodingArray wxEncodingConverter::GetAllEquivalents(wxFontEncoding enc) |
| 484 | { |
| 485 | int i, clas, e, j ; |
| 486 | const wxFontEncoding *f; |
| 487 | wxFontEncodingArray arr; |
| 488 | |
| 489 | arr = GetPlatformEquivalents(enc); // we want them to be first items in array |
| 490 | |
| 491 | clas = 0; |
| 492 | while (EquivalentEncodings[clas][0][0] != STOP) |
| 493 | { |
| 494 | for (i = 0; i < NUM_OF_PLATFORMS; i++) |
| 495 | for (e = 0; EquivalentEncodings[clas][i][e] != STOP; e++) |
| 496 | if (EquivalentEncodings[clas][i][e] == enc) |
| 497 | { |
| 498 | for (j = 0; j < NUM_OF_PLATFORMS; j++) |
| 499 | for (f = EquivalentEncodings[clas][j]; *f != STOP; f++) |
| 500 | if (!FindEncoding(arr, *f)) arr.push_back(*f); |
| 501 | i = NUM_OF_PLATFORMS/*hack*/; break; |
| 502 | } |
| 503 | clas++; |
| 504 | } |
| 505 | |
| 506 | return arr; |
| 507 | } |
| 508 | |