src/common/encconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        encconv.cpp
   3 // Purpose:     wxEncodingConverter class for converting between different
   4 //              font encodings
   5 // Author:      Vaclav Slavik
   6 // Copyright:   (c) 1999 Vaclav Slavik
   7 // Licence:     wxWindows licence
   8 /////////////////////////////////////////////////////////////////////////////
   9
  10 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
  11 #pragma implementation "encconv.h"
  12 #endif
  13
  14 // For compilers that support precompilation, includes "wx.h".
  15 #include "wx/wxprec.h"
  16
  17 #ifdef __BORLANDC__
  18   #pragma hdrstop
  19 #endif
  20
  21 #if wxUSE_FONTMAP
  22
  23 #include "wx/encconv.h"
  24
  25 #include <stdlib.h>
  26
  27 // conversion tables, generated by scripts in $(WXWIN)/misc/unictabl:
  28 #if defined( __BORLANDC__ ) || defined(__DARWIN__)
  29     #include "../common/unictabl.inc"
  30 #else
  31     #include "unictabl.inc"
  32 #endif
  33
  34 #if wxUSE_WCHAR_T
  35     typedef wchar_t tchar;
  36 #else
  37     typedef char tchar;
  38 #endif
  39
  40 #ifdef __WXMAC__
  41     #include <ATSUnicode.h>
  42     #include <TextCommon.h>
  43     #include <TextEncodingConverter.h>
  44
  45     #include "wx/fontutil.h"
  46     #include "wx/mac/private.h"  // includes mac headers
  47
  48     wxUint16 gMacEncodings[wxFONTENCODING_MACMAX-wxFONTENCODING_MACMIN+1][128] ;
  49     bool gMacEncodingsInited[wxFONTENCODING_MACMAX-wxFONTENCODING_MACMIN+1] ;
  50 #endif
  51
  52 #ifdef __WXWINCE__
  53     #include "wx/msw/wince/missing.h"       // for bsearch()
  54 #endif
  55
  56 static wxUint16* GetEncTable(wxFontEncoding enc)
  57 {
  58 #ifdef __WXMAC__
  59     if( enc >= wxFONTENCODING_MACMIN && enc <= wxFONTENCODING_MACMAX )
  60     {
  61         int i = enc-wxFONTENCODING_MACMIN ;
  62         if ( gMacEncodingsInited[i] == false )
  63         {
  64             TECObjectRef converter ;
  65             TextEncodingBase code = wxMacGetSystemEncFromFontEnc( enc ) ;
  66             TextEncodingBase unicode = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
  67             OSStatus status = TECCreateConverter(&converter,code,unicode);
  68             char s[2] ;
  69             s[1] = 0 ;
  70             ByteCount byteInLen, byteOutLen ;
  71             for( unsigned char c = 255 ; c >= 128 ; --c )
  72             {
  73                 s[0] = c ;
  74                 status = TECConvertText(converter, (ConstTextPtr) &s , 1, &byteInLen,
  75                 (TextPtr) &gMacEncodings[i][c-128] , 2, &byteOutLen);
  76             }
  77             status = TECDisposeConverter(converter);
  78             gMacEncodingsInited[i]=true;
  79         }
  80         return gMacEncodings[i] ;
  81     }
  82 #endif
  83
  84     for (int i = 0; encodings_list[i].table != NULL; i++)
  85     {
  86         if (encodings_list[i].encoding == enc)
  87             return encodings_list[i].table;
  88     }
  89     return NULL;
  90 }
  91
  92 typedef struct {
  93     wxUint16 u;
  94     wxUint8  c;
  95 } CharsetItem;
  96
  97 extern "C" int wxCMPFUNC_CONV
  98 CompareCharsetItems(const void *i1, const void *i2)
  99 {
 100     return ( ((CharsetItem*)i1) -> u - ((CharsetItem*)i2) -> u );
 101 }
 102
 103
 104 static CharsetItem* BuildReverseTable(wxUint16 *tbl)
 105 {
 106     CharsetItem *rev = new CharsetItem[128];
 107
 108     for (int i = 0; i < 128; i++)
 109         rev[i].c = wxUint8(128 + i), rev[i].u = tbl[i];
 110
 111     qsort(rev, 128, sizeof(CharsetItem), CompareCharsetItems);
 112
 113     return rev;
 114 }
 115
 116
 117
 118 wxEncodingConverter::wxEncodingConverter()
 119 {
 120     m_Table = NULL;
 121     m_UnicodeInput = m_UnicodeOutput = false;
 122     m_JustCopy = false;
 123 }
 124
 125
 126
 127 bool wxEncodingConverter::Init(wxFontEncoding input_enc, wxFontEncoding output_enc, int method)
 128 {
 129     unsigned i;
 130     wxUint16 *in_tbl, *out_tbl = NULL;
 131
 132     if (m_Table) {delete[] m_Table; m_Table = NULL;}
 133
 134 #if !wxUSE_WCHAR_T
 135     if (input_enc == wxFONTENCODING_UNICODE || output_enc == wxFONTENCODING_UNICODE) return false;
 136 #endif
 137
 138     if (input_enc == output_enc) {m_JustCopy = true; return true;}
 139
 140     m_UnicodeOutput = (output_enc == wxFONTENCODING_UNICODE);
 141     m_JustCopy = false;
 142
 143     if (input_enc == wxFONTENCODING_UNICODE)
 144     {
 145         if ((out_tbl = GetEncTable(output_enc)) == NULL) return false;
 146
 147         m_Table = new tchar[65536];
 148         for (i = 0; i < 128; i++)  m_Table[i] = (tchar)i; // 7bit ASCII
 149         for (i = 128; i < 65536; i++)  m_Table[i] = (tchar)0;
 150
 151         if (method == wxCONVERT_SUBSTITUTE)
 152         {
 153             for (i = 0; i < encoding_unicode_fallback_count; i++)
 154                 m_Table[encoding_unicode_fallback[i].c] = (tchar) encoding_unicode_fallback[i].s;
 155         }
 156
 157         for (i = 0; i < 128; i++)
 158             m_Table[out_tbl[i]] = (tchar)(128 + i);
 159
 160         m_UnicodeInput = true;
 161     }
 162     else // input !Unicode
 163     {
 164         if ((in_tbl = GetEncTable(input_enc)) == NULL) return false;
 165         if (output_enc != wxFONTENCODING_UNICODE)
 166             if ((out_tbl = GetEncTable(output_enc)) == NULL) return false;
 167
 168         m_UnicodeInput = false;
 169
 170         m_Table = new tchar[256];
 171         for (i = 0; i < 128; i++)  m_Table[i] = (tchar)i; // 7bit ASCII
 172
 173         if (output_enc == wxFONTENCODING_UNICODE)
 174         {
 175             for (i = 0; i < 128; i++)  m_Table[128 + i] = (tchar)in_tbl[i];
 176             return true;
 177         }
 178         else // output !Unicode
 179         {
 180             CharsetItem *rev = BuildReverseTable(out_tbl);
 181             CharsetItem *item;
 182             CharsetItem key;
 183
 184             for (i = 0; i < 128; i++)
 185             {
 186                 key.u = in_tbl[i];
 187                 item = (CharsetItem*) bsearch(&key, rev, 128, sizeof(CharsetItem), CompareCharsetItems);
 188                 if (item == NULL && method == wxCONVERT_SUBSTITUTE)
 189                     item = (CharsetItem*) bsearch(&key, encoding_unicode_fallback,
 190                                 encoding_unicode_fallback_count, sizeof(CharsetItem), CompareCharsetItems);
 191                 if (item)
 192                     m_Table[128 + i] = (tchar)item -> c;
 193                 else
 194 #if wxUSE_WCHAR_T
 195                     m_Table[128 + i] = (wchar_t)(128 + i);
 196 #else
 197                     m_Table[128 + i] = (char)(128 + i);
 198 #endif
 199             }
 200
 201             delete[] rev;
 202         }
 203     }
 204
 205     return true;
 206 }
 207
 208
 209 #define REPLACEMENT_CHAR  ((tchar)'?')
 210
 211 inline tchar GetTableValue(const tchar *table, tchar value, bool& repl)
 212 {
 213     tchar r = table[value];
 214     if (r == 0 && value != 0)
 215     {
 216         r = REPLACEMENT_CHAR;
 217         repl = true;
 218     }
 219     return r;
 220 }
 221
 222
 223 bool wxEncodingConverter::Convert(const char* input, char* output) const
 224 {
 225     wxASSERT_MSG(!m_UnicodeOutput, wxT("You cannot convert to unicode if output is const char*!"));
 226     wxASSERT_MSG(!m_UnicodeInput, wxT("You cannot convert from unicode if input is const char*!"));
 227
 228     const char *i;
 229     char *o;
 230
 231     if (m_JustCopy)
 232     {
 233         strcpy(output, input);
 234         return true;
 235     }
 236
 237     wxCHECK_MSG(m_Table != NULL, false,
 238                 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 239
 240     bool replaced = false;
 241
 242     for (i = input, o = output; *i != 0;)
 243         *(o++) = (char)(GetTableValue(m_Table, (wxUint8)*(i++), replaced));
 244     *o = 0;
 245
 246     return !replaced;
 247 }
 248
 249
 250 #if wxUSE_WCHAR_T
 251
 252 bool wxEncodingConverter::Convert(const char* input, wchar_t* output) const
 253 {
 254     wxASSERT_MSG(m_UnicodeOutput, wxT("You cannot convert to 8-bit if output is const wchar_t*!"));
 255     wxASSERT_MSG(!m_UnicodeInput, wxT("You cannot convert from unicode if input is const char*!"));
 256
 257     const char *i;
 258     wchar_t *o;
 259
 260     if (m_JustCopy)
 261     {
 262         for (i = input, o = output; *i != 0;)
 263             *(o++) = (wchar_t)(*(i++));
 264         *o = 0;
 265         return true;
 266     }
 267
 268     wxCHECK_MSG(m_Table != NULL, false,
 269                 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 270
 271     bool replaced = false;
 272
 273     for (i = input, o = output; *i != 0;)
 274         *(o++) = (wchar_t)(GetTableValue(m_Table, (wxUint8)*(i++), replaced));
 275     *o = 0;
 276
 277     return !replaced;
 278 }
 279
 280
 281
 282 bool wxEncodingConverter::Convert(const wchar_t* input, char* output) const
 283 {
 284     wxASSERT_MSG(!m_UnicodeOutput, wxT("You cannot convert to unicode if output is const char*!"));
 285     wxASSERT_MSG(m_UnicodeInput, wxT("You cannot convert from 8-bit if input is const wchar_t*!"));
 286
 287     const wchar_t *i;
 288     char *o;
 289
 290     if (m_JustCopy)
 291     {
 292         for (i = input, o = output; *i != 0;)
 293             *(o++) = (char)(*(i++));
 294         *o = 0;
 295         return true;
 296     }
 297
 298     wxCHECK_MSG(m_Table != NULL, false,
 299                 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 300
 301     bool replaced = false;
 302
 303     for (i = input, o = output; *i != 0;)
 304         *(o++) = (char)(GetTableValue(m_Table, (wxUint16)*(i++), replaced));
 305     *o = 0;
 306
 307     return !replaced;
 308 }
 309
 310
 311
 312 bool wxEncodingConverter::Convert(const wchar_t* input, wchar_t* output) const
 313 {
 314     wxASSERT_MSG(m_UnicodeOutput, wxT("You cannot convert to 8-bit if output is const wchar_t*!"));
 315     wxASSERT_MSG(m_UnicodeInput, wxT("You cannot convert from 8-bit if input is const wchar_t*!"));
 316
 317     const wchar_t *i;
 318     wchar_t *o;
 319
 320     if (m_JustCopy)
 321     {
 322         // wcscpy() is not guaranteed to exist
 323         for (i = input, o = output; *i != 0;)
 324             *(o++) = (*(i++));
 325         *o = 0;
 326         return true;
 327     }
 328
 329     wxCHECK_MSG(m_Table != NULL, false,
 330                 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 331
 332     bool replaced = false;
 333
 334     for (i = input, o = output; *i != 0;)
 335         *(o++) = (wchar_t)(GetTableValue(m_Table, (wxUint8)*(i++), replaced));
 336     *o = 0;
 337
 338     return !replaced;
 339 }
 340
 341 #endif // wxUSE_WCHAR_T
 342
 343
 344 wxString wxEncodingConverter::Convert(const wxString& input) const
 345 {
 346     if (m_JustCopy) return input;
 347
 348     wxString s;
 349     const wxChar *i;
 350
 351     wxCHECK_MSG(m_Table != NULL, s,
 352                 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 353
 354     if (m_UnicodeInput)
 355     {
 356         for (i = input.c_str(); *i != 0; i++)
 357             s << (wxChar)(m_Table[(wxUint16)*i]);
 358     }
 359     else
 360     {
 361         for (i = input.c_str(); *i != 0; i++)
 362             s << (wxChar)(m_Table[(wxUint8)*i]);
 363     }
 364
 365     return s;
 366 }
 367
 368
 369
 370
 371
 372
 373
 374 // Following tables describe classes of encoding equivalence.
 375 //
 376
 377 #define STOP wxFONTENCODING_SYSTEM
 378
 379 #define NUM_OF_PLATFORMS  4 /*must conform to enum wxPLATFORM_XXXX !!!*/
 380 #define ENC_PER_PLATFORM  5
 381            // max no. of encodings for one language used on one platform
 382            // Anybody thinks 5 is not enough? ;-)
 383
 384 static wxFontEncoding
 385     EquivalentEncodings[][NUM_OF_PLATFORMS][ENC_PER_PLATFORM+1] = {
 386
 387     // *** Please put more common encodings as first! ***
 388
 389     // Western European
 390     {
 391         /* unix    */ {wxFONTENCODING_ISO8859_1, wxFONTENCODING_ISO8859_15, STOP},
 392         /* windows */ {wxFONTENCODING_CP1252, STOP},
 393         /* os2     */ {STOP},
 394         /* mac     */ {wxFONTENCODING_MACROMAN, STOP}
 395     },
 396
 397     // Central European
 398     {
 399         /* unix    */ {wxFONTENCODING_ISO8859_2, STOP},
 400         /* windows */ {wxFONTENCODING_CP1250, STOP},
 401         /* os2     */ {STOP},
 402         /* mac     */ {wxFONTENCODING_MACCENTRALEUR, STOP}
 403     },
 404
 405     // Baltic
 406     {
 407         /* unix    */ {wxFONTENCODING_ISO8859_13, wxFONTENCODING_ISO8859_4, STOP},
 408         /* windows */ {wxFONTENCODING_CP1257, STOP},
 409         /* os2     */ {STOP},
 410         /* mac     */ {STOP}
 411     },
 412
 413     // Hebrew
 414     {
 415         /* unix    */ {wxFONTENCODING_ISO8859_8, STOP},
 416         /* windows */ {wxFONTENCODING_CP1255, STOP},
 417         /* os2     */ {STOP},
 418         /* mac     */ {wxFONTENCODING_MACHEBREW, STOP}
 419     },
 420
 421     // Greek
 422     {
 423         /* unix    */ {wxFONTENCODING_ISO8859_7, STOP},
 424         /* windows */ {wxFONTENCODING_CP1253, STOP},
 425         /* os2     */ {STOP},
 426         /* mac     */ {wxFONTENCODING_MACGREEK, STOP}
 427     },
 428
 429     // Arabic
 430     {
 431         /* unix    */ {wxFONTENCODING_ISO8859_6, STOP},
 432         /* windows */ {wxFONTENCODING_CP1256, STOP},
 433         /* os2     */ {STOP},
 434         /* mac     */ {wxFONTENCODING_MACARABIC, STOP}
 435     },
 436
 437     // Turkish
 438     {
 439         /* unix    */ {wxFONTENCODING_ISO8859_9, STOP},
 440         /* windows */ {wxFONTENCODING_CP1254, STOP},
 441         /* os2     */ {STOP},
 442         /* mac     */ {wxFONTENCODING_MACTURKISH, STOP}
 443     },
 444
 445     // Cyrillic
 446     {
 447         /* unix    */ {wxFONTENCODING_KOI8, wxFONTENCODING_KOI8_U, wxFONTENCODING_ISO8859_5, STOP},
 448         /* windows */ {wxFONTENCODING_CP1251, STOP},
 449         /* os2     */ {STOP},
 450         /* mac     */ {wxFONTENCODING_MACCYRILLIC, STOP}
 451     },
 452
 453     {{STOP},{STOP},{STOP},{STOP}} /* Terminator */
 454     /* no, _not_ Arnold! */
 455 };
 456
 457
 458 static bool FindEncoding(const wxFontEncodingArray& arr, wxFontEncoding f)
 459 {
 460     for (wxFontEncodingArray::const_iterator it = arr.begin(), en = arr.end();
 461          it != en; ++it)
 462         if (*it == f)
 463             return true;
 464     return false;
 465 }
 466
 467 wxFontEncodingArray wxEncodingConverter::GetPlatformEquivalents(wxFontEncoding enc, int platform)
 468 {
 469     if (platform == wxPLATFORM_CURRENT)
 470     {
 471 #if defined(__WXMSW__)
 472         platform = wxPLATFORM_WINDOWS;
 473 #elif defined(__WXGTK__) || defined(__WXMOTIF__)
 474         platform = wxPLATFORM_UNIX;
 475 #elif defined(__WXOS2__)
 476         platform = wxPLATFORM_OS2;
 477 #elif defined(__WXMAC__)
 478         platform = wxPLATFORM_MAC;
 479 #endif
 480     }
 481
 482     int i, clas, e ;
 483     wxFontEncoding *f;
 484     wxFontEncodingArray arr;
 485
 486     clas = 0;
 487     while (EquivalentEncodings[clas][0][0] != STOP)
 488     {
 489         for (i = 0; i < NUM_OF_PLATFORMS; i++)
 490             for (e = 0; EquivalentEncodings[clas][i][e] != STOP; e++)
 491                 if (EquivalentEncodings[clas][i][e] == enc)
 492                 {
 493                     for (f = EquivalentEncodings[clas][platform]; *f != STOP; f++)
 494                         if (*f == enc) arr.push_back(enc);
 495                     for (f = EquivalentEncodings[clas][platform]; *f != STOP; f++)
 496                         if (!FindEncoding(arr, *f)) arr.push_back(*f);
 497                     i = NUM_OF_PLATFORMS/*hack*/; break;
 498                 }
 499         clas++;
 500     }
 501
 502     return arr;
 503 }
 504
 505
 506
 507 wxFontEncodingArray wxEncodingConverter::GetAllEquivalents(wxFontEncoding enc)
 508 {
 509     int i, clas, e, j ;
 510     wxFontEncoding *f;
 511     wxFontEncodingArray arr;
 512
 513     arr = GetPlatformEquivalents(enc); // we want them to be first items in array
 514
 515     clas = 0;
 516     while (EquivalentEncodings[clas][0][0] != STOP)
 517     {
 518         for (i = 0; i < NUM_OF_PLATFORMS; i++)
 519             for (e = 0; EquivalentEncodings[clas][i][e] != STOP; e++)
 520                 if (EquivalentEncodings[clas][i][e] == enc)
 521                 {
 522                     for (j = 0; j < NUM_OF_PLATFORMS; j++)
 523                         for (f = EquivalentEncodings[clas][j]; *f != STOP; f++)
 524                             if (!FindEncoding(arr, *f)) arr.push_back(*f);
 525                     i = NUM_OF_PLATFORMS/*hack*/; break;
 526                 }
 527         clas++;
 528     }
 529
 530     return arr;
 531 }
 532
 533 #endif // wxUSE_FONTMAP