src/common/encconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        encconv.cpp
   3 // Purpose:     wxEncodingConverter class for converting between different
   4 //              font encodings
   5 // Author:      Vaclav Slavik
   6 // Copyright:   (c) 1999 Vaclav Slavik
   7 // Licence:     wxWindows licence
   8 /////////////////////////////////////////////////////////////////////////////
   9
  10 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
  11 #pragma implementation "encconv.h"
  12 #endif
  13
  14 // For compilers that support precompilation, includes "wx.h".
  15 #include "wx/wxprec.h"
  16
  17 #ifdef __BORLANDC__
  18   #pragma hdrstop
  19 #endif
  20
  21 #include "wx/encconv.h"
  22
  23 #include <stdlib.h>
  24
  25 // conversion tables, generated by scripts in $(WXWIN)/misc/unictabl:
  26 #if defined( __BORLANDC__ ) || defined(__DARWIN__)
  27     #include "../common/unictabl.inc"
  28 #else
  29     #include "unictabl.inc"
  30 #endif
  31
  32 #if wxUSE_WCHAR_T
  33     typedef wchar_t tchar;
  34 #else
  35     typedef char tchar;
  36 #endif
  37
  38 #ifdef __WXMAC__
  39 #ifdef __DARWIN__
  40 #include <Carbon/Carbon.h>
  41 #else
  42 #include <ATSUnicode.h>
  43 #include <TextCommon.h>
  44 #include <TextEncodingConverter.h>
  45 #endif
  46     #include "wx/fontutil.h"
  47     #include "wx/mac/private.h"  // includes mac headers
  48
  49     wxUint16 gMacEncodings[wxFONTENCODING_MACMAX-wxFONTENCODING_MACMIN+1][128] ;
  50     bool gMacEncodingsInited[wxFONTENCODING_MACMAX-wxFONTENCODING_MACMIN+1] ;
  51 #endif
  52
  53 #ifdef __WXWINCE__
  54     #include "wx/msw/wince/missing.h"       // for bsearch()
  55 #endif
  56
  57 static wxUint16* GetEncTable(wxFontEncoding enc)
  58 {
  59 #ifdef __WXMAC__
  60     if( enc >= wxFONTENCODING_MACMIN && enc <= wxFONTENCODING_MACMAX )
  61     {
  62         int i = enc-wxFONTENCODING_MACMIN ;
  63         if ( gMacEncodingsInited[i] == false )
  64         {
  65             TECObjectRef converter ;
  66             TextEncodingBase code = wxMacGetSystemEncFromFontEnc( enc ) ;
  67             TextEncodingBase unicode = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
  68             OSStatus status = TECCreateConverter(&converter,code,unicode);
  69             char s[2] ;
  70             s[1] = 0 ;
  71             ByteCount byteInLen, byteOutLen ;
  72             for( unsigned char c = 255 ; c >= 128 ; --c )
  73             {
  74                 s[0] = c ;
  75                 status = TECConvertText(converter, (ConstTextPtr) &s , 1, &byteInLen,
  76                 (TextPtr) &gMacEncodings[i][c-128] , 2, &byteOutLen);
  77             }
  78             status = TECDisposeConverter(converter);
  79             gMacEncodingsInited[i]=true;
  80         }
  81         return gMacEncodings[i] ;
  82     }
  83 #endif
  84
  85     for (int i = 0; encodings_list[i].table != NULL; i++)
  86     {
  87         if (encodings_list[i].encoding == enc)
  88             return encodings_list[i].table;
  89     }
  90     return NULL;
  91 }
  92
  93 typedef struct {
  94     wxUint16 u;
  95     wxUint8  c;
  96 } CharsetItem;
  97
  98 extern "C" int wxCMPFUNC_CONV
  99 CompareCharsetItems(const void *i1, const void *i2)
 100 {
 101     return ( ((CharsetItem*)i1) -> u - ((CharsetItem*)i2) -> u );
 102 }
 103
 104
 105 static CharsetItem* BuildReverseTable(wxUint16 *tbl)
 106 {
 107     CharsetItem *rev = new CharsetItem[128];
 108
 109     for (int i = 0; i < 128; i++)
 110         rev[i].c = wxUint8(128 + i), rev[i].u = tbl[i];
 111
 112     qsort(rev, 128, sizeof(CharsetItem), CompareCharsetItems);
 113
 114     return rev;
 115 }
 116
 117
 118
 119 wxEncodingConverter::wxEncodingConverter()
 120 {
 121     m_Table = NULL;
 122     m_UnicodeInput = m_UnicodeOutput = false;
 123     m_JustCopy = false;
 124 }
 125
 126
 127
 128 bool wxEncodingConverter::Init(wxFontEncoding input_enc, wxFontEncoding output_enc, int method)
 129 {
 130     unsigned i;
 131     wxUint16 *in_tbl, *out_tbl = NULL;
 132
 133     if (m_Table) {delete[] m_Table; m_Table = NULL;}
 134
 135 #if !wxUSE_WCHAR_T
 136     if (input_enc == wxFONTENCODING_UNICODE || output_enc == wxFONTENCODING_UNICODE) return false;
 137 #endif
 138
 139     if (input_enc == output_enc) {m_JustCopy = true; return true;}
 140
 141     m_UnicodeOutput = (output_enc == wxFONTENCODING_UNICODE);
 142     m_JustCopy = false;
 143
 144     if (input_enc == wxFONTENCODING_UNICODE)
 145     {
 146         if ((out_tbl = GetEncTable(output_enc)) == NULL) return false;
 147
 148         m_Table = new tchar[65536];
 149         for (i = 0; i < 128; i++)  m_Table[i] = (tchar)i; // 7bit ASCII
 150         for (i = 128; i < 65536; i++)  m_Table[i] = (tchar)0;
 151
 152         if (method == wxCONVERT_SUBSTITUTE)
 153         {
 154             for (i = 0; i < encoding_unicode_fallback_count; i++)
 155                 m_Table[encoding_unicode_fallback[i].c] = (tchar) encoding_unicode_fallback[i].s;
 156         }
 157
 158         for (i = 0; i < 128; i++)
 159             m_Table[out_tbl[i]] = (tchar)(128 + i);
 160
 161         m_UnicodeInput = true;
 162     }
 163     else // input !Unicode
 164     {
 165         if ((in_tbl = GetEncTable(input_enc)) == NULL) return false;
 166         if (output_enc != wxFONTENCODING_UNICODE)
 167             if ((out_tbl = GetEncTable(output_enc)) == NULL) return false;
 168
 169         m_UnicodeInput = false;
 170
 171         m_Table = new tchar[256];
 172         for (i = 0; i < 128; i++)  m_Table[i] = (tchar)i; // 7bit ASCII
 173
 174         if (output_enc == wxFONTENCODING_UNICODE)
 175         {
 176             for (i = 0; i < 128; i++)  m_Table[128 + i] = (tchar)in_tbl[i];
 177             return true;
 178         }
 179         else // output !Unicode
 180         {
 181             CharsetItem *rev = BuildReverseTable(out_tbl);
 182             CharsetItem *item;
 183             CharsetItem key;
 184
 185             for (i = 0; i < 128; i++)
 186             {
 187                 key.u = in_tbl[i];
 188                 item = (CharsetItem*) bsearch(&key, rev, 128, sizeof(CharsetItem), CompareCharsetItems);
 189                 if (item == NULL && method == wxCONVERT_SUBSTITUTE)
 190                     item = (CharsetItem*) bsearch(&key, encoding_unicode_fallback,
 191                                 encoding_unicode_fallback_count, sizeof(CharsetItem), CompareCharsetItems);
 192                 if (item)
 193                     m_Table[128 + i] = (tchar)item -> c;
 194                 else
 195 #if wxUSE_WCHAR_T
 196                     m_Table[128 + i] = (wchar_t)(128 + i);
 197 #else
 198                     m_Table[128 + i] = (char)(128 + i);
 199 #endif
 200             }
 201
 202             delete[] rev;
 203         }
 204     }
 205
 206     return true;
 207 }
 208
 209
 210 #define REPLACEMENT_CHAR  ((tchar)'?')
 211
 212 inline tchar GetTableValue(const tchar *table, tchar value, bool& repl)
 213 {
 214     tchar r = table[value];
 215     if (r == 0 && value != 0)
 216     {
 217         r = REPLACEMENT_CHAR;
 218         repl = true;
 219     }
 220     return r;
 221 }
 222
 223
 224 bool wxEncodingConverter::Convert(const char* input, char* output) const
 225 {
 226     wxASSERT_MSG(!m_UnicodeOutput, wxT("You cannot convert to unicode if output is const char*!"));
 227     wxASSERT_MSG(!m_UnicodeInput, wxT("You cannot convert from unicode if input is const char*!"));
 228
 229     const char *i;
 230     char *o;
 231
 232     if (m_JustCopy)
 233     {
 234         strcpy(output, input);
 235         return true;
 236     }
 237
 238     wxCHECK_MSG(m_Table != NULL, false,
 239                 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 240
 241     bool replaced = false;
 242
 243     for (i = input, o = output; *i != 0;)
 244         *(o++) = (char)(GetTableValue(m_Table, (wxUint8)*(i++), replaced));
 245     *o = 0;
 246
 247     return !replaced;
 248 }
 249
 250
 251 #if wxUSE_WCHAR_T
 252
 253 bool wxEncodingConverter::Convert(const char* input, wchar_t* output) const
 254 {
 255     wxASSERT_MSG(m_UnicodeOutput, wxT("You cannot convert to 8-bit if output is const wchar_t*!"));
 256     wxASSERT_MSG(!m_UnicodeInput, wxT("You cannot convert from unicode if input is const char*!"));
 257
 258     const char *i;
 259     wchar_t *o;
 260
 261     if (m_JustCopy)
 262     {
 263         for (i = input, o = output; *i != 0;)
 264             *(o++) = (wchar_t)(*(i++));
 265         *o = 0;
 266         return true;
 267     }
 268
 269     wxCHECK_MSG(m_Table != NULL, false,
 270                 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 271
 272     bool replaced = false;
 273
 274     for (i = input, o = output; *i != 0;)
 275         *(o++) = (wchar_t)(GetTableValue(m_Table, (wxUint8)*(i++), replaced));
 276     *o = 0;
 277
 278     return !replaced;
 279 }
 280
 281
 282
 283 bool wxEncodingConverter::Convert(const wchar_t* input, char* output) const
 284 {
 285     wxASSERT_MSG(!m_UnicodeOutput, wxT("You cannot convert to unicode if output is const char*!"));
 286     wxASSERT_MSG(m_UnicodeInput, wxT("You cannot convert from 8-bit if input is const wchar_t*!"));
 287
 288     const wchar_t *i;
 289     char *o;
 290
 291     if (m_JustCopy)
 292     {
 293         for (i = input, o = output; *i != 0;)
 294             *(o++) = (char)(*(i++));
 295         *o = 0;
 296         return true;
 297     }
 298
 299     wxCHECK_MSG(m_Table != NULL, false,
 300                 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 301
 302     bool replaced = false;
 303
 304     for (i = input, o = output; *i != 0;)
 305         *(o++) = (char)(GetTableValue(m_Table, (wxUint16)*(i++), replaced));
 306     *o = 0;
 307
 308     return !replaced;
 309 }
 310
 311
 312
 313 bool wxEncodingConverter::Convert(const wchar_t* input, wchar_t* output) const
 314 {
 315     wxASSERT_MSG(m_UnicodeOutput, wxT("You cannot convert to 8-bit if output is const wchar_t*!"));
 316     wxASSERT_MSG(m_UnicodeInput, wxT("You cannot convert from 8-bit if input is const wchar_t*!"));
 317
 318     const wchar_t *i;
 319     wchar_t *o;
 320
 321     if (m_JustCopy)
 322     {
 323         // wcscpy() is not guaranteed to exist
 324         for (i = input, o = output; *i != 0;)
 325             *(o++) = (*(i++));
 326         *o = 0;
 327         return true;
 328     }
 329
 330     wxCHECK_MSG(m_Table != NULL, false,
 331                 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 332
 333     bool replaced = false;
 334
 335     for (i = input, o = output; *i != 0;)
 336         *(o++) = (wchar_t)(GetTableValue(m_Table, (wxUint8)*(i++), replaced));
 337     *o = 0;
 338
 339     return !replaced;
 340 }
 341
 342 #endif // wxUSE_WCHAR_T
 343
 344
 345 wxString wxEncodingConverter::Convert(const wxString& input) const
 346 {
 347     if (m_JustCopy) return input;
 348
 349     wxString s;
 350     const wxChar *i;
 351
 352     wxCHECK_MSG(m_Table != NULL, s,
 353                 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 354
 355     if (m_UnicodeInput)
 356     {
 357         for (i = input.c_str(); *i != 0; i++)
 358             s << (wxChar)(m_Table[(wxUint16)*i]);
 359     }
 360     else
 361     {
 362         for (i = input.c_str(); *i != 0; i++)
 363             s << (wxChar)(m_Table[(wxUint8)*i]);
 364     }
 365
 366     return s;
 367 }
 368
 369
 370
 371
 372
 373
 374
 375 // Following tables describe classes of encoding equivalence.
 376 //
 377
 378 #define STOP wxFONTENCODING_SYSTEM
 379
 380 #define NUM_OF_PLATFORMS  4 /*must conform to enum wxPLATFORM_XXXX !!!*/
 381 #define ENC_PER_PLATFORM  5
 382            // max no. of encodings for one language used on one platform
 383            // Anybody thinks 5 is not enough? ;-)
 384
 385 static wxFontEncoding
 386     EquivalentEncodings[][NUM_OF_PLATFORMS][ENC_PER_PLATFORM+1] = {
 387
 388     // *** Please put more common encodings as first! ***
 389
 390     // Western European
 391     {
 392         /* unix    */ {wxFONTENCODING_ISO8859_1, wxFONTENCODING_ISO8859_15, STOP},
 393         /* windows */ {wxFONTENCODING_CP1252, STOP},
 394         /* os2     */ {STOP},
 395         /* mac     */ {wxFONTENCODING_MACROMAN, STOP}
 396     },
 397
 398     // Central European
 399     {
 400         /* unix    */ {wxFONTENCODING_ISO8859_2, STOP},
 401         /* windows */ {wxFONTENCODING_CP1250, STOP},
 402         /* os2     */ {STOP},
 403         /* mac     */ {wxFONTENCODING_MACCENTRALEUR, STOP}
 404     },
 405
 406     // Baltic
 407     {
 408         /* unix    */ {wxFONTENCODING_ISO8859_13, wxFONTENCODING_ISO8859_4, STOP},
 409         /* windows */ {wxFONTENCODING_CP1257, STOP},
 410         /* os2     */ {STOP},
 411         /* mac     */ {STOP}
 412     },
 413
 414     // Hebrew
 415     {
 416         /* unix    */ {wxFONTENCODING_ISO8859_8, STOP},
 417         /* windows */ {wxFONTENCODING_CP1255, STOP},
 418         /* os2     */ {STOP},
 419         /* mac     */ {wxFONTENCODING_MACHEBREW, STOP}
 420     },
 421
 422     // Greek
 423     {
 424         /* unix    */ {wxFONTENCODING_ISO8859_7, STOP},
 425         /* windows */ {wxFONTENCODING_CP1253, STOP},
 426         /* os2     */ {STOP},
 427         /* mac     */ {wxFONTENCODING_MACGREEK, STOP}
 428     },
 429
 430     // Arabic
 431     {
 432         /* unix    */ {wxFONTENCODING_ISO8859_6, STOP},
 433         /* windows */ {wxFONTENCODING_CP1256, STOP},
 434         /* os2     */ {STOP},
 435         /* mac     */ {wxFONTENCODING_MACARABIC, STOP}
 436     },
 437
 438     // Turkish
 439     {
 440         /* unix    */ {wxFONTENCODING_ISO8859_9, STOP},
 441         /* windows */ {wxFONTENCODING_CP1254, STOP},
 442         /* os2     */ {STOP},
 443         /* mac     */ {wxFONTENCODING_MACTURKISH, STOP}
 444     },
 445
 446     // Cyrillic
 447     {
 448         /* unix    */ {wxFONTENCODING_KOI8, wxFONTENCODING_KOI8_U, wxFONTENCODING_ISO8859_5, STOP},
 449         /* windows */ {wxFONTENCODING_CP1251, STOP},
 450         /* os2     */ {STOP},
 451         /* mac     */ {wxFONTENCODING_MACCYRILLIC, STOP}
 452     },
 453
 454     {{STOP},{STOP},{STOP},{STOP}} /* Terminator */
 455     /* no, _not_ Arnold! */
 456 };
 457
 458
 459 static bool FindEncoding(const wxFontEncodingArray& arr, wxFontEncoding f)
 460 {
 461     for (wxFontEncodingArray::const_iterator it = arr.begin(), en = arr.end();
 462          it != en; ++it)
 463         if (*it == f)
 464             return true;
 465     return false;
 466 }
 467
 468 wxFontEncodingArray wxEncodingConverter::GetPlatformEquivalents(wxFontEncoding enc, int platform)
 469 {
 470     if (platform == wxPLATFORM_CURRENT)
 471     {
 472 #if defined(__WXMSW__)
 473         platform = wxPLATFORM_WINDOWS;
 474 #elif defined(__WXGTK__) || defined(__WXMOTIF__)
 475         platform = wxPLATFORM_UNIX;
 476 #elif defined(__WXOS2__)
 477         platform = wxPLATFORM_OS2;
 478 #elif defined(__WXMAC__)
 479         platform = wxPLATFORM_MAC;
 480 #endif
 481     }
 482
 483     int i, clas, e ;
 484     wxFontEncoding *f;
 485     wxFontEncodingArray arr;
 486
 487     clas = 0;
 488     while (EquivalentEncodings[clas][0][0] != STOP)
 489     {
 490         for (i = 0; i < NUM_OF_PLATFORMS; i++)
 491             for (e = 0; EquivalentEncodings[clas][i][e] != STOP; e++)
 492                 if (EquivalentEncodings[clas][i][e] == enc)
 493                 {
 494                     for (f = EquivalentEncodings[clas][platform]; *f != STOP; f++)
 495                         if (*f == enc) arr.push_back(enc);
 496                     for (f = EquivalentEncodings[clas][platform]; *f != STOP; f++)
 497                         if (!FindEncoding(arr, *f)) arr.push_back(*f);
 498                     i = NUM_OF_PLATFORMS/*hack*/; break;
 499                 }
 500         clas++;
 501     }
 502
 503     return arr;
 504 }
 505
 506
 507
 508 wxFontEncodingArray wxEncodingConverter::GetAllEquivalents(wxFontEncoding enc)
 509 {
 510     int i, clas, e, j ;
 511     wxFontEncoding *f;
 512     wxFontEncodingArray arr;
 513
 514     arr = GetPlatformEquivalents(enc); // we want them to be first items in array
 515
 516     clas = 0;
 517     while (EquivalentEncodings[clas][0][0] != STOP)
 518     {
 519         for (i = 0; i < NUM_OF_PLATFORMS; i++)
 520             for (e = 0; EquivalentEncodings[clas][i][e] != STOP; e++)
 521                 if (EquivalentEncodings[clas][i][e] == enc)
 522                 {
 523                     for (j = 0; j < NUM_OF_PLATFORMS; j++)
 524                         for (f = EquivalentEncodings[clas][j]; *f != STOP; f++)
 525                             if (!FindEncoding(arr, *f)) arr.push_back(*f);
 526                     i = NUM_OF_PLATFORMS/*hack*/; break;
 527                 }
 528         clas++;
 529     }
 530
 531     return arr;
 532 }
 533