src/common/encconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        encconv.cpp
   3 // Purpose:     wxEncodingConverter class for converting between different
   4 //              font encodings
   5 // Author:      Vaclav Slavik
   6 // Copyright:   (c) 1999 Vaclav Slavik
   7 // Licence:     wxWindows Licence
   8 /////////////////////////////////////////////////////////////////////////////
   9
  10 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
  11 #pragma implementation "encconv.h"
  12 #endif
  13
  14 // For compilers that support precompilation, includes "wx.h".
  15 #include "wx/wxprec.h"
  16
  17 #ifdef __BORLANDC__
  18   #pragma hdrstop
  19 #endif
  20
  21 #if wxUSE_FONTMAP
  22
  23 #include "wx/encconv.h"
  24
  25 #include <stdlib.h>
  26
  27 // conversion tables, generated by scripts in $(WXWIN)/misc/unictabl:
  28 #if defined( __BORLANDC__ ) || defined(__DARWIN__)
  29 #include "../common/unictabl.inc"
  30 #else
  31 #include "unictabl.inc"
  32 #endif
  33
// Element type of the conversion table built by wxEncodingConverter::Init():
// wchar_t when wide character support is compiled in, plain char otherwise.
#if wxUSE_WCHAR_T
typedef wchar_t tchar;
#else
typedef char tchar;
#endif

// Under Windows CE, redefine LINKAGEMODE as __cdecl for the functions in this
// file which are declared with it (the helpers and the qsort()/bsearch()
// comparison callback below).
#ifdef __WXWINCE__
#undef LINKAGEMODE
#define LINKAGEMODE __cdecl
#endif
  44
  45 #ifdef __WXMAC__
  46
  47 #include <ATSUnicode.h>
  48 #include <TextCommon.h>
  49 #include <TextEncodingConverter.h>
  50
  51 #include "wx/fontutil.h"
  52 #include "wx/mac/private.h"  // includes mac headers
  53
// Conversion tables for the Mac encodings, filled in lazily by GetEncTable():
// row (enc - wxFONTENCODING_MACMIN) receives the 16-bit values produced for
// the bytes 128..255 of that encoding.
wxUint16 gMacEncodings[wxFONTENCODING_MACMAX-wxFONTENCODING_MACMIN+1][128] ;
// gMacEncodingsInited[n] is set to true once row n of gMacEncodings was filled.
bool gMacEncodingsInited[wxFONTENCODING_MACMAX-wxFONTENCODING_MACMIN+1] ;
  56
  57 #endif
  58
  59 static wxUint16* LINKAGEMODE GetEncTable(wxFontEncoding enc)
  60 {
  61 #ifdef __WXMAC__
  62     if( enc >= wxFONTENCODING_MACMIN && enc <= wxFONTENCODING_MACMAX )
  63     {
  64         int i = enc-wxFONTENCODING_MACMIN ;
  65         if ( gMacEncodingsInited[i] == false )
  66         {
  67             TECObjectRef converter ;
  68             TextEncodingBase code = wxMacGetSystemEncFromFontEnc( enc ) ;
  69                 TextEncodingBase unicode = CreateTextEncoding(kTextEncodingUnicodeDefault,0,kUnicode16BitFormat) ;
  70             OSStatus status = TECCreateConverter(&converter,code,unicode);
  71             char s[2] ;
  72             s[1] = 0 ;
  73             ByteCount byteInLen, byteOutLen ;
  74             for( unsigned char c = 255 ; c >= 128 ; --c )
  75             {
  76                 s[0] = c ;
  77                 status = TECConvertText(converter, (ConstTextPtr) &s , 1, &byteInLen,
  78                 (TextPtr) &gMacEncodings[i][c-128] , 2, &byteOutLen);
  79             }
  80             status = TECDisposeConverter(converter);
  81             gMacEncodingsInited[i]=true;
  82         }
  83         return gMacEncodings[i] ;
  84     }
  85 #endif
  86
  87     for (int i = 0; encodings_list[i].table != NULL; i++)
  88     {
  89         if (encodings_list[i].encoding == enc)
  90             return encodings_list[i].table;
  91     }
  92     return NULL;
  93 }
  94
// One entry of a reverse (16-bit value -> 8-bit character) lookup table as
// built by BuildReverseTable().
typedef struct {
    wxUint16 u;     // 16-bit value, the key CompareCharsetItems() compares
    wxUint8  c;     // the 8-bit character associated with u
} CharsetItem;
  99
 100 extern "C" int LINKAGEMODE CompareCharsetItems(const void *i1, const void *i2)
 101 {
 102     return ( ((CharsetItem*)i1) -> u - ((CharsetItem*)i2) -> u );
 103 }
 104
 105
 106 static CharsetItem* LINKAGEMODE BuildReverseTable(wxUint16 *tbl)
 107 {
 108     CharsetItem *rev = new CharsetItem[128];
 109
 110     for (int i = 0; i < 128; i++)
 111         rev[i].c = 128 + i, rev[i].u = tbl[i];
 112
 113     qsort(rev, 128, sizeof(CharsetItem), CompareCharsetItems);
 114
 115     return rev;
 116 }
 117
 118
 119
 120 wxEncodingConverter::wxEncodingConverter()
 121 {
 122     m_Table = NULL;
 123     m_UnicodeInput = m_UnicodeOutput = FALSE;
 124     m_JustCopy = FALSE;
 125 }
 126
 127
 128
 129 bool wxEncodingConverter::Init(wxFontEncoding input_enc, wxFontEncoding output_enc, int method)
 130 {
 131     unsigned i;
 132     wxUint16 *in_tbl, *out_tbl = NULL;
 133
 134     if (m_Table) {delete[] m_Table; m_Table = NULL;}
 135
 136 #if !wxUSE_WCHAR_T
 137     if (input_enc == wxFONTENCODING_UNICODE || output_enc == wxFONTENCODING_UNICODE) return FALSE;
 138 #endif
 139
 140     if (input_enc == output_enc) {m_JustCopy = TRUE; return TRUE;}
 141
 142     m_UnicodeOutput = (output_enc == wxFONTENCODING_UNICODE);
 143     m_JustCopy = FALSE;
 144
 145     if (input_enc == wxFONTENCODING_UNICODE)
 146     {
 147         if ((out_tbl = GetEncTable(output_enc)) == NULL) return FALSE;
 148
 149         m_Table = new tchar[65536];
 150         for (i = 0; i < 128; i++)  m_Table[i] = (tchar)i; // 7bit ASCII
 151         for (i = 128; i < 65536; i++)  m_Table[i] = (tchar)'?';
 152                 // FIXME - this should be character that means `unicode to charset' impossible, not '?'
 153
 154         if (method == wxCONVERT_SUBSTITUTE)
 155         {
 156             for (i = 0; i < encoding_unicode_fallback_count; i++)
 157                 m_Table[encoding_unicode_fallback[i].c] = (tchar) encoding_unicode_fallback[i].s;
 158         }
 159
 160         for (i = 0; i < 128; i++)
 161             m_Table[out_tbl[i]] = (tchar)(128 + i);
 162
 163         m_UnicodeInput = TRUE;
 164     }
 165     else // input !Unicode
 166     {
 167         if ((in_tbl = GetEncTable(input_enc)) == NULL) return FALSE;
 168         if (output_enc != wxFONTENCODING_UNICODE)
 169             if ((out_tbl = GetEncTable(output_enc)) == NULL) return FALSE;
 170
 171         m_UnicodeInput = FALSE;
 172
 173         m_Table = new tchar[256];
 174         for (i = 0; i < 128; i++)  m_Table[i] = (tchar)i; // 7bit ASCII
 175
 176         if (output_enc == wxFONTENCODING_UNICODE)
 177         {
 178             for (i = 0; i < 128; i++)  m_Table[128 + i] = (tchar)in_tbl[i];
 179             return TRUE;
 180         }
 181         // FIXME: write a substitute for bsearch
 182 #ifndef __WXWINCE__
 183         else // output !Unicode
 184         {
 185             CharsetItem *rev = BuildReverseTable(out_tbl);
 186             CharsetItem *item;
 187             CharsetItem key;
 188
 189             for (i = 0; i < 128; i++)
 190             {
 191                 key.u = in_tbl[i];
 192                 item = (CharsetItem*) bsearch(&key, rev, 128, sizeof(CharsetItem), CompareCharsetItems);
 193                 if (item == NULL && method == wxCONVERT_SUBSTITUTE)
 194                     item = (CharsetItem*) bsearch(&key, encoding_unicode_fallback,
 195                                 encoding_unicode_fallback_count, sizeof(CharsetItem), CompareCharsetItems);
 196                 if (item)
 197                     m_Table[128 + i] = (tchar)item -> c;
 198                 else
 199 #if wxUSE_WCHAR_T
 200                     m_Table[128 + i] = (wchar_t)(128 + i);
 201 #else
 202                     m_Table[128 + i] = (char)(128 + i);
 203 #endif
 204             }
 205
 206             delete[] rev;
 207         }
 208 #endif // !__WXWINCE__
 209     }
 210
 211     return TRUE;
 212 }
 213
 214
 215
 216 void wxEncodingConverter::Convert(const char* input, char* output) const
 217 {
 218     wxASSERT_MSG(!m_UnicodeOutput, wxT("You cannot convert to unicode if output is const char*!"));
 219     wxASSERT_MSG(!m_UnicodeInput, wxT("You cannot convert from unicode if input is const char*!"));
 220
 221     const char *i;
 222     char *o;
 223
 224     if (m_JustCopy)
 225     {
 226         strcpy(output, input);
 227         return;
 228     }
 229
 230     wxCHECK_RET(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 231
 232     for (i = input, o = output; *i != 0;)
 233         *(o++) = (char)(m_Table[(wxUint8)*(i++)]);
 234     *o = 0;
 235 }
 236
 237
 238 #if wxUSE_WCHAR_T
 239
 240 void wxEncodingConverter::Convert(const char* input, wchar_t* output) const
 241 {
 242     wxASSERT_MSG(m_UnicodeOutput, wxT("You cannot convert to 8-bit if output is const wchar_t*!"));
 243     wxASSERT_MSG(!m_UnicodeInput, wxT("You cannot convert from unicode if input is const char*!"));
 244
 245     const char *i;
 246     wchar_t *o;
 247
 248     if (m_JustCopy)
 249     {
 250         for (i = input, o = output; *i != 0;)
 251             *(o++) = (wchar_t)(*(i++));
 252         *o = 0;
 253         return;
 254     }
 255
 256     wxCHECK_RET(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 257
 258     for (i = input, o = output; *i != 0;)
 259         *(o++) = (wchar_t)(m_Table[(wxUint8)*(i++)]);
 260     *o = 0;
 261 }
 262
 263
 264
 265 void wxEncodingConverter::Convert(const wchar_t* input, char* output) const
 266 {
 267     wxASSERT_MSG(!m_UnicodeOutput, wxT("You cannot convert to unicode if output is const char*!"));
 268     wxASSERT_MSG(m_UnicodeInput, wxT("You cannot convert from 8-bit if input is const wchar_t*!"));
 269
 270     const wchar_t *i;
 271     char *o;
 272
 273     if (m_JustCopy)
 274     {
 275         for (i = input, o = output; *i != 0;)
 276             *(o++) = (char)(*(i++));
 277         *o = 0;
 278         return;
 279     }
 280
 281     wxCHECK_RET(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 282
 283     for (i = input, o = output; *i != 0;)
 284         *(o++) = (char)(m_Table[(wxUint16)*(i++)]);
 285     *o = 0;
 286 }
 287
 288
 289
 290 void wxEncodingConverter::Convert(const wchar_t* input, wchar_t* output) const
 291 {
 292     wxASSERT_MSG(m_UnicodeOutput, wxT("You cannot convert to 8-bit if output is const wchar_t*!"));
 293     wxASSERT_MSG(m_UnicodeInput, wxT("You cannot convert from 8-bit if input is const wchar_t*!"));
 294
 295     const wchar_t *i;
 296     wchar_t *o;
 297
 298     if (m_JustCopy)
 299     {
 300         // wcscpy() is not guaranteed to exist
 301         for (i = input, o = output; *i != 0;)
 302             *(o++) = (*(i++));
 303         *o = 0;
 304         return;
 305     }
 306
 307     wxCHECK_RET(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 308
 309     for (i = input, o = output; *i != 0;)
 310         *(o++) = (wchar_t)(m_Table[(wxUint8)*(i++)]);
 311     *o = 0;
 312 }
 313
 314 #endif // wxUSE_WCHAR_T
 315
 316
 317 wxString wxEncodingConverter::Convert(const wxString& input) const
 318 {
 319     if (m_JustCopy) return input;
 320
 321     wxString s;
 322     const wxChar *i;
 323
 324     wxCHECK_MSG(m_Table != NULL, s,
 325                 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 326
 327     if (m_UnicodeInput)
 328     {
 329         for (i = input.c_str(); *i != 0; i++)
 330             s << (wxChar)(m_Table[(wxUint16)*i]);
 331     }
 332     else
 333     {
 334         for (i = input.c_str(); *i != 0; i++)
 335             s << (wxChar)(m_Table[(wxUint8)*i]);
 336     }
 337
 338     return s;
 339 }
 340
 341
 342
 343
 344
 345
 346
// The following table describes the classes of equivalent encodings: each of
// its elements lists, separately for every platform, the encodings which can
// be used for the same language.
//

// Value terminating the list of encodings of one platform; it is also the
// first unix entry of the element terminating the whole table.
#define STOP wxFONTENCODING_SYSTEM

#define NUM_OF_PLATFORMS  4 /*must conform to enum wxPLATFORM_XXXX !!!*/
#define ENC_PER_PLATFORM  5
           // max no. of encodings for one language used on one platform
           // (the arrays have one more element for the STOP terminator)
           // Anybody thinks 5 is not enough? ;-)

// Indexed as EquivalentEncodings[class][platform][n], where the platform
// index follows the order unix, windows, os2, mac used in the comments below.
static wxFontEncoding
    EquivalentEncodings[][NUM_OF_PLATFORMS][ENC_PER_PLATFORM+1] = {

    // *** Please put more common encodings as first! ***

    // Western European
    {
        /* unix    */ {wxFONTENCODING_ISO8859_1, wxFONTENCODING_ISO8859_15, STOP},
        /* windows */ {wxFONTENCODING_CP1252, STOP},
        /* os2     */ {STOP},
        /* mac     */ {wxFONTENCODING_MACROMAN, STOP}
    },

    // Central European
    {
        /* unix    */ {wxFONTENCODING_ISO8859_2, STOP},
        /* windows */ {wxFONTENCODING_CP1250, STOP},
        /* os2     */ {STOP},
        /* mac     */ {wxFONTENCODING_MACCENTRALEUR, STOP}
    },

    // Baltic
    {
        /* unix    */ {wxFONTENCODING_ISO8859_13, wxFONTENCODING_ISO8859_4, STOP},
        /* windows */ {wxFONTENCODING_CP1257, STOP},
        /* os2     */ {STOP},
        /* mac     */ {STOP}
    },

    // Hebrew
    {
        /* unix    */ {wxFONTENCODING_ISO8859_8, STOP},
        /* windows */ {wxFONTENCODING_CP1255, STOP},
        /* os2     */ {STOP},
        /* mac     */ {wxFONTENCODING_MACHEBREW, STOP}
    },

    // Greek
    {
        /* unix    */ {wxFONTENCODING_ISO8859_7, STOP},
        /* windows */ {wxFONTENCODING_CP1253, STOP},
        /* os2     */ {STOP},
        /* mac     */ {wxFONTENCODING_MACGREEK, STOP}
    },

    // Arabic
    {
        /* unix    */ {wxFONTENCODING_ISO8859_6, STOP},
        /* windows */ {wxFONTENCODING_CP1256, STOP},
        /* os2     */ {STOP},
        /* mac     */ {wxFONTENCODING_MACARABIC, STOP}
    },

    // Turkish
    {
        /* unix    */ {wxFONTENCODING_ISO8859_9, STOP},
        /* windows */ {wxFONTENCODING_CP1254, STOP},
        /* os2     */ {STOP},
        /* mac     */ {wxFONTENCODING_MACTURKISH, STOP}
    },

    // Cyrillic
    {
        /* unix    */ {wxFONTENCODING_KOI8, wxFONTENCODING_ISO8859_5, STOP},
        /* windows */ {wxFONTENCODING_CP1251, STOP},
        /* os2     */ {STOP},
        /* mac     */ {wxFONTENCODING_MACCYRILLIC, STOP}
    },

    // The lookup loops stop at the first element whose first unix entry is
    // STOP, so every real class above must list at least one unix encoding.
    {{STOP},{STOP},{STOP},{STOP}} /* Terminator */
    /* no, _not_ Arnold! */
};
 429
 430
 431 static bool FindEncoding(const wxFontEncodingArray& arr, wxFontEncoding f)
 432 {
 433     for (wxFontEncodingArray::const_iterator it = arr.begin(), en = arr.end();
 434          it != en; ++it)
 435         if (*it == f)
 436             return true;
 437     return false;
 438 }
 439
 440 wxFontEncodingArray wxEncodingConverter::GetPlatformEquivalents(wxFontEncoding enc, int platform)
 441 {
 442     if (platform == wxPLATFORM_CURRENT)
 443     {
 444 #if defined(__WXMSW__)
 445         platform = wxPLATFORM_WINDOWS;
 446 #elif defined(__WXGTK__) || defined(__WXMOTIF__)
 447         platform = wxPLATFORM_UNIX;
 448 #elif defined(__WXOS2__)
 449         platform = wxPLATFORM_OS2;
 450 #elif defined(__WXMAC__)
 451         platform = wxPLATFORM_MAC;
 452 #endif
 453     }
 454
 455     int i, clas, e ;
 456     wxFontEncoding *f;
 457     wxFontEncodingArray arr;
 458
 459     clas = 0;
 460     while (EquivalentEncodings[clas][0][0] != STOP)
 461     {
 462         for (i = 0; i < NUM_OF_PLATFORMS; i++)
 463             for (e = 0; EquivalentEncodings[clas][i][e] != STOP; e++)
 464                 if (EquivalentEncodings[clas][i][e] == enc)
 465                 {
 466                     for (f = EquivalentEncodings[clas][platform]; *f != STOP; f++)
 467                         if (*f == enc) arr.push_back(enc);
 468                     for (f = EquivalentEncodings[clas][platform]; *f != STOP; f++)
 469                         if (!FindEncoding(arr, *f)) arr.push_back(*f);
 470                     i = NUM_OF_PLATFORMS/*hack*/; break;
 471                 }
 472         clas++;
 473     }
 474
 475     return arr;
 476 }
 477
 478
 479
 480 wxFontEncodingArray wxEncodingConverter::GetAllEquivalents(wxFontEncoding enc)
 481 {
 482     int i, clas, e, j ;
 483     wxFontEncoding *f;
 484     wxFontEncodingArray arr;
 485
 486     arr = GetPlatformEquivalents(enc); // we want them to be first items in array
 487
 488     clas = 0;
 489     while (EquivalentEncodings[clas][0][0] != STOP)
 490     {
 491         for (i = 0; i < NUM_OF_PLATFORMS; i++)
 492             for (e = 0; EquivalentEncodings[clas][i][e] != STOP; e++)
 493                 if (EquivalentEncodings[clas][i][e] == enc)
 494                 {
 495                     for (j = 0; j < NUM_OF_PLATFORMS; j++)
 496                         for (f = EquivalentEncodings[clas][j]; *f != STOP; f++)
 497                             if (!FindEncoding(arr, *f)) arr.push_back(*f);
 498                     i = NUM_OF_PLATFORMS/*hack*/; break;
 499                 }
 500         clas++;
 501     }
 502
 503     return arr;
 504 }
 505
 506 #endif // wxUSE_FONTMAP