src/common/encconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        encconv.cpp
   3 // Purpose:     wxEncodingConverter class for converting between different
   4 //              font encodings
   5 // Author:      Vaclav Slavik
   6 // Copyright:   (c) 1999 Vaclav Slavik
   7 // Licence:     wxWindows Licence
   8 /////////////////////////////////////////////////////////////////////////////
   9
  10 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
  11 #pragma implementation "encconv.h"
  12 #endif
  13
  14 // For compilers that support precompilation, includes "wx.h".
  15 #include "wx/wxprec.h"
  16
  17 #ifdef __BORLANDC__
  18   #pragma hdrstop
  19 #endif
  20
  21 #if wxUSE_FONTMAP
  22
  23 #include "wx/encconv.h"
  24
  25 #include <stdlib.h>
  26
  27 // conversion tables, generated by scripts in $(WXWIN)/misc/unictabl:
  28 #ifdef __BORLANDC__
  29 #include "../common/unictabl.inc"
  30 #else
  31 #include "unictabl.inc"
  32 #endif
  33
  34 #if wxUSE_WCHAR_T
  35 typedef wchar_t tchar;
  36 #else
  37 typedef char tchar;
  38 #endif
  39
  40 #ifdef __WXWINCE__
  41 #undef LINKAGEMODE
  42 #define LINKAGEMODE __cdecl
  43 #endif
  44
  45 static wxUint16* LINKAGEMODE GetEncTable(wxFontEncoding enc)
  46 {
  47     for (int i = 0; encodings_list[i].table != NULL; i++)
  48     {
  49         if (encodings_list[i].encoding == enc)
  50             return encodings_list[i].table;
  51     }
  52     return NULL;
  53 }
  54
  55 typedef struct {
  56     wxUint16 u;
  57     wxUint8  c;
  58 } CharsetItem;
  59
  60 extern "C" int LINKAGEMODE CompareCharsetItems(const void *i1, const void *i2)
  61 {
  62     return ( ((CharsetItem*)i1) -> u - ((CharsetItem*)i2) -> u );
  63 }
  64
  65
  66 static CharsetItem* LINKAGEMODE BuildReverseTable(wxUint16 *tbl)
  67 {
  68     CharsetItem *rev = new CharsetItem[128];
  69
  70     for (int i = 0; i < 128; i++)
  71         rev[i].c = 128 + i, rev[i].u = tbl[i];
  72
  73     qsort(rev, 128, sizeof(CharsetItem), CompareCharsetItems);
  74
  75     return rev;
  76 }
  77
  78
  79
  80 wxEncodingConverter::wxEncodingConverter()
  81 {
  82     m_Table = NULL;
  83     m_UnicodeInput = m_UnicodeOutput = FALSE;
  84     m_JustCopy = FALSE;
  85 }
  86
  87
  88
  89 bool wxEncodingConverter::Init(wxFontEncoding input_enc, wxFontEncoding output_enc, int method)
  90 {
  91     unsigned i;
  92     wxUint16 *in_tbl, *out_tbl = NULL;
  93
  94     if (m_Table) {delete[] m_Table; m_Table = NULL;}
  95
  96 #if !wxUSE_WCHAR_T
  97     if (input_enc == wxFONTENCODING_UNICODE || output_enc == wxFONTENCODING_UNICODE) return FALSE;
  98 #endif
  99
 100     if (input_enc == output_enc) {m_JustCopy = TRUE; return TRUE;}
 101
 102     m_UnicodeOutput = (output_enc == wxFONTENCODING_UNICODE);
 103     m_JustCopy = FALSE;
 104
 105     if (input_enc == wxFONTENCODING_UNICODE)
 106     {
 107         if ((out_tbl = GetEncTable(output_enc)) == NULL) return FALSE;
 108
 109         m_Table = new tchar[65536];
 110         for (i = 0; i < 128; i++)  m_Table[i] = (tchar)i; // 7bit ASCII
 111         for (i = 128; i < 65536; i++)  m_Table[i] = (tchar)'?';
 112                 // FIXME - this should be character that means `unicode to charset' impossible, not '?'
 113
 114         if (method == wxCONVERT_SUBSTITUTE)
 115         {
 116             for (i = 0; i < encoding_unicode_fallback_count; i++)
 117                 m_Table[encoding_unicode_fallback[i].c] = (tchar) encoding_unicode_fallback[i].s;
 118         }
 119
 120         for (i = 0; i < 128; i++)
 121             m_Table[out_tbl[i]] = (tchar)(128 + i);
 122
 123         m_UnicodeInput = TRUE;
 124     }
 125     else // input !Unicode
 126     {
 127         if ((in_tbl = GetEncTable(input_enc)) == NULL) return FALSE;
 128         if (output_enc != wxFONTENCODING_UNICODE)
 129             if ((out_tbl = GetEncTable(output_enc)) == NULL) return FALSE;
 130
 131         m_UnicodeInput = FALSE;
 132
 133         m_Table = new tchar[256];
 134         for (i = 0; i < 128; i++)  m_Table[i] = (tchar)i; // 7bit ASCII
 135
 136         if (output_enc == wxFONTENCODING_UNICODE)
 137         {
 138             for (i = 0; i < 128; i++)  m_Table[128 + i] = (tchar)in_tbl[i];
 139             return TRUE;
 140         }
 141         // FIXME: write a substitute for bsearch
 142 #ifndef __WXWINCE__
 143         else // output !Unicode
 144         {
 145             CharsetItem *rev = BuildReverseTable(out_tbl);
 146             CharsetItem *item;
 147             CharsetItem key;
 148
 149             for (i = 0; i < 128; i++)
 150             {
 151                 key.u = in_tbl[i];
 152                 item = (CharsetItem*) bsearch(&key, rev, 128, sizeof(CharsetItem), CompareCharsetItems);
 153                 if (item == NULL && method == wxCONVERT_SUBSTITUTE)
 154                     item = (CharsetItem*) bsearch(&key, encoding_unicode_fallback,
 155                                 encoding_unicode_fallback_count, sizeof(CharsetItem), CompareCharsetItems);
 156                 if (item)
 157                     m_Table[128 + i] = (tchar)item -> c;
 158                 else
 159 #if wxUSE_WCHAR_T
 160                     m_Table[128 + i] = (wchar_t)(128 + i);
 161 #else
 162                     m_Table[128 + i] = (char)(128 + i);
 163 #endif
 164             }
 165
 166             delete[] rev;
 167         }
 168 #endif // !__WXWINCE__
 169     }
 170
 171     return TRUE;
 172 }
 173
 174
 175
 176 void wxEncodingConverter::Convert(const char* input, char* output) const
 177 {
 178     wxASSERT_MSG(!m_UnicodeOutput, wxT("You cannot convert to unicode if output is const char*!"));
 179     wxASSERT_MSG(!m_UnicodeInput, wxT("You cannot convert from unicode if input is const char*!"));
 180
 181     const char *i;
 182     char *o;
 183
 184     if (m_JustCopy)
 185     {
 186         strcpy(output, input);
 187         return;
 188     }
 189
 190     wxCHECK_RET(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 191
 192     for (i = input, o = output; *i != 0;)
 193         *(o++) = (char)(m_Table[(wxUint8)*(i++)]);
 194     *o = 0;
 195 }
 196
 197
 198 #if wxUSE_WCHAR_T
 199
 200 void wxEncodingConverter::Convert(const char* input, wchar_t* output) const
 201 {
 202     wxASSERT_MSG(m_UnicodeOutput, wxT("You cannot convert to 8-bit if output is const wchar_t*!"));
 203     wxASSERT_MSG(!m_UnicodeInput, wxT("You cannot convert from unicode if input is const char*!"));
 204
 205     const char *i;
 206     wchar_t *o;
 207
 208     if (m_JustCopy)
 209     {
 210         for (i = input, o = output; *i != 0;)
 211             *(o++) = (wchar_t)(*(i++));
 212         *o = 0;
 213         return;
 214     }
 215
 216     wxCHECK_RET(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 217
 218     for (i = input, o = output; *i != 0;)
 219         *(o++) = (wchar_t)(m_Table[(wxUint8)*(i++)]);
 220     *o = 0;
 221 }
 222
 223
 224
 225 void wxEncodingConverter::Convert(const wchar_t* input, char* output) const
 226 {
 227     wxASSERT_MSG(!m_UnicodeOutput, wxT("You cannot convert to unicode if output is const char*!"));
 228     wxASSERT_MSG(m_UnicodeInput, wxT("You cannot convert from 8-bit if input is const wchar_t*!"));
 229
 230     const wchar_t *i;
 231     char *o;
 232
 233     if (m_JustCopy)
 234     {
 235         for (i = input, o = output; *i != 0;)
 236             *(o++) = (char)(*(i++));
 237         *o = 0;
 238         return;
 239     }
 240
 241     wxCHECK_RET(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 242
 243     for (i = input, o = output; *i != 0;)
 244         *(o++) = (char)(m_Table[(wxUint16)*(i++)]);
 245     *o = 0;
 246 }
 247
 248
 249
 250 void wxEncodingConverter::Convert(const wchar_t* input, wchar_t* output) const
 251 {
 252     wxASSERT_MSG(m_UnicodeOutput, wxT("You cannot convert to 8-bit if output is const wchar_t*!"));
 253     wxASSERT_MSG(m_UnicodeInput, wxT("You cannot convert from 8-bit if input is const wchar_t*!"));
 254
 255     const wchar_t *i;
 256     wchar_t *o;
 257
 258     if (m_JustCopy)
 259     {
 260         // wcscpy() is not guaranteed to exist
 261         for (i = input, o = output; *i != 0;)
 262             *(o++) = (*(i++));
 263         *o = 0;
 264         return;
 265     }
 266
 267     wxCHECK_RET(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 268
 269     for (i = input, o = output; *i != 0;)
 270         *(o++) = (wchar_t)(m_Table[(wxUint8)*(i++)]);
 271     *o = 0;
 272 }
 273
 274 #endif // wxUSE_WCHAR_T
 275
 276
 277 wxString wxEncodingConverter::Convert(const wxString& input) const
 278 {
 279     if (m_JustCopy) return input;
 280
 281     wxString s;
 282     const wxChar *i;
 283
 284     wxCHECK_MSG(m_Table != NULL, s,
 285                 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 286
 287     if (m_UnicodeInput)
 288     {
 289         for (i = input.c_str(); *i != 0; i++)
 290             s << (wxChar)(m_Table[(wxUint16)*i]);
 291     }
 292     else
 293     {
 294         for (i = input.c_str(); *i != 0; i++)
 295             s << (wxChar)(m_Table[(wxUint8)*i]);
 296     }
 297
 298     return s;
 299 }
 300
 301
 302
 303
 304
 305
 306
 307 // Following tables describe classes of encoding equivalence.
 308 //
 309
 310 #define STOP wxFONTENCODING_SYSTEM
 311
 312 #define NUM_OF_PLATFORMS  4 /*must conform to enum wxPLATFORM_XXXX !!!*/
 313 #define ENC_PER_PLATFORM  5
 314            // max no. of encodings for one language used on one platform
 315            // Anybody thinks 5 is not enough? ;-)
 316
 317 static wxFontEncoding
 318     EquivalentEncodings[][NUM_OF_PLATFORMS][ENC_PER_PLATFORM+1] = {
 319
 320     // *** Please put more common encodings as first! ***
 321
 322     // Western European
 323     {
 324         /* unix    */ {wxFONTENCODING_ISO8859_1, wxFONTENCODING_ISO8859_15, STOP},
 325         /* windows */ {wxFONTENCODING_CP1252, STOP},
 326         /* os2     */ {STOP},
 327         /* mac     */ {STOP}
 328     },
 329
 330     // Central European
 331     {
 332         /* unix    */ {wxFONTENCODING_ISO8859_2, STOP},
 333         /* windows */ {wxFONTENCODING_CP1250, STOP},
 334         /* os2     */ {STOP},
 335         /* mac     */ {STOP}
 336     },
 337
 338     // Baltic
 339     {
 340         /* unix    */ {wxFONTENCODING_ISO8859_13, wxFONTENCODING_ISO8859_4, STOP},
 341         /* windows */ {wxFONTENCODING_CP1257, STOP},
 342         /* os2     */ {STOP},
 343         /* mac     */ {STOP}
 344     },
 345
 346     // Hebrew
 347     {
 348         /* unix    */ {wxFONTENCODING_ISO8859_8, STOP},
 349         /* windows */ {wxFONTENCODING_CP1255, STOP},
 350         /* os2     */ {STOP},
 351         /* mac     */ {STOP}
 352     },
 353
 354     // Greek
 355     {
 356         /* unix    */ {wxFONTENCODING_ISO8859_7, STOP},
 357         /* windows */ {wxFONTENCODING_CP1253, STOP},
 358         /* os2     */ {STOP},
 359         /* mac     */ {STOP}
 360     },
 361
 362     // Arabic
 363     {
 364         /* unix    */ {wxFONTENCODING_ISO8859_6, STOP},
 365         /* windows */ {wxFONTENCODING_CP1256, STOP},
 366         /* os2     */ {STOP},
 367         /* mac     */ {STOP}
 368     },
 369
 370     // Turkish
 371     {
 372         /* unix    */ {wxFONTENCODING_ISO8859_9, STOP},
 373         /* windows */ {wxFONTENCODING_CP1254, STOP},
 374         /* os2     */ {STOP},
 375         /* mac     */ {STOP}
 376     },
 377
 378     // Cyrillic
 379     {
 380         /* unix    */ {wxFONTENCODING_KOI8, wxFONTENCODING_ISO8859_5, STOP},
 381         /* windows */ {wxFONTENCODING_CP1251, STOP},
 382         /* os2     */ {STOP},
 383         /* mac     */ {STOP}
 384     },
 385
 386     {{STOP},{STOP},{STOP},{STOP}} /* Terminator */
 387     /* no, _not_ Arnold! */
 388 };
 389
 390
 391 static bool FindEncoding(const wxFontEncodingArray& arr, wxFontEncoding f)
 392 {
 393     for (wxFontEncodingArray::const_iterator it = arr.begin(), en = arr.end();
 394          it != en; ++it)
 395         if (*it == f)
 396             return true;
 397     return false;
 398 }
 399
 400 wxFontEncodingArray wxEncodingConverter::GetPlatformEquivalents(wxFontEncoding enc, int platform)
 401 {
 402     if (platform == wxPLATFORM_CURRENT)
 403     {
 404 #if defined(__WXMSW__)
 405         platform = wxPLATFORM_WINDOWS;
 406 #elif defined(__WXGTK__) || defined(__WXMOTIF__)
 407         platform = wxPLATFORM_UNIX;
 408 #elif defined(__WXOS2__)
 409         platform = wxPLATFORM_OS2;
 410 #elif defined(__WXMAC__)
 411         platform = wxPLATFORM_MAC;
 412 #endif
 413     }
 414
 415     int i, clas, e ;
 416     wxFontEncoding *f;
 417     wxFontEncodingArray arr;
 418
 419     clas = 0;
 420     while (EquivalentEncodings[clas][0][0] != STOP)
 421     {
 422         for (i = 0; i < NUM_OF_PLATFORMS; i++)
 423             for (e = 0; EquivalentEncodings[clas][i][e] != STOP; e++)
 424                 if (EquivalentEncodings[clas][i][e] == enc)
 425                 {
 426                     for (f = EquivalentEncodings[clas][platform]; *f != STOP; f++)
 427                         if (*f == enc) arr.push_back(enc);
 428                     for (f = EquivalentEncodings[clas][platform]; *f != STOP; f++)
 429                         if (!FindEncoding(arr, *f)) arr.push_back(*f);
 430                     i = NUM_OF_PLATFORMS/*hack*/; break;
 431                 }
 432         clas++;
 433     }
 434
 435     return arr;
 436 }
 437
 438
 439
 440 wxFontEncodingArray wxEncodingConverter::GetAllEquivalents(wxFontEncoding enc)
 441 {
 442     int i, clas, e, j ;
 443     wxFontEncoding *f;
 444     wxFontEncodingArray arr;
 445
 446     arr = GetPlatformEquivalents(enc); // we want them to be first items in array
 447
 448     clas = 0;
 449     while (EquivalentEncodings[clas][0][0] != STOP)
 450     {
 451         for (i = 0; i < NUM_OF_PLATFORMS; i++)
 452             for (e = 0; EquivalentEncodings[clas][i][e] != STOP; e++)
 453                 if (EquivalentEncodings[clas][i][e] == enc)
 454                 {
 455                     for (j = 0; j < NUM_OF_PLATFORMS; j++)
 456                         for (f = EquivalentEncodings[clas][j]; *f != STOP; f++)
 457                             if (!FindEncoding(arr, *f)) arr.push_back(*f);
 458                     i = NUM_OF_PLATFORMS/*hack*/; break;
 459                 }
 460         clas++;
 461     }
 462
 463     return arr;
 464 }
 465
 466 #endif // wxUSE_FONTMAP