src/common/encconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        encconv.cpp
   3 // Purpose:     wxEncodingConverter class for converting between different
   4 //              font encodings
   5 // Author:      Vaclav Slavik
   6 // Copyright:   (c) 1999 Vaclav Slavik
   7 // Licence:     wxWindows Licence
   8 /////////////////////////////////////////////////////////////////////////////
   9
  10 #ifdef __GNUG__
  11 #pragma implementation "encconv.h"
  12 #endif
  13
  14 // For compilers that support precompilation, includes "wx.h".
  15 #include "wx/wxprec.h"
  16
  17 #ifdef __BORLANDC__
  18   #pragma hdrstop
  19 #endif
  20
  21 #if wxUSE_FONTMAP
  22
  23 #include "wx/encconv.h"
  24
  25 #include <stdlib.h>
  26 #ifdef __WINE__
  27 #include <search.h>
  28 #endif
  29
  30 // conversion tables, generated by scripts in $(WXWIN)/misc/unictabl:
  31 #ifdef __BORLANDC__
  32 #include "../common/unictabl.inc"
  33 #else
  34 #include "unictabl.inc"
  35 #endif
  36
  37 #if wxUSE_WCHAR_T
  38 typedef wchar_t tchar;
  39 #else
  40 typedef char tchar;
  41 #endif
  42
  43 static wxUint16* LINKAGEMODE GetEncTable(wxFontEncoding enc)
  44 {
  45     for (int i = 0; encodings_list[i].table != NULL; i++)
  46     {
  47         if (encodings_list[i].encoding == enc)
  48             return encodings_list[i].table;
  49     }
  50     return NULL;
  51 }
  52
  53 typedef struct {
  54     wxUint16 u;
  55     wxUint8  c;
  56 } CharsetItem;
  57
  58
  59
  60 extern "C" int LINKAGEMODE CompareCharsetItems(const void *i1, const void *i2)
  61 {
  62     return ( ((CharsetItem*)i1) -> u - ((CharsetItem*)i2) -> u );
  63 }
  64
  65
  66 static CharsetItem* LINKAGEMODE BuildReverseTable(wxUint16 *tbl)
  67 {
  68     CharsetItem *rev = new CharsetItem[128];
  69
  70     for (int i = 0; i < 128; i++)
  71         rev[i].c = 128 + i, rev[i].u = tbl[i];
  72
  73     qsort(rev, 128, sizeof(CharsetItem), CompareCharsetItems);
  74
  75     return rev;
  76 }
  77
  78
  79
  80 wxEncodingConverter::wxEncodingConverter()
  81 {
  82     m_Table = NULL;
  83     m_UnicodeInput = m_UnicodeOutput = FALSE;
  84     m_JustCopy = FALSE;
  85 }
  86
  87
  88
  89 bool wxEncodingConverter::Init(wxFontEncoding input_enc, wxFontEncoding output_enc, int method)
  90 {
  91     unsigned i;
  92     wxUint16 *in_tbl = NULL, *out_tbl = NULL;
  93
  94     if (m_Table) {delete[] m_Table; m_Table = NULL;}
  95
  96 #if !wxUSE_WCHAR_T
  97     if (input_enc == wxFONTENCODING_UNICODE || output_enc == wxFONTENCODING_UNICODE) return FALSE;
  98 #endif
  99
 100     if (input_enc == output_enc) {m_JustCopy = TRUE; return TRUE;}
 101
 102     m_UnicodeOutput = (output_enc == wxFONTENCODING_UNICODE);
 103     m_JustCopy = FALSE;
 104
 105     if (input_enc == wxFONTENCODING_UNICODE)
 106     {
 107         if ((out_tbl = GetEncTable(output_enc)) == NULL) return FALSE;
 108
 109         m_Table = new tchar[65536];
 110         for (i = 0; i < 128; i++)  m_Table[i] = (tchar)i; // 7bit ASCII
 111         for (i = 128; i < 65536; i++)  m_Table[i] = (tchar)'?';
 112                 // FIXME - this should be character that means `unicode to charset' impossible, not '?'
 113
 114         if (method == wxCONVERT_SUBSTITUTE)
 115         {
 116             for (i = 0; i < encoding_unicode_fallback_count; i++)
 117                 m_Table[encoding_unicode_fallback[i].c] = (tchar) encoding_unicode_fallback[i].s;
 118         }
 119
 120         for (i = 0; i < 128; i++)
 121             m_Table[out_tbl[i]] = (tchar)(128 + i);
 122
 123         m_UnicodeInput = TRUE;
 124         return TRUE;
 125     }
 126
 127     else
 128     {
 129         if ((in_tbl = GetEncTable(input_enc)) == NULL) return FALSE;
 130         if (output_enc != wxFONTENCODING_UNICODE)
 131             if ((out_tbl = GetEncTable(output_enc)) == NULL) return FALSE;
 132
 133         m_UnicodeInput = FALSE;
 134
 135         m_Table = new tchar[256];
 136         for (i = 0; i < 128; i++)  m_Table[i] = (tchar)i; // 7bit ASCII
 137
 138         if (output_enc == wxFONTENCODING_UNICODE)
 139         {
 140             for (i = 0; i < 128; i++)  m_Table[128 + i] = (tchar)in_tbl[i];
 141             return TRUE;
 142         }
 143         else
 144         {
 145             CharsetItem *rev = BuildReverseTable(out_tbl);
 146             CharsetItem *item;
 147             CharsetItem key;
 148
 149             for (i = 0; i < 128; i++)
 150             {
 151                 key.u = in_tbl[i];
 152                 item = (CharsetItem*) bsearch(&key, rev, 128, sizeof(CharsetItem), CompareCharsetItems);
 153                 if (item == NULL && method == wxCONVERT_SUBSTITUTE)
 154                     item = (CharsetItem*) bsearch(&key, encoding_unicode_fallback,
 155                                 encoding_unicode_fallback_count, sizeof(CharsetItem), CompareCharsetItems);
 156                 if (item)
 157                     m_Table[128 + i] = (tchar)item -> c;
 158                 else
 159 #if wxUSE_WCHAR_T
 160                     m_Table[128 + i] = (wchar_t)(128 + i);
 161 #else
 162                     m_Table[128 + i] = (char)(128 + i);
 163 #endif
 164             }
 165
 166             delete[] rev;
 167             return TRUE;
 168         }
 169     }
 170 }
 171
 172
 173
 174 void wxEncodingConverter::Convert(const char* input, char* output)
 175 {
 176     wxASSERT_MSG(!m_UnicodeOutput, wxT("You cannot convert to unicode if output is const char*!"));
 177     wxASSERT_MSG(!m_UnicodeInput, wxT("You cannot convert from unicode if input is const char*!"));
 178
 179     const char *i;
 180     char *o;
 181
 182     if (m_JustCopy)
 183     {
 184         strcpy(output, input);
 185         return;
 186     }
 187
 188     wxCHECK_RET(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 189
 190     for (i = input, o = output; *i != 0;)
 191         *(o++) = (char)(m_Table[(wxUint8)*(i++)]);
 192     *o = 0;
 193 }
 194
 195
 196 #if wxUSE_WCHAR_T
 197
 198 void wxEncodingConverter::Convert(const char* input, wchar_t* output)
 199 {
 200     wxASSERT_MSG(m_UnicodeOutput, wxT("You cannot convert to 8-bit if output is const wchar_t*!"));
 201     wxASSERT_MSG(!m_UnicodeInput, wxT("You cannot convert from unicode if input is const char*!"));
 202
 203     const char *i;
 204     wchar_t *o;
 205
 206     if (m_JustCopy)
 207     {
 208         for (i = input, o = output; *i != 0;)
 209             *(o++) = (wchar_t)(*(i++));
 210         *o = 0;
 211         return;
 212     }
 213
 214     wxCHECK_RET(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 215
 216     for (i = input, o = output; *i != 0;)
 217         *(o++) = (wchar_t)(m_Table[(wxUint8)*(i++)]);
 218     *o = 0;
 219 }
 220
 221
 222
 223 void wxEncodingConverter::Convert(const wchar_t* input, char* output)
 224 {
 225     wxASSERT_MSG(!m_UnicodeOutput, wxT("You cannot convert to unicode if output is const char*!"));
 226     wxASSERT_MSG(m_UnicodeInput, wxT("You cannot convert from 8-bit if input is const wchar_t*!"));
 227
 228     const wchar_t *i;
 229     char *o;
 230
 231     if (m_JustCopy)
 232     {
 233         for (i = input, o = output; *i != 0;)
 234             *(o++) = (char)(*(i++));
 235         *o = 0;
 236         return;
 237     }
 238
 239     wxCHECK_RET(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 240
 241     for (i = input, o = output; *i != 0;)
 242         *(o++) = (char)(m_Table[(wxUint16)*(i++)]);
 243     *o = 0;
 244 }
 245
 246
 247
 248 void wxEncodingConverter::Convert(const wchar_t* input, wchar_t* output)
 249 {
 250     wxASSERT_MSG(m_UnicodeOutput, wxT("You cannot convert to 8-bit if output is const wchar_t*!"));
 251     wxASSERT_MSG(m_UnicodeInput, wxT("You cannot convert from 8-bit if input is const wchar_t*!"));
 252
 253     const wchar_t *i;
 254     wchar_t *o;
 255
 256     if (m_JustCopy)
 257     {
 258         // wcscpy() is not guaranteed to exist
 259         for (i = input, o = output; *i != 0;)
 260             *(o++) = (*(i++));
 261         *o = 0;
 262         return;
 263     }
 264
 265     wxCHECK_RET(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 266
 267     for (i = input, o = output; *i != 0;)
 268         *(o++) = (wchar_t)(m_Table[(wxUint8)*(i++)]);
 269     *o = 0;
 270 }
 271
 272 #endif // wxUSE_WCHAR_T
 273
 274
 275 wxString wxEncodingConverter::Convert(const wxString& input)
 276 {
 277     if (m_JustCopy) return input;
 278
 279     wxString s;
 280     const wxChar *i;
 281
 282     wxCHECK_MSG(m_Table != NULL, s,
 283                 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 284
 285     if (m_UnicodeInput)
 286     {
 287         for (i = input.c_str(); *i != 0; i++)
 288             s << (wxChar)(m_Table[(wxUint16)*i]);
 289     }
 290     else
 291     {
 292         for (i = input.c_str(); *i != 0; i++)
 293             s << (wxChar)(m_Table[(wxUint8)*i]);
 294     }
 295
 296     return s;
 297 }
 298
 299
 300
 301
 302
 303
 304
 305 // Following tables describe classes of encoding equivalence.
 306 //
 307
 308 #define STOP wxFONTENCODING_SYSTEM
 309
 310 #define NUM_OF_PLATFORMS  4 /*must conform to enum wxPLATFORM_XXXX !!!*/
 311 #define ENC_PER_PLATFORM  5
 312            // max no. of encodings for one language used on one platform
 313            // Anybody thinks 5 is not enough? ;-)
 314
 315 static wxFontEncoding
 316     EquivalentEncodings[][NUM_OF_PLATFORMS][ENC_PER_PLATFORM+1] = {
 317
 318     // *** Please put more common encodings as first! ***
 319
 320     // Western European
 321     {
 322         /* unix    */ {wxFONTENCODING_ISO8859_1, wxFONTENCODING_ISO8859_15, STOP},
 323         /* windows */ {wxFONTENCODING_CP1252, STOP},
 324         /* os2     */ {STOP},
 325         /* mac     */ {STOP}
 326     },
 327
 328     // Central European
 329     {
 330         /* unix    */ {wxFONTENCODING_ISO8859_2, STOP},
 331         /* windows */ {wxFONTENCODING_CP1250, STOP},
 332         /* os2     */ {STOP},
 333         /* mac     */ {STOP}
 334     },
 335
 336     // Baltic
 337     {
 338         /* unix    */ {wxFONTENCODING_ISO8859_13, wxFONTENCODING_ISO8859_4, STOP},
 339         /* windows */ {wxFONTENCODING_CP1257, STOP},
 340         /* os2     */ {STOP},
 341         /* mac     */ {STOP}
 342     },
 343
 344     // Hebrew
 345     {
 346         /* unix    */ {wxFONTENCODING_ISO8859_8, STOP},
 347         /* windows */ {wxFONTENCODING_CP1255, STOP},
 348         /* os2     */ {STOP},
 349         /* mac     */ {STOP}
 350     },
 351
 352     // Greek
 353     {
 354         /* unix    */ {wxFONTENCODING_ISO8859_7, STOP},
 355         /* windows */ {wxFONTENCODING_CP1253, STOP},
 356         /* os2     */ {STOP},
 357         /* mac     */ {STOP}
 358     },
 359
 360     // Arabic
 361     {
 362         /* unix    */ {wxFONTENCODING_ISO8859_6, STOP},
 363         /* windows */ {wxFONTENCODING_CP1256, STOP},
 364         /* os2     */ {STOP},
 365         /* mac     */ {STOP}
 366     },
 367
 368     // Turkish
 369     {
 370         /* unix    */ {wxFONTENCODING_ISO8859_9, STOP},
 371         /* windows */ {wxFONTENCODING_CP1254, STOP},
 372         /* os2     */ {STOP},
 373         /* mac     */ {STOP}
 374     },
 375
 376     // Cyrillic
 377     {
 378         /* unix    */ {wxFONTENCODING_KOI8, wxFONTENCODING_ISO8859_5, STOP},
 379         /* windows */ {wxFONTENCODING_CP1251, STOP},
 380         /* os2     */ {STOP},
 381         /* mac     */ {STOP}
 382     },
 383
 384     {{STOP},{STOP},{STOP},{STOP}} /* Terminator */
 385     /* no, _not_ Arnold! */
 386 };
 387
 388
 389
 390
 391 wxFontEncodingArray wxEncodingConverter::GetPlatformEquivalents(wxFontEncoding enc, int platform)
 392 {
 393     if (platform == wxPLATFORM_CURRENT)
 394     {
 395 #if defined(__WXMSW__)
 396         platform = wxPLATFORM_WINDOWS;
 397 #elif defined(__WXGTK__) || defined(__WXMOTIF__)
 398         platform = wxPLATFORM_UNIX;
 399 #elif defined(__WXOS2__)
 400         platform = wxPLATFORM_OS2;
 401 #elif defined(__WXMAC__)
 402         platform = wxPLATFORM_MAC;
 403 #endif
 404     }
 405
 406     int i, clas, e ;
 407     wxFontEncoding *f;
 408     wxFontEncodingArray arr;
 409
 410     clas = 0;
 411     while (EquivalentEncodings[clas][0][0] != STOP)
 412     {
 413         for (i = 0; i < NUM_OF_PLATFORMS; i++)
 414             for (e = 0; EquivalentEncodings[clas][i][e] != STOP; e++)
 415                 if (EquivalentEncodings[clas][i][e] == enc)
 416                 {
 417                     for (f = EquivalentEncodings[clas][platform]; *f != STOP; f++)
 418                         if (*f == enc) arr.Add(enc);
 419                     for (f = EquivalentEncodings[clas][platform]; *f != STOP; f++)
 420                         if (arr.Index(*f) == wxNOT_FOUND) arr.Add(*f);
 421                     i = NUM_OF_PLATFORMS/*hack*/; break;
 422                 }
 423         clas++;
 424     }
 425
 426     return arr;
 427 }
 428
 429
 430
 431 wxFontEncodingArray wxEncodingConverter::GetAllEquivalents(wxFontEncoding enc)
 432 {
 433     int i, clas, e, j ;
 434     wxFontEncoding *f;
 435     wxFontEncodingArray arr;
 436
 437     arr = GetPlatformEquivalents(enc); // we want them to be first items in array
 438
 439     clas = 0;
 440     while (EquivalentEncodings[clas][0][0] != STOP)
 441     {
 442         for (i = 0; i < NUM_OF_PLATFORMS; i++)
 443             for (e = 0; EquivalentEncodings[clas][i][e] != STOP; e++)
 444                 if (EquivalentEncodings[clas][i][e] == enc)
 445                 {
 446                     for (j = 0; j < NUM_OF_PLATFORMS; j++)
 447                         for (f = EquivalentEncodings[clas][j]; *f != STOP; f++)
 448                             if (arr.Index(*f) == wxNOT_FOUND) arr.Add(*f);
 449                     i = NUM_OF_PLATFORMS/*hack*/; break;
 450                 }
 451         clas++;
 452     }
 453
 454     return arr;
 455 }
 456
 457 #endif // wxUSE_FONTMAP