src/common/encconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        encconv.cpp
   3 // Purpose:     wxEncodingConverter class for converting between different
   4 //              font encodings
   5 // Author:      Vaclav Slavik
   6 // Copyright:   (c) 1999 Vaclav Slavik
   7 // Licence:     wxWindows Licence
   8 /////////////////////////////////////////////////////////////////////////////
   9
  10 #ifdef __GNUG__
  11 #pragma implementation "encconv.h"
  12 #endif
  13
  14 // For compilers that support precompilation, includes "wx.h".
  15 #include "wx/wxprec.h"
  16
  17 #ifdef __BORLANDC__
  18   #pragma hdrstop
  19 #endif
  20
  21 #include "wx/encconv.h"
  22
  23 #include <stdlib.h>
  24
  25 // conversion tables, generated by scripts in $(WXWIN)/misc/unictabl:
  26 #ifdef __BORLANDC__
  27 #include "../common/unictabl.inc"
  28 #else
  29 #include "unictabl.inc"
  30 #endif
  31
  32 #if wxUSE_WCHAR_T
  33 typedef wchar_t tchar;
  34 #else
  35 typedef char tchar;
  36 #endif
  37
  38 static wxUint16* LINKAGEMODE GetEncTable(wxFontEncoding enc)
  39 {
  40     for (int i = 0; encodings_list[i].table != NULL; i++)
  41     {
  42         if (encodings_list[i].encoding == enc)
  43             return encodings_list[i].table;
  44     }
  45     return NULL;
  46 }
  47
  48 typedef struct {
  49     wxUint16 u;
  50     wxUint8  c;
  51 } CharsetItem;
  52
  53
  54
  55 static int LINKAGEMODE CompareCharsetItems(const void *i1, const void *i2)
  56 {
  57     return ( ((CharsetItem*)i1) -> u - ((CharsetItem*)i2) -> u );
  58 }
  59
  60
  61 static CharsetItem* LINKAGEMODE BuildReverseTable(wxUint16 *tbl)
  62 {
  63     CharsetItem *rev = new CharsetItem[128];
  64
  65     for (int i = 0; i < 128; i++)
  66         rev[i].c = 128 + i, rev[i].u = tbl[i];
  67
  68     qsort(rev, 128, sizeof(CharsetItem), CompareCharsetItems);
  69
  70     return rev;
  71 }
  72
  73
  74
  75 wxEncodingConverter::wxEncodingConverter()
  76 {
  77     m_Table = NULL;
  78     m_UnicodeInput = m_UnicodeOutput = FALSE;
  79     m_JustCopy = FALSE;
  80 }
  81
  82
  83
  84 bool wxEncodingConverter::Init(wxFontEncoding input_enc, wxFontEncoding output_enc, int method)
  85 {
  86     unsigned i;
  87     wxUint16 *in_tbl = NULL, *out_tbl = NULL;
  88
  89     if (m_Table) {delete[] m_Table; m_Table = NULL;}
  90
  91 #if !wxUSE_WCHAR_T
  92     if (input_enc == wxFONTENCODING_UNICODE || output_enc == wxFONTENCODING_UNICODE) return FALSE;
  93 #endif
  94
  95     if (input_enc == output_enc) {m_JustCopy = TRUE; return TRUE;}
  96
  97     m_UnicodeOutput = (output_enc == wxFONTENCODING_UNICODE);
  98     m_JustCopy = FALSE;
  99
 100     if (input_enc == wxFONTENCODING_UNICODE)
 101     {
 102         if ((out_tbl = GetEncTable(output_enc)) == NULL) return FALSE;
 103
 104         m_Table = new tchar[65536];
 105         for (i = 0; i < 128; i++)  m_Table[i] = (tchar)i; // 7bit ASCII
 106         for (i = 128; i < 65536; i++)  m_Table[i] = (tchar)'?';
 107                 // FIXME - this should be character that means `unicode to charset' impossible, not '?'
 108
 109         if (method == wxCONVERT_SUBSTITUTE)
 110         {
 111             for (i = 0; i < encoding_unicode_fallback_count; i++)
 112                 m_Table[encoding_unicode_fallback[i].c] = (tchar) encoding_unicode_fallback[i].s;
 113         }
 114
 115         for (i = 0; i < 128; i++)
 116             m_Table[out_tbl[i]] = (tchar)(128 + i);
 117
 118         m_UnicodeInput = TRUE;
 119         return TRUE;
 120     }
 121
 122     else
 123     {
 124         if ((in_tbl = GetEncTable(input_enc)) == NULL) return FALSE;
 125         if (output_enc != wxFONTENCODING_UNICODE)
 126             if ((out_tbl = GetEncTable(output_enc)) == NULL) return FALSE;
 127
 128         m_UnicodeInput = FALSE;
 129
 130         m_Table = new tchar[256];
 131         for (i = 0; i < 128; i++)  m_Table[i] = (tchar)i; // 7bit ASCII
 132
 133         if (output_enc == wxFONTENCODING_UNICODE)
 134         {
 135             for (i = 0; i < 128; i++)  m_Table[128 + i] = (tchar)in_tbl[i];
 136             return TRUE;
 137         }
 138         else
 139         {
 140             CharsetItem *rev = BuildReverseTable(out_tbl);
 141             CharsetItem *item, key;
 142
 143             for (i = 0; i < 128; i++)
 144             {
 145                 key.u = in_tbl[i];
 146                 item = (CharsetItem*) bsearch(&key, rev, 128, sizeof(CharsetItem), CompareCharsetItems);
 147                 if (item == NULL && method == wxCONVERT_SUBSTITUTE)
 148                     item = (CharsetItem*) bsearch(&key, encoding_unicode_fallback,
 149                                 encoding_unicode_fallback_count, sizeof(CharsetItem), CompareCharsetItems);
 150                 if (item)
 151                     m_Table[128 + i] = (tchar)item -> c;
 152                 else
 153                     m_Table[128 + i] = 128 + i; // don't know => don't touch
 154             }
 155
 156             delete[] rev;
 157             return TRUE;
 158         }
 159     }
 160 }
 161
 162
 163
 164 void wxEncodingConverter::Convert(const char* input, char* output)
 165 {
 166     wxASSERT_MSG(!m_UnicodeOutput, wxT("You cannot convert to unicode if output is const char*!"));
 167     wxASSERT_MSG(!m_UnicodeInput, wxT("You cannot convert from unicode if input is const char*!"));
 168
 169     const char *i;
 170     char *o;
 171
 172     if (m_JustCopy)
 173     {
 174         strcpy(output, input);
 175         return;
 176     }
 177
 178     wxASSERT_MSG(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 179
 180     for (i = input, o = output; *i != 0;)
 181         *(o++) = (char)(m_Table[(wxUint8)*(i++)]);
 182     *o = 0;
 183 }
 184
 185
 186 #if wxUSE_WCHAR_T
 187
 188 void wxEncodingConverter::Convert(const char* input, wchar_t* output)
 189 {
 190     wxASSERT_MSG(m_UnicodeOutput, wxT("You cannot convert to 8-bit if output is const wchar_t*!"));
 191     wxASSERT_MSG(!m_UnicodeInput, wxT("You cannot convert from unicode if input is const char*!"));
 192
 193     const char *i;
 194     wchar_t *o;
 195
 196     if (m_JustCopy)
 197     {
 198         for (i = input, o = output; *i != 0;)
 199             *(o++) = (wchar_t)(*(i++));
 200         *o = 0;
 201         return;
 202     }
 203
 204     wxASSERT_MSG(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 205
 206     for (i = input, o = output; *i != 0;)
 207         *(o++) = (wchar_t)(m_Table[(wxUint8)*(i++)]);
 208     *o = 0;
 209 }
 210
 211
 212
 213 void wxEncodingConverter::Convert(const wchar_t* input, char* output)
 214 {
 215     wxASSERT_MSG(!m_UnicodeOutput, wxT("You cannot convert to unicode if output is const char*!"));
 216     wxASSERT_MSG(m_UnicodeInput, wxT("You cannot convert from 8-bit if input is const wchar_t*!"));
 217
 218     const wchar_t *i;
 219     char *o;
 220
 221     if (m_JustCopy)
 222     {
 223         for (i = input, o = output; *i != 0;)
 224             *(o++) = (char)(*(i++));
 225         *o = 0;
 226         return;
 227     }
 228
 229     wxASSERT_MSG(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 230
 231     for (i = input, o = output; *i != 0;)
 232         *(o++) = (char)(m_Table[(wxUint16)*(i++)]);
 233     *o = 0;
 234 }
 235
 236
 237
 238 void wxEncodingConverter::Convert(const wchar_t* input, wchar_t* output)
 239 {
 240     wxASSERT_MSG(m_UnicodeOutput, wxT("You cannot convert to 8-bit if output is const wchar_t*!"));
 241     wxASSERT_MSG(m_UnicodeInput, wxT("You cannot convert from 8-bit if input is const wchar_t*!"));
 242
 243     const wchar_t *i;
 244     wchar_t *o;
 245
 246     if (m_JustCopy)
 247     {
 248         // wcscpy() is not guaranteed to exist
 249         for (i = input, o = output; *i != 0;)
 250             *(o++) = (*(i++));
 251         *o = 0;
 252         return;
 253     }
 254
 255     wxASSERT_MSG(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 256
 257     for (i = input, o = output; *i != 0;)
 258         *(o++) = (wchar_t)(m_Table[(wxUint8)*(i++)]);
 259     *o = 0;
 260 }
 261
 262 #endif // wxUSE_WCHAR_T
 263
 264
 265 wxString wxEncodingConverter::Convert(const wxString& input)
 266 {
 267     if (m_JustCopy) return input;
 268
 269     wxString s;
 270     const wxChar *i;
 271
 272     wxASSERT_MSG(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 273
 274     if (m_UnicodeInput)
 275         for (i = input.c_str(); *i != 0; i++)
 276             s << (wxChar)(m_Table[(wxUint16)*i]);
 277     else
 278         for (i = input.c_str(); *i != 0; i++)
 279             s << (wxChar)(m_Table[(wxUint8)*i]);
 280     return s;
 281 }
 282
 283
 284
 285
 286
 287
 288
// The following table groups encodings into equivalence classes: each class
// lists, platform by platform, the encodings that belong to it.
//
 291
 292 #define STOP wxFONTENCODING_SYSTEM
 293
 294 #define NUM_OF_PLATFORMS  4 /*must conform to enum wxPLATFORM_XXXX !!!*/
 295 #define ENC_PER_PLATFORM  5
 296            // max no. of encodings for one language used on one platform
 297            // Anybody thinks 5 is not enough? ;-)
 298
 299 static wxFontEncoding
 300     EquivalentEncodings[][NUM_OF_PLATFORMS][ENC_PER_PLATFORM+1] = {
 301
 302     // *** Please put more common encodings as first! ***
 303
 304     // Western European
 305     {
 306         /* unix    */ {wxFONTENCODING_ISO8859_1, wxFONTENCODING_ISO8859_15, STOP},
 307         /* windows */ {wxFONTENCODING_CP1252, STOP},
 308         /* os2     */ {STOP},
 309         /* mac     */ {STOP}
 310     },
 311
 312     // Central European
 313     {
 314         /* unix    */ {wxFONTENCODING_ISO8859_2, STOP},
 315         /* windows */ {wxFONTENCODING_CP1250, STOP},
 316         /* os2     */ {STOP},
 317         /* mac     */ {STOP}
 318     },
 319
 320     // Baltic
 321     {
 322         /* unix    */ {wxFONTENCODING_ISO8859_13, wxFONTENCODING_ISO8859_4, STOP},
 323         /* windows */ {wxFONTENCODING_CP1257, STOP},
 324         /* os2     */ {STOP},
 325         /* mac     */ {STOP}
 326     },
 327
 328     // Hebrew
 329     {
 330         /* unix    */ {wxFONTENCODING_ISO8859_8, STOP},
 331         /* windows */ {wxFONTENCODING_CP1255, STOP},
 332         /* os2     */ {STOP},
 333         /* mac     */ {STOP}
 334     },
 335
 336     // Greek
 337     {
 338         /* unix    */ {wxFONTENCODING_ISO8859_7, STOP},
 339         /* windows */ {wxFONTENCODING_CP1253, STOP},
 340         /* os2     */ {STOP},
 341         /* mac     */ {STOP}
 342     },
 343
 344     // Arabic
 345     {
 346         /* unix    */ {wxFONTENCODING_ISO8859_6, STOP},
 347         /* windows */ {wxFONTENCODING_CP1256, STOP},
 348         /* os2     */ {STOP},
 349         /* mac     */ {STOP}
 350     },
 351
 352     // Turkish
 353     {
 354         /* unix    */ {wxFONTENCODING_ISO8859_9, STOP},
 355         /* windows */ {wxFONTENCODING_CP1254, STOP},
 356         /* os2     */ {STOP},
 357         /* mac     */ {STOP}
 358     },
 359
 360     // Cyrillic
 361     {
 362         /* unix    */ {wxFONTENCODING_KOI8, wxFONTENCODING_ISO8859_5, STOP},
 363         /* windows */ {wxFONTENCODING_CP1251, STOP},
 364         /* os2     */ {STOP},
 365         /* mac     */ {STOP}
 366     },
 367
 368     {{STOP},{STOP},{STOP},{STOP}} /* Terminator */
 369     /* no, _not_ Arnold! */
 370 };
 371
 372
 373
 374
 375 wxFontEncodingArray wxEncodingConverter::GetPlatformEquivalents(wxFontEncoding enc, int platform)
 376 {
 377     if (platform == wxPLATFORM_CURRENT)
 378     {
 379 #if defined(__WXMSW__)
 380         platform = wxPLATFORM_WINDOWS;
 381 #elif defined(__WXGTK__) || defined(__WXMOTIF__)
 382         platform = wxPLATFORM_UNIX;
 383 #elif defined(__WXOS2__)
 384         platform = wxPLATFORM_OS2;
 385 #elif defined(__WXMAC__)
 386         platform = wxPLATFORM_MAC;
 387 #endif
 388     }
 389
 390     int i, clas, e ;
 391     wxFontEncoding *f;
 392     wxFontEncodingArray arr;
 393
 394     clas = 0;
 395     while (EquivalentEncodings[clas][0][0] != STOP)
 396     {
 397         for (i = 0; i < NUM_OF_PLATFORMS; i++)
 398             for (e = 0; EquivalentEncodings[clas][i][e] != STOP; e++)
 399                 if (EquivalentEncodings[clas][i][e] == enc)
 400                 {
 401                     for (f = EquivalentEncodings[clas][platform]; *f != STOP; f++)
 402                         if (*f == enc) arr.Add(enc);
 403                     for (f = EquivalentEncodings[clas][platform]; *f != STOP; f++)
 404                         if (arr.Index(*f) == wxNOT_FOUND) arr.Add(*f);
 405                     i = NUM_OF_PLATFORMS/*hack*/; break;
 406                 }
 407         clas++;
 408     }
 409
 410     return arr;
 411 }
 412
 413
 414
 415 wxFontEncodingArray wxEncodingConverter::GetAllEquivalents(wxFontEncoding enc)
 416 {
 417     int i, clas, e, j ;
 418     wxFontEncoding *f;
 419     wxFontEncodingArray arr;
 420
 421     arr = GetPlatformEquivalents(enc); // we want them to be first items in array
 422
 423     clas = 0;
 424     while (EquivalentEncodings[clas][0][0] != STOP)
 425     {
 426         for (i = 0; i < NUM_OF_PLATFORMS; i++)
 427             for (e = 0; EquivalentEncodings[clas][i][e] != STOP; e++)
 428                 if (EquivalentEncodings[clas][i][e] == enc)
 429                 {
 430                     for (j = 0; j < NUM_OF_PLATFORMS; j++)
 431                         for (f = EquivalentEncodings[clas][j]; *f != STOP; f++)
 432                             if (arr.Index(*f) == wxNOT_FOUND) arr.Add(*f);
 433                     i = NUM_OF_PLATFORMS/*hack*/; break;
 434                 }
 435         clas++;
 436     }
 437
 438     return arr;
 439 }