src/common/encconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        encconv.cpp
   3 // Purpose:     wxEncodingConverter class for converting between different
   4 //              font encodings
   5 // Author:      Vaclav Slavik
   6 // Copyright:   (c) 1999 Vaclav Slavik
   7 // Licence:     wxWindows Licence
   8 /////////////////////////////////////////////////////////////////////////////
   9
  10 #ifdef __GNUG__
  11 #pragma implementation "encconv.h"
  12 #endif
  13
  14 // For compilers that support precompilation, includes "wx.h".
  15 #include "wx/wxprec.h"
  16
  17 #ifdef __BORLANDC__
  18   #pragma hdrstop
  19 #endif
  20
  21 #include "wx/encconv.h"
  22
  23 #include <stdlib.h>
  24
  25 // conversion tables, generated by scripts in $(WXWIN)/misc/unictabl:
  26 #ifdef __BORLANDC__
  27 #include "../common/unictabl.inc"
  28 #else
  29 #include "unictabl.inc"
  30 #endif
  31
// Element type of the conversion table (m_Table is allocated as an array of
// tchar in Init()): wchar_t when wide character support is compiled in,
// plain char otherwise.
#if wxUSE_WCHAR_T
typedef wchar_t tchar;
#else
typedef char tchar;
#endif
  37
  38 static wxUint16* LINKAGEMODE GetEncTable(wxFontEncoding enc)
  39 {
  40     for (int i = 0; encodings_list[i].table != NULL; i++)
  41     {
  42         if (encodings_list[i].encoding == enc)
  43             return encodings_list[i].table;
  44     }
  45     return NULL;
  46 }
  47
  48 typedef struct {
  49     wxUint16 u;
  50     wxUint8  c;
  51 } CharsetItem;
  52
  53
  54
  55 static int LINKAGEMODE CompareCharsetItems(const void *i1, const void *i2)
  56 {
  57     return ( ((CharsetItem*)i1) -> u - ((CharsetItem*)i2) -> u );
  58 }
  59
  60
  61 static CharsetItem* LINKAGEMODE BuildReverseTable(wxUint16 *tbl)
  62 {
  63     CharsetItem *rev = new CharsetItem[128];
  64
  65     for (int i = 0; i < 128; i++)
  66         rev[i].c = 128 + i, rev[i].u = tbl[i];
  67
  68     qsort(rev, 128, sizeof(CharsetItem), CompareCharsetItems);
  69
  70     return rev;
  71 }
  72
  73
  74
  75 wxEncodingConverter::wxEncodingConverter()
  76 {
  77     m_Table = NULL;
  78     m_UnicodeInput = m_UnicodeOutput = FALSE;
  79     m_JustCopy = FALSE;
  80 }
  81
  82
  83
  84 bool wxEncodingConverter::Init(wxFontEncoding input_enc, wxFontEncoding output_enc, int method)
  85 {
  86     unsigned i;
  87     wxUint16 *in_tbl = NULL, *out_tbl = NULL;
  88
  89     if (m_Table) {delete[] m_Table; m_Table = NULL;}
  90
  91 #if !wxUSE_WCHAR_T
  92     if (input_enc == wxFONTENCODING_UNICODE || output_enc == wxFONTENCODING_UNICODE) return FALSE;
  93 #endif
  94
  95     if (input_enc == output_enc) {m_JustCopy = TRUE; return TRUE;}
  96
  97     m_UnicodeOutput = (output_enc == wxFONTENCODING_UNICODE);
  98     m_JustCopy = FALSE;
  99
 100     if (input_enc == wxFONTENCODING_UNICODE)
 101     {
 102         if ((out_tbl = GetEncTable(output_enc)) == NULL) return FALSE;
 103
 104         m_Table = new tchar[65536];
 105         for (i = 0; i < 128; i++)  m_Table[i] = (tchar)i; // 7bit ASCII
 106         for (i = 128; i < 65536; i++)  m_Table[i] = (tchar)'?';
 107                 // FIXME - this should be character that means `unicode to charset' impossible, not '?'
 108
 109         if (method == wxCONVERT_SUBSTITUTE)
 110         {
 111             for (i = 0; i < encoding_unicode_fallback_count; i++)
 112                 m_Table[encoding_unicode_fallback[i].c] = (tchar) encoding_unicode_fallback[i].s;
 113         }
 114
 115         for (i = 0; i < 128; i++)
 116             m_Table[out_tbl[i]] = (tchar)(128 + i);
 117
 118         m_UnicodeInput = TRUE;
 119         return TRUE;
 120     }
 121
 122     else
 123     {
 124         if ((in_tbl = GetEncTable(input_enc)) == NULL) return FALSE;
 125         if (output_enc != wxFONTENCODING_UNICODE)
 126             if ((out_tbl = GetEncTable(output_enc)) == NULL) return FALSE;
 127
 128         m_UnicodeInput = FALSE;
 129
 130         m_Table = new tchar[256];
 131         for (i = 0; i < 128; i++)  m_Table[i] = (tchar)i; // 7bit ASCII
 132
 133         if (output_enc == wxFONTENCODING_UNICODE)
 134         {
 135             for (i = 0; i < 128; i++)  m_Table[128 + i] = (tchar)in_tbl[i];
 136             return TRUE;
 137         }
 138         else
 139         {
 140             CharsetItem *rev = BuildReverseTable(out_tbl);
 141             CharsetItem *item;
 142             CharsetItem key;
 143
 144             for (i = 0; i < 128; i++)
 145             {
 146                 key.u = in_tbl[i];
 147                 item = (CharsetItem*) bsearch(&key, rev, 128, sizeof(CharsetItem), CompareCharsetItems);
 148                 if (item == NULL && method == wxCONVERT_SUBSTITUTE)
 149                     item = (CharsetItem*) bsearch(&key, encoding_unicode_fallback,
 150                                 encoding_unicode_fallback_count, sizeof(CharsetItem), CompareCharsetItems);
 151                 if (item)
 152                     m_Table[128 + i] = (tchar)item -> c;
 153                 else
 154 #if wxUSE_WCHAR_T
 155                     m_Table[128 + i] = (wchar_t)(128 + i);
 156 #else
 157                     m_Table[128 + i] = (char)(128 + i);
 158 #endif
 159             }
 160
 161             delete[] rev;
 162             return TRUE;
 163         }
 164     }
 165 }
 166
 167
 168
 169 void wxEncodingConverter::Convert(const char* input, char* output)
 170 {
 171     wxASSERT_MSG(!m_UnicodeOutput, wxT("You cannot convert to unicode if output is const char*!"));
 172     wxASSERT_MSG(!m_UnicodeInput, wxT("You cannot convert from unicode if input is const char*!"));
 173
 174     const char *i;
 175     char *o;
 176
 177     if (m_JustCopy)
 178     {
 179         strcpy(output, input);
 180         return;
 181     }
 182
 183     wxASSERT_MSG(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 184
 185     for (i = input, o = output; *i != 0;)
 186         *(o++) = (char)(m_Table[(wxUint8)*(i++)]);
 187     *o = 0;
 188 }
 189
 190
 191 #if wxUSE_WCHAR_T
 192
 193 void wxEncodingConverter::Convert(const char* input, wchar_t* output)
 194 {
 195     wxASSERT_MSG(m_UnicodeOutput, wxT("You cannot convert to 8-bit if output is const wchar_t*!"));
 196     wxASSERT_MSG(!m_UnicodeInput, wxT("You cannot convert from unicode if input is const char*!"));
 197
 198     const char *i;
 199     wchar_t *o;
 200
 201     if (m_JustCopy)
 202     {
 203         for (i = input, o = output; *i != 0;)
 204             *(o++) = (wchar_t)(*(i++));
 205         *o = 0;
 206         return;
 207     }
 208
 209     wxASSERT_MSG(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 210
 211     for (i = input, o = output; *i != 0;)
 212         *(o++) = (wchar_t)(m_Table[(wxUint8)*(i++)]);
 213     *o = 0;
 214 }
 215
 216
 217
 218 void wxEncodingConverter::Convert(const wchar_t* input, char* output)
 219 {
 220     wxASSERT_MSG(!m_UnicodeOutput, wxT("You cannot convert to unicode if output is const char*!"));
 221     wxASSERT_MSG(m_UnicodeInput, wxT("You cannot convert from 8-bit if input is const wchar_t*!"));
 222
 223     const wchar_t *i;
 224     char *o;
 225
 226     if (m_JustCopy)
 227     {
 228         for (i = input, o = output; *i != 0;)
 229             *(o++) = (char)(*(i++));
 230         *o = 0;
 231         return;
 232     }
 233
 234     wxASSERT_MSG(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 235
 236     for (i = input, o = output; *i != 0;)
 237         *(o++) = (char)(m_Table[(wxUint16)*(i++)]);
 238     *o = 0;
 239 }
 240
 241
 242
 243 void wxEncodingConverter::Convert(const wchar_t* input, wchar_t* output)
 244 {
 245     wxASSERT_MSG(m_UnicodeOutput, wxT("You cannot convert to 8-bit if output is const wchar_t*!"));
 246     wxASSERT_MSG(m_UnicodeInput, wxT("You cannot convert from 8-bit if input is const wchar_t*!"));
 247
 248     const wchar_t *i;
 249     wchar_t *o;
 250
 251     if (m_JustCopy)
 252     {
 253         // wcscpy() is not guaranteed to exist
 254         for (i = input, o = output; *i != 0;)
 255             *(o++) = (*(i++));
 256         *o = 0;
 257         return;
 258     }
 259
 260     wxASSERT_MSG(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 261
 262     for (i = input, o = output; *i != 0;)
 263         *(o++) = (wchar_t)(m_Table[(wxUint8)*(i++)]);
 264     *o = 0;
 265 }
 266
 267 #endif // wxUSE_WCHAR_T
 268
 269
 270 wxString wxEncodingConverter::Convert(const wxString& input)
 271 {
 272     if (m_JustCopy) return input;
 273
 274     wxString s;
 275     const wxChar *i;
 276
 277     wxASSERT_MSG(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 278
 279     if (m_UnicodeInput)
 280         for (i = input.c_str(); *i != 0; i++)
 281             s << (wxChar)(m_Table[(wxUint16)*i]);
 282     else
 283         for (i = input.c_str(); *i != 0; i++)
 284             s << (wxChar)(m_Table[(wxUint8)*i]);
 285     return s;
 286 }
 287
 288
 289
 290
 291
 292
 293
// The following tables describe classes of encoding equivalence.
//

// Sentinel value that terminates every per-platform encoding list in the
// table below (and marks the terminating table entry).
#define STOP wxFONTENCODING_SYSTEM

// Number of per-platform lists in each equivalence class; `platform'
// arguments are used directly as an index into those lists.
#define NUM_OF_PLATFORMS  4 /*must conform to enum wxPLATFORM_XXXX !!!*/
#define ENC_PER_PLATFORM  5
           // max no. of encodings for one language used on one platform
           // (each list has one extra slot for the STOP terminator)
           // Anybody thinks 5 is not enough? ;-)
 303
// Table of encoding equivalence classes. Each entry is one class (roughly
// one script/language group) and holds NUM_OF_PLATFORMS lists of encodings,
// in the order unix, windows, os2, mac, each terminated by STOP.
//
// The table itself is terminated by an entry whose lists are all empty.
// The lookup functions detect the end of the table by testing only the
// first element of the first (unix) list, so every real class must have a
// non-empty unix list.
static wxFontEncoding
    EquivalentEncodings[][NUM_OF_PLATFORMS][ENC_PER_PLATFORM+1] = {

    // *** Please put more common encodings as first! ***

    // Western European
    {
        /* unix    */ {wxFONTENCODING_ISO8859_1, wxFONTENCODING_ISO8859_15, STOP},
        /* windows */ {wxFONTENCODING_CP1252, STOP},
        /* os2     */ {STOP},
        /* mac     */ {STOP}
    },

    // Central European
    {
        /* unix    */ {wxFONTENCODING_ISO8859_2, STOP},
        /* windows */ {wxFONTENCODING_CP1250, STOP},
        /* os2     */ {STOP},
        /* mac     */ {STOP}
    },

    // Baltic
    {
        /* unix    */ {wxFONTENCODING_ISO8859_13, wxFONTENCODING_ISO8859_4, STOP},
        /* windows */ {wxFONTENCODING_CP1257, STOP},
        /* os2     */ {STOP},
        /* mac     */ {STOP}
    },

    // Hebrew
    {
        /* unix    */ {wxFONTENCODING_ISO8859_8, STOP},
        /* windows */ {wxFONTENCODING_CP1255, STOP},
        /* os2     */ {STOP},
        /* mac     */ {STOP}
    },

    // Greek
    {
        /* unix    */ {wxFONTENCODING_ISO8859_7, STOP},
        /* windows */ {wxFONTENCODING_CP1253, STOP},
        /* os2     */ {STOP},
        /* mac     */ {STOP}
    },

    // Arabic
    {
        /* unix    */ {wxFONTENCODING_ISO8859_6, STOP},
        /* windows */ {wxFONTENCODING_CP1256, STOP},
        /* os2     */ {STOP},
        /* mac     */ {STOP}
    },

    // Turkish
    {
        /* unix    */ {wxFONTENCODING_ISO8859_9, STOP},
        /* windows */ {wxFONTENCODING_CP1254, STOP},
        /* os2     */ {STOP},
        /* mac     */ {STOP}
    },

    // Cyrillic
    {
        /* unix    */ {wxFONTENCODING_KOI8, wxFONTENCODING_ISO8859_5, STOP},
        /* windows */ {wxFONTENCODING_CP1251, STOP},
        /* os2     */ {STOP},
        /* mac     */ {STOP}
    },

    {{STOP},{STOP},{STOP},{STOP}} /* Terminator */
    /* no, _not_ Arnold! */
};
 376
 377
 378
 379
 380 wxFontEncodingArray wxEncodingConverter::GetPlatformEquivalents(wxFontEncoding enc, int platform)
 381 {
 382     if (platform == wxPLATFORM_CURRENT)
 383     {
 384 #if defined(__WXMSW__)
 385         platform = wxPLATFORM_WINDOWS;
 386 #elif defined(__WXGTK__) || defined(__WXMOTIF__)
 387         platform = wxPLATFORM_UNIX;
 388 #elif defined(__WXOS2__)
 389         platform = wxPLATFORM_OS2;
 390 #elif defined(__WXMAC__)
 391         platform = wxPLATFORM_MAC;
 392 #endif
 393     }
 394
 395     int i, clas, e ;
 396     wxFontEncoding *f;
 397     wxFontEncodingArray arr;
 398
 399     clas = 0;
 400     while (EquivalentEncodings[clas][0][0] != STOP)
 401     {
 402         for (i = 0; i < NUM_OF_PLATFORMS; i++)
 403             for (e = 0; EquivalentEncodings[clas][i][e] != STOP; e++)
 404                 if (EquivalentEncodings[clas][i][e] == enc)
 405                 {
 406                     for (f = EquivalentEncodings[clas][platform]; *f != STOP; f++)
 407                         if (*f == enc) arr.Add(enc);
 408                     for (f = EquivalentEncodings[clas][platform]; *f != STOP; f++)
 409                         if (arr.Index(*f) == wxNOT_FOUND) arr.Add(*f);
 410                     i = NUM_OF_PLATFORMS/*hack*/; break;
 411                 }
 412         clas++;
 413     }
 414
 415     return arr;
 416 }
 417
 418
 419
 420 wxFontEncodingArray wxEncodingConverter::GetAllEquivalents(wxFontEncoding enc)
 421 {
 422     int i, clas, e, j ;
 423     wxFontEncoding *f;
 424     wxFontEncodingArray arr;
 425
 426     arr = GetPlatformEquivalents(enc); // we want them to be first items in array
 427
 428     clas = 0;
 429     while (EquivalentEncodings[clas][0][0] != STOP)
 430     {
 431         for (i = 0; i < NUM_OF_PLATFORMS; i++)
 432             for (e = 0; EquivalentEncodings[clas][i][e] != STOP; e++)
 433                 if (EquivalentEncodings[clas][i][e] == enc)
 434                 {
 435                     for (j = 0; j < NUM_OF_PLATFORMS; j++)
 436                         for (f = EquivalentEncodings[clas][j]; *f != STOP; f++)
 437                             if (arr.Index(*f) == wxNOT_FOUND) arr.Add(*f);
 438                     i = NUM_OF_PLATFORMS/*hack*/; break;
 439                 }
 440         clas++;
 441     }
 442
 443     return arr;
 444 }