src/common/encconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        encconv.cpp
   3 // Purpose:     wxEncodingConverter class for converting between different
   4 //              font encodings
   5 // Author:      Vaclav Slavik
   6 // Copyright:   (c) 1999 Vaclav Slavik
   7 // Licence:     wxWindows Licence
   8 /////////////////////////////////////////////////////////////////////////////
   9
  10 #ifdef __GNUG__
  11 #pragma implementation "encconv.h"
  12 #endif
  13
  14 // For compilers that support precompilation, includes "wx.h".
  15 #include "wx/wxprec.h"
  16
  17 #ifdef __BORLANDC__
  18   #pragma hdrstop
  19 #endif
  20
  21 #if wxUSE_FONTMAP
  22
  23 #include "wx/encconv.h"
  24
  25 #include <stdlib.h>
  26
  27 // conversion tables, generated by scripts in $(WXWIN)/misc/unictabl:
  28 #ifdef __BORLANDC__
  29 #include "../common/unictabl.inc"
  30 #else
  31 #include "unictabl.inc"
  32 #endif
  33
  34 #if wxUSE_WCHAR_T
  35 typedef wchar_t tchar;
  36 #else
  37 typedef char tchar;
  38 #endif
  39
  40 static wxUint16* LINKAGEMODE GetEncTable(wxFontEncoding enc)
  41 {
  42     for (int i = 0; encodings_list[i].table != NULL; i++)
  43     {
  44         if (encodings_list[i].encoding == enc)
  45             return encodings_list[i].table;
  46     }
  47     return NULL;
  48 }
  49
  50 typedef struct {
  51     wxUint16 u;
  52     wxUint8  c;
  53 } CharsetItem;
  54
  55
  56
  57 extern "C" int LINKAGEMODE CompareCharsetItems(const void *i1, const void *i2)
  58 {
  59     return ( ((CharsetItem*)i1) -> u - ((CharsetItem*)i2) -> u );
  60 }
  61
  62
  63 static CharsetItem* LINKAGEMODE BuildReverseTable(wxUint16 *tbl)
  64 {
  65     CharsetItem *rev = new CharsetItem[128];
  66
  67     for (int i = 0; i < 128; i++)
  68         rev[i].c = 128 + i, rev[i].u = tbl[i];
  69
  70     qsort(rev, 128, sizeof(CharsetItem), CompareCharsetItems);
  71
  72     return rev;
  73 }
  74
  75
  76
  77 wxEncodingConverter::wxEncodingConverter()
  78 {
  79     m_Table = NULL;
  80     m_UnicodeInput = m_UnicodeOutput = FALSE;
  81     m_JustCopy = FALSE;
  82 }
  83
  84
  85
  86 bool wxEncodingConverter::Init(wxFontEncoding input_enc, wxFontEncoding output_enc, int method)
  87 {
  88     unsigned i;
  89     wxUint16 *in_tbl = NULL, *out_tbl = NULL;
  90
  91     if (m_Table) {delete[] m_Table; m_Table = NULL;}
  92
  93 #if !wxUSE_WCHAR_T
  94     if (input_enc == wxFONTENCODING_UNICODE || output_enc == wxFONTENCODING_UNICODE) return FALSE;
  95 #endif
  96
  97     if (input_enc == output_enc) {m_JustCopy = TRUE; return TRUE;}
  98
  99     m_UnicodeOutput = (output_enc == wxFONTENCODING_UNICODE);
 100     m_JustCopy = FALSE;
 101
 102     if (input_enc == wxFONTENCODING_UNICODE)
 103     {
 104         if ((out_tbl = GetEncTable(output_enc)) == NULL) return FALSE;
 105
 106         m_Table = new tchar[65536];
 107         for (i = 0; i < 128; i++)  m_Table[i] = (tchar)i; // 7bit ASCII
 108         for (i = 128; i < 65536; i++)  m_Table[i] = (tchar)'?';
 109                 // FIXME - this should be character that means `unicode to charset' impossible, not '?'
 110
 111         if (method == wxCONVERT_SUBSTITUTE)
 112         {
 113             for (i = 0; i < encoding_unicode_fallback_count; i++)
 114                 m_Table[encoding_unicode_fallback[i].c] = (tchar) encoding_unicode_fallback[i].s;
 115         }
 116
 117         for (i = 0; i < 128; i++)
 118             m_Table[out_tbl[i]] = (tchar)(128 + i);
 119
 120         m_UnicodeInput = TRUE;
 121         return TRUE;
 122     }
 123
 124     else
 125     {
 126         if ((in_tbl = GetEncTable(input_enc)) == NULL) return FALSE;
 127         if (output_enc != wxFONTENCODING_UNICODE)
 128             if ((out_tbl = GetEncTable(output_enc)) == NULL) return FALSE;
 129
 130         m_UnicodeInput = FALSE;
 131
 132         m_Table = new tchar[256];
 133         for (i = 0; i < 128; i++)  m_Table[i] = (tchar)i; // 7bit ASCII
 134
 135         if (output_enc == wxFONTENCODING_UNICODE)
 136         {
 137             for (i = 0; i < 128; i++)  m_Table[128 + i] = (tchar)in_tbl[i];
 138             return TRUE;
 139         }
 140         else
 141         {
 142             CharsetItem *rev = BuildReverseTable(out_tbl);
 143             CharsetItem *item;
 144             CharsetItem key;
 145
 146             for (i = 0; i < 128; i++)
 147             {
 148                 key.u = in_tbl[i];
 149                 item = (CharsetItem*) bsearch(&key, rev, 128, sizeof(CharsetItem), CompareCharsetItems);
 150                 if (item == NULL && method == wxCONVERT_SUBSTITUTE)
 151                     item = (CharsetItem*) bsearch(&key, encoding_unicode_fallback,
 152                                 encoding_unicode_fallback_count, sizeof(CharsetItem), CompareCharsetItems);
 153                 if (item)
 154                     m_Table[128 + i] = (tchar)item -> c;
 155                 else
 156 #if wxUSE_WCHAR_T
 157                     m_Table[128 + i] = (wchar_t)(128 + i);
 158 #else
 159                     m_Table[128 + i] = (char)(128 + i);
 160 #endif
 161             }
 162
 163             delete[] rev;
 164             return TRUE;
 165         }
 166     }
 167 }
 168
 169
 170
 171 void wxEncodingConverter::Convert(const char* input, char* output)
 172 {
 173     wxASSERT_MSG(!m_UnicodeOutput, wxT("You cannot convert to unicode if output is const char*!"));
 174     wxASSERT_MSG(!m_UnicodeInput, wxT("You cannot convert from unicode if input is const char*!"));
 175
 176     const char *i;
 177     char *o;
 178
 179     if (m_JustCopy)
 180     {
 181         strcpy(output, input);
 182         return;
 183     }
 184
 185     wxCHECK_RET(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 186
 187     for (i = input, o = output; *i != 0;)
 188         *(o++) = (char)(m_Table[(wxUint8)*(i++)]);
 189     *o = 0;
 190 }
 191
 192
 193 #if wxUSE_WCHAR_T
 194
 195 void wxEncodingConverter::Convert(const char* input, wchar_t* output)
 196 {
 197     wxASSERT_MSG(m_UnicodeOutput, wxT("You cannot convert to 8-bit if output is const wchar_t*!"));
 198     wxASSERT_MSG(!m_UnicodeInput, wxT("You cannot convert from unicode if input is const char*!"));
 199
 200     const char *i;
 201     wchar_t *o;
 202
 203     if (m_JustCopy)
 204     {
 205         for (i = input, o = output; *i != 0;)
 206             *(o++) = (wchar_t)(*(i++));
 207         *o = 0;
 208         return;
 209     }
 210
 211     wxCHECK_RET(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 212
 213     for (i = input, o = output; *i != 0;)
 214         *(o++) = (wchar_t)(m_Table[(wxUint8)*(i++)]);
 215     *o = 0;
 216 }
 217
 218
 219
 220 void wxEncodingConverter::Convert(const wchar_t* input, char* output)
 221 {
 222     wxASSERT_MSG(!m_UnicodeOutput, wxT("You cannot convert to unicode if output is const char*!"));
 223     wxASSERT_MSG(m_UnicodeInput, wxT("You cannot convert from 8-bit if input is const wchar_t*!"));
 224
 225     const wchar_t *i;
 226     char *o;
 227
 228     if (m_JustCopy)
 229     {
 230         for (i = input, o = output; *i != 0;)
 231             *(o++) = (char)(*(i++));
 232         *o = 0;
 233         return;
 234     }
 235
 236     wxCHECK_RET(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 237
 238     for (i = input, o = output; *i != 0;)
 239         *(o++) = (char)(m_Table[(wxUint16)*(i++)]);
 240     *o = 0;
 241 }
 242
 243
 244
 245 void wxEncodingConverter::Convert(const wchar_t* input, wchar_t* output)
 246 {
 247     wxASSERT_MSG(m_UnicodeOutput, wxT("You cannot convert to 8-bit if output is const wchar_t*!"));
 248     wxASSERT_MSG(m_UnicodeInput, wxT("You cannot convert from 8-bit if input is const wchar_t*!"));
 249
 250     const wchar_t *i;
 251     wchar_t *o;
 252
 253     if (m_JustCopy)
 254     {
 255         // wcscpy() is not guaranteed to exist
 256         for (i = input, o = output; *i != 0;)
 257             *(o++) = (*(i++));
 258         *o = 0;
 259         return;
 260     }
 261
 262     wxCHECK_RET(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 263
 264     for (i = input, o = output; *i != 0;)
 265         *(o++) = (wchar_t)(m_Table[(wxUint8)*(i++)]);
 266     *o = 0;
 267 }
 268
 269 #endif // wxUSE_WCHAR_T
 270
 271
 272 wxString wxEncodingConverter::Convert(const wxString& input)
 273 {
 274     if (m_JustCopy) return input;
 275
 276     wxString s;
 277     const wxChar *i;
 278
 279     wxCHECK_MSG(m_Table != NULL, s,
 280                 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 281
 282     if (m_UnicodeInput)
 283     {
 284         for (i = input.c_str(); *i != 0; i++)
 285             s << (wxChar)(m_Table[(wxUint16)*i]);
 286     }
 287     else
 288     {
 289         for (i = input.c_str(); *i != 0; i++)
 290             s << (wxChar)(m_Table[(wxUint8)*i]);
 291     }
 292
 293     return s;
 294 }
 295
 296
 297
 298
 299
 300
 301
// The following table describes classes of equivalent encodings: each class
// lists, per platform, the encodings used for one language group.
//
// Layout: EquivalentEncodings[class][platform][n]. Every per-platform list
// is terminated by STOP, and the table itself ends with a class whose lists
// are all empty. The lookup functions below detect that terminator by
// checking only the first entry of the first (unix) list.

#define STOP wxFONTENCODING_SYSTEM

#define NUM_OF_PLATFORMS  4 /* must match the wxPLATFORM_XXXX enum: the platform value is used as an index */
#define ENC_PER_PLATFORM  5
           // Maximum number of encodings for one language used on one
           // platform (each list has one extra slot for the STOP marker).
           // Anybody thinks 5 is not enough? ;-)

static wxFontEncoding
    EquivalentEncodings[][NUM_OF_PLATFORMS][ENC_PER_PLATFORM+1] = {

    // *** Please list the more common encodings first! ***

    // Western European
    {
        /* unix    */ {wxFONTENCODING_ISO8859_1, wxFONTENCODING_ISO8859_15, STOP},
        /* windows */ {wxFONTENCODING_CP1252, STOP},
        /* os2     */ {STOP},
        /* mac     */ {STOP}
    },

    // Central European
    {
        /* unix    */ {wxFONTENCODING_ISO8859_2, STOP},
        /* windows */ {wxFONTENCODING_CP1250, STOP},
        /* os2     */ {STOP},
        /* mac     */ {STOP}
    },

    // Baltic
    {
        /* unix    */ {wxFONTENCODING_ISO8859_13, wxFONTENCODING_ISO8859_4, STOP},
        /* windows */ {wxFONTENCODING_CP1257, STOP},
        /* os2     */ {STOP},
        /* mac     */ {STOP}
    },

    // Hebrew
    {
        /* unix    */ {wxFONTENCODING_ISO8859_8, STOP},
        /* windows */ {wxFONTENCODING_CP1255, STOP},
        /* os2     */ {STOP},
        /* mac     */ {STOP}
    },

    // Greek
    {
        /* unix    */ {wxFONTENCODING_ISO8859_7, STOP},
        /* windows */ {wxFONTENCODING_CP1253, STOP},
        /* os2     */ {STOP},
        /* mac     */ {STOP}
    },

    // Arabic
    {
        /* unix    */ {wxFONTENCODING_ISO8859_6, STOP},
        /* windows */ {wxFONTENCODING_CP1256, STOP},
        /* os2     */ {STOP},
        /* mac     */ {STOP}
    },

    // Turkish
    {
        /* unix    */ {wxFONTENCODING_ISO8859_9, STOP},
        /* windows */ {wxFONTENCODING_CP1254, STOP},
        /* os2     */ {STOP},
        /* mac     */ {STOP}
    },

    // Cyrillic
    {
        /* unix    */ {wxFONTENCODING_KOI8, wxFONTENCODING_ISO8859_5, STOP},
        /* windows */ {wxFONTENCODING_CP1251, STOP},
        /* os2     */ {STOP},
        /* mac     */ {STOP}
    },

    {{STOP},{STOP},{STOP},{STOP}} /* Terminator */
    /* no, _not_ Arnold! */
};
 384
 385
 386
 387
 388 wxFontEncodingArray wxEncodingConverter::GetPlatformEquivalents(wxFontEncoding enc, int platform)
 389 {
 390     if (platform == wxPLATFORM_CURRENT)
 391     {
 392 #if defined(__WXMSW__)
 393         platform = wxPLATFORM_WINDOWS;
 394 #elif defined(__WXGTK__) || defined(__WXMOTIF__)
 395         platform = wxPLATFORM_UNIX;
 396 #elif defined(__WXOS2__)
 397         platform = wxPLATFORM_OS2;
 398 #elif defined(__WXMAC__)
 399         platform = wxPLATFORM_MAC;
 400 #endif
 401     }
 402
 403     int i, clas, e ;
 404     wxFontEncoding *f;
 405     wxFontEncodingArray arr;
 406
 407     clas = 0;
 408     while (EquivalentEncodings[clas][0][0] != STOP)
 409     {
 410         for (i = 0; i < NUM_OF_PLATFORMS; i++)
 411             for (e = 0; EquivalentEncodings[clas][i][e] != STOP; e++)
 412                 if (EquivalentEncodings[clas][i][e] == enc)
 413                 {
 414                     for (f = EquivalentEncodings[clas][platform]; *f != STOP; f++)
 415                         if (*f == enc) arr.Add(enc);
 416                     for (f = EquivalentEncodings[clas][platform]; *f != STOP; f++)
 417                         if (arr.Index(*f) == wxNOT_FOUND) arr.Add(*f);
 418                     i = NUM_OF_PLATFORMS/*hack*/; break;
 419                 }
 420         clas++;
 421     }
 422
 423     return arr;
 424 }
 425
 426
 427
 428 wxFontEncodingArray wxEncodingConverter::GetAllEquivalents(wxFontEncoding enc)
 429 {
 430     int i, clas, e, j ;
 431     wxFontEncoding *f;
 432     wxFontEncodingArray arr;
 433
 434     arr = GetPlatformEquivalents(enc); // we want them to be first items in array
 435
 436     clas = 0;
 437     while (EquivalentEncodings[clas][0][0] != STOP)
 438     {
 439         for (i = 0; i < NUM_OF_PLATFORMS; i++)
 440             for (e = 0; EquivalentEncodings[clas][i][e] != STOP; e++)
 441                 if (EquivalentEncodings[clas][i][e] == enc)
 442                 {
 443                     for (j = 0; j < NUM_OF_PLATFORMS; j++)
 444                         for (f = EquivalentEncodings[clas][j]; *f != STOP; f++)
 445                             if (arr.Index(*f) == wxNOT_FOUND) arr.Add(*f);
 446                     i = NUM_OF_PLATFORMS/*hack*/; break;
 447                 }
 448         clas++;
 449     }
 450
 451     return arr;
 452 }
 453
 454 #endif // wxUSE_FONTMAP