src/common/encconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        encconv.cpp
   3 // Purpose:     wxEncodingConverter class for converting between different
   4 //              font encodings
   5 // Author:      Vaclav Slavik
   6 // Copyright:   (c) 1999 Vaclav Slavik
   7 // Licence:     wxWindows Licence
   8 /////////////////////////////////////////////////////////////////////////////
   9
  10 #ifdef __GNUG__
  11 #pragma implementation "encconv.h"
  12 #endif
  13
  14 // For compilers that support precompilation, includes "wx.h".
  15 #include "wx/wxprec.h"
  16
  17 #ifdef __BORLANDC__
  18   #pragma hdrstop
  19 #endif
  20
  21 #if wxUSE_FONTMAP
  22
  23 #include "wx/encconv.h"
  24
  25 #include <stdlib.h>
  26
  27 // conversion tables, generated by scripts in $(WXWIN)/misc/unictabl:
  28 #ifdef __BORLANDC__
  29 #include "../common/unictabl.inc"
  30 #else
  31 #include "unictabl.inc"
  32 #endif
  33
  34 #if wxUSE_WCHAR_T
  35 typedef wchar_t tchar;
  36 #else
  37 typedef char tchar;
  38 #endif
  39
  40 static wxUint16* LINKAGEMODE GetEncTable(wxFontEncoding enc)
  41 {
  42     for (int i = 0; encodings_list[i].table != NULL; i++)
  43     {
  44         if (encodings_list[i].encoding == enc)
  45             return encodings_list[i].table;
  46     }
  47     return NULL;
  48 }
  49
  50 typedef struct {
  51     wxUint16 u;
  52     wxUint8  c;
  53 } CharsetItem;
  54
  55
  56
  57 static int LINKAGEMODE CompareCharsetItems(const void *i1, const void *i2)
  58 {
  59     return ( ((CharsetItem*)i1) -> u - ((CharsetItem*)i2) -> u );
  60 }
  61
  62
  63 static CharsetItem* LINKAGEMODE BuildReverseTable(wxUint16 *tbl)
  64 {
  65     CharsetItem *rev = new CharsetItem[128];
  66
  67     for (int i = 0; i < 128; i++)
  68         rev[i].c = 128 + i, rev[i].u = tbl[i];
  69
  70     qsort(rev, 128, sizeof(CharsetItem), CompareCharsetItems);
  71
  72     return rev;
  73 }
  74
  75
  76
  77 wxEncodingConverter::wxEncodingConverter()
  78 {
  79     m_Table = NULL;
  80     m_UnicodeInput = m_UnicodeOutput = FALSE;
  81     m_JustCopy = FALSE;
  82 }
  83
  84
  85
  86 bool wxEncodingConverter::Init(wxFontEncoding input_enc, wxFontEncoding output_enc, int method)
  87 {
  88     unsigned i;
  89     wxUint16 *in_tbl = NULL, *out_tbl = NULL;
  90
  91     if (m_Table) {delete[] m_Table; m_Table = NULL;}
  92
  93 #if !wxUSE_WCHAR_T
  94     if (input_enc == wxFONTENCODING_UNICODE || output_enc == wxFONTENCODING_UNICODE) return FALSE;
  95 #endif
  96
  97     if (input_enc == output_enc) {m_JustCopy = TRUE; return TRUE;}
  98
  99     m_UnicodeOutput = (output_enc == wxFONTENCODING_UNICODE);
 100     m_JustCopy = FALSE;
 101
 102     if (input_enc == wxFONTENCODING_UNICODE)
 103     {
 104         if ((out_tbl = GetEncTable(output_enc)) == NULL) return FALSE;
 105
 106         m_Table = new tchar[65536];
 107         for (i = 0; i < 128; i++)  m_Table[i] = (tchar)i; // 7bit ASCII
 108         for (i = 128; i < 65536; i++)  m_Table[i] = (tchar)'?';
 109                 // FIXME - this should be character that means `unicode to charset' impossible, not '?'
 110
 111         if (method == wxCONVERT_SUBSTITUTE)
 112         {
 113             for (i = 0; i < encoding_unicode_fallback_count; i++)
 114                 m_Table[encoding_unicode_fallback[i].c] = (tchar) encoding_unicode_fallback[i].s;
 115         }
 116
 117         for (i = 0; i < 128; i++)
 118             m_Table[out_tbl[i]] = (tchar)(128 + i);
 119
 120         m_UnicodeInput = TRUE;
 121         return TRUE;
 122     }
 123
 124     else
 125     {
 126         if ((in_tbl = GetEncTable(input_enc)) == NULL) return FALSE;
 127         if (output_enc != wxFONTENCODING_UNICODE)
 128             if ((out_tbl = GetEncTable(output_enc)) == NULL) return FALSE;
 129
 130         m_UnicodeInput = FALSE;
 131
 132         m_Table = new tchar[256];
 133         for (i = 0; i < 128; i++)  m_Table[i] = (tchar)i; // 7bit ASCII
 134
 135         if (output_enc == wxFONTENCODING_UNICODE)
 136         {
 137             for (i = 0; i < 128; i++)  m_Table[128 + i] = (tchar)in_tbl[i];
 138             return TRUE;
 139         }
 140         else
 141         {
 142             CharsetItem *rev = BuildReverseTable(out_tbl);
 143             CharsetItem *item;
 144             CharsetItem key;
 145
 146             for (i = 0; i < 128; i++)
 147             {
 148                 key.u = in_tbl[i];
 149                 item = (CharsetItem*) bsearch(&key, rev, 128, sizeof(CharsetItem), CompareCharsetItems);
 150                 if (item == NULL && method == wxCONVERT_SUBSTITUTE)
 151                     item = (CharsetItem*) bsearch(&key, encoding_unicode_fallback,
 152                                 encoding_unicode_fallback_count, sizeof(CharsetItem), CompareCharsetItems);
 153                 if (item)
 154                     m_Table[128 + i] = (tchar)item -> c;
 155                 else
 156 #if wxUSE_WCHAR_T
 157                     m_Table[128 + i] = (wchar_t)(128 + i);
 158 #else
 159                     m_Table[128 + i] = (char)(128 + i);
 160 #endif
 161             }
 162
 163             delete[] rev;
 164             return TRUE;
 165         }
 166     }
 167 }
 168
 169
 170
 171 void wxEncodingConverter::Convert(const char* input, char* output)
 172 {
 173     wxASSERT_MSG(!m_UnicodeOutput, wxT("You cannot convert to unicode if output is const char*!"));
 174     wxASSERT_MSG(!m_UnicodeInput, wxT("You cannot convert from unicode if input is const char*!"));
 175
 176     const char *i;
 177     char *o;
 178
 179     if (m_JustCopy)
 180     {
 181         strcpy(output, input);
 182         return;
 183     }
 184
 185     wxASSERT_MSG(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 186
 187     for (i = input, o = output; *i != 0;)
 188         *(o++) = (char)(m_Table[(wxUint8)*(i++)]);
 189     *o = 0;
 190 }
 191
 192
 193 #if wxUSE_WCHAR_T
 194
 195 void wxEncodingConverter::Convert(const char* input, wchar_t* output)
 196 {
 197     wxASSERT_MSG(m_UnicodeOutput, wxT("You cannot convert to 8-bit if output is const wchar_t*!"));
 198     wxASSERT_MSG(!m_UnicodeInput, wxT("You cannot convert from unicode if input is const char*!"));
 199
 200     const char *i;
 201     wchar_t *o;
 202
 203     if (m_JustCopy)
 204     {
 205         for (i = input, o = output; *i != 0;)
 206             *(o++) = (wchar_t)(*(i++));
 207         *o = 0;
 208         return;
 209     }
 210
 211     wxASSERT_MSG(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 212
 213     for (i = input, o = output; *i != 0;)
 214         *(o++) = (wchar_t)(m_Table[(wxUint8)*(i++)]);
 215     *o = 0;
 216 }
 217
 218
 219
 220 void wxEncodingConverter::Convert(const wchar_t* input, char* output)
 221 {
 222     wxASSERT_MSG(!m_UnicodeOutput, wxT("You cannot convert to unicode if output is const char*!"));
 223     wxASSERT_MSG(m_UnicodeInput, wxT("You cannot convert from 8-bit if input is const wchar_t*!"));
 224
 225     const wchar_t *i;
 226     char *o;
 227
 228     if (m_JustCopy)
 229     {
 230         for (i = input, o = output; *i != 0;)
 231             *(o++) = (char)(*(i++));
 232         *o = 0;
 233         return;
 234     }
 235
 236     wxASSERT_MSG(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 237
 238     for (i = input, o = output; *i != 0;)
 239         *(o++) = (char)(m_Table[(wxUint16)*(i++)]);
 240     *o = 0;
 241 }
 242
 243
 244
 245 void wxEncodingConverter::Convert(const wchar_t* input, wchar_t* output)
 246 {
 247     wxASSERT_MSG(m_UnicodeOutput, wxT("You cannot convert to 8-bit if output is const wchar_t*!"));
 248     wxASSERT_MSG(m_UnicodeInput, wxT("You cannot convert from 8-bit if input is const wchar_t*!"));
 249
 250     const wchar_t *i;
 251     wchar_t *o;
 252
 253     if (m_JustCopy)
 254     {
 255         // wcscpy() is not guaranteed to exist
 256         for (i = input, o = output; *i != 0;)
 257             *(o++) = (*(i++));
 258         *o = 0;
 259         return;
 260     }
 261
 262     wxASSERT_MSG(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 263
 264     for (i = input, o = output; *i != 0;)
 265         *(o++) = (wchar_t)(m_Table[(wxUint8)*(i++)]);
 266     *o = 0;
 267 }
 268
 269 #endif // wxUSE_WCHAR_T
 270
 271
 272 wxString wxEncodingConverter::Convert(const wxString& input)
 273 {
 274     if (m_JustCopy) return input;
 275
 276     wxString s;
 277     const wxChar *i;
 278
 279     wxASSERT_MSG(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 280
 281     if (m_UnicodeInput)
 282         for (i = input.c_str(); *i != 0; i++)
 283             s << (wxChar)(m_Table[(wxUint16)*i]);
 284     else
 285         for (i = input.c_str(); *i != 0; i++)
 286             s << (wxChar)(m_Table[(wxUint8)*i]);
 287     return s;
 288 }
 289
 290
 291
 292
 293
 294
 295
  296 // The table below groups font encodings into classes of equivalence.
  297 //
      // Each class holds one list of encodings per platform, in the row order
      // unix, windows, os2, mac, and every list ends with STOP. The table itself
      // ends with a class whose lists are all empty. The lookup functions in this
      // file recognise that terminator by testing only the first entry of the
      // first (unix) row, so every real class needs at least one unix encoding.
  298
      // End-of-list marker used inside EquivalentEncodings.
  299 #define STOP wxFONTENCODING_SYSTEM
  300
      // Number of per-platform rows in each class.
  301 #define NUM_OF_PLATFORMS  4 /*must conform to enum wxPLATFORM_XXXX !!!*/
  302 #define ENC_PER_PLATFORM  5
  303            // Maximum number of encodings for one language used on one platform;
  304            // each row has one extra slot for the STOP marker.
  305
  306 static wxFontEncoding
  307     EquivalentEncodings[][NUM_OF_PLATFORMS][ENC_PER_PLATFORM+1] = {
  308
  309     // *** Please put the more common encodings first! ***
  310
  311     // Western European
  312     {
  313         /* unix    */ {wxFONTENCODING_ISO8859_1, wxFONTENCODING_ISO8859_15, STOP},
  314         /* windows */ {wxFONTENCODING_CP1252, STOP},
  315         /* os2     */ {STOP},
  316         /* mac     */ {STOP}
  317     },
  318
  319     // Central European
  320     {
  321         /* unix    */ {wxFONTENCODING_ISO8859_2, STOP},
  322         /* windows */ {wxFONTENCODING_CP1250, STOP},
  323         /* os2     */ {STOP},
  324         /* mac     */ {STOP}
  325     },
  326
  327     // Baltic
  328     {
  329         /* unix    */ {wxFONTENCODING_ISO8859_13, wxFONTENCODING_ISO8859_4, STOP},
  330         /* windows */ {wxFONTENCODING_CP1257, STOP},
  331         /* os2     */ {STOP},
  332         /* mac     */ {STOP}
  333     },
  334
  335     // Hebrew
  336     {
  337         /* unix    */ {wxFONTENCODING_ISO8859_8, STOP},
  338         /* windows */ {wxFONTENCODING_CP1255, STOP},
  339         /* os2     */ {STOP},
  340         /* mac     */ {STOP}
  341     },
  342
  343     // Greek
  344     {
  345         /* unix    */ {wxFONTENCODING_ISO8859_7, STOP},
  346         /* windows */ {wxFONTENCODING_CP1253, STOP},
  347         /* os2     */ {STOP},
  348         /* mac     */ {STOP}
  349     },
  350
  351     // Arabic
  352     {
  353         /* unix    */ {wxFONTENCODING_ISO8859_6, STOP},
  354         /* windows */ {wxFONTENCODING_CP1256, STOP},
  355         /* os2     */ {STOP},
  356         /* mac     */ {STOP}
  357     },
  358
  359     // Turkish
  360     {
  361         /* unix    */ {wxFONTENCODING_ISO8859_9, STOP},
  362         /* windows */ {wxFONTENCODING_CP1254, STOP},
  363         /* os2     */ {STOP},
  364         /* mac     */ {STOP}
  365     },
  366
  367     // Cyrillic
  368     {
  369         /* unix    */ {wxFONTENCODING_KOI8, wxFONTENCODING_ISO8859_5, STOP},
  370         /* windows */ {wxFONTENCODING_CP1251, STOP},
  371         /* os2     */ {STOP},
  372         /* mac     */ {STOP}
  373     },
  374
  375     {{STOP},{STOP},{STOP},{STOP}} /* Terminator */
  376     /* no, _not_ Arnold! */
  377 };
 378
 379
 380
 381
 382 wxFontEncodingArray wxEncodingConverter::GetPlatformEquivalents(wxFontEncoding enc, int platform)
 383 {
 384     if (platform == wxPLATFORM_CURRENT)
 385     {
 386 #if defined(__WXMSW__)
 387         platform = wxPLATFORM_WINDOWS;
 388 #elif defined(__WXGTK__) || defined(__WXMOTIF__)
 389         platform = wxPLATFORM_UNIX;
 390 #elif defined(__WXOS2__)
 391         platform = wxPLATFORM_OS2;
 392 #elif defined(__WXMAC__)
 393         platform = wxPLATFORM_MAC;
 394 #endif
 395     }
 396
 397     int i, clas, e ;
 398     wxFontEncoding *f;
 399     wxFontEncodingArray arr;
 400
 401     clas = 0;
 402     while (EquivalentEncodings[clas][0][0] != STOP)
 403     {
 404         for (i = 0; i < NUM_OF_PLATFORMS; i++)
 405             for (e = 0; EquivalentEncodings[clas][i][e] != STOP; e++)
 406                 if (EquivalentEncodings[clas][i][e] == enc)
 407                 {
 408                     for (f = EquivalentEncodings[clas][platform]; *f != STOP; f++)
 409                         if (*f == enc) arr.Add(enc);
 410                     for (f = EquivalentEncodings[clas][platform]; *f != STOP; f++)
 411                         if (arr.Index(*f) == wxNOT_FOUND) arr.Add(*f);
 412                     i = NUM_OF_PLATFORMS/*hack*/; break;
 413                 }
 414         clas++;
 415     }
 416
 417     return arr;
 418 }
 419
 420
 421
 422 wxFontEncodingArray wxEncodingConverter::GetAllEquivalents(wxFontEncoding enc)
 423 {
 424     int i, clas, e, j ;
 425     wxFontEncoding *f;
 426     wxFontEncodingArray arr;
 427
 428     arr = GetPlatformEquivalents(enc); // we want them to be first items in array
 429
 430     clas = 0;
 431     while (EquivalentEncodings[clas][0][0] != STOP)
 432     {
 433         for (i = 0; i < NUM_OF_PLATFORMS; i++)
 434             for (e = 0; EquivalentEncodings[clas][i][e] != STOP; e++)
 435                 if (EquivalentEncodings[clas][i][e] == enc)
 436                 {
 437                     for (j = 0; j < NUM_OF_PLATFORMS; j++)
 438                         for (f = EquivalentEncodings[clas][j]; *f != STOP; f++)
 439                             if (arr.Index(*f) == wxNOT_FOUND) arr.Add(*f);
 440                     i = NUM_OF_PLATFORMS/*hack*/; break;
 441                 }
 442         clas++;
 443     }
 444
 445     return arr;
 446 }
 447
 448 #endif // wxUSE_FONTMAP