src/common/encconv.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        encconv.cpp
   3 // Purpose:     wxEncodingConverter class for converting between different
   4 //              font encodings
   5 // Author:      Vaclav Slavik
   6 // Copyright:   (c) 1999 Vaclav Slavik
   7 // Licence:     wxWindows Licence
   8 /////////////////////////////////////////////////////////////////////////////
   9
  10 #ifdef __GNUG__
  11 #pragma implementation "encconv.h"
  12 #endif
  13
  14 // For compilers that support precompilation, includes "wx.h".
  15 #include "wx/wxprec.h"
  16
  17 #ifdef __BORLANDC__
  18   #pragma hdrstop
  19 #endif
  20
  21 #include "wx/encconv.h"
  22
  23 #include <stdlib.h>
  24
  25 // conversion tables, generated by scripts in $(WXWIN)/misc/unictabl:
  26 #ifdef __BORLANDC__
  27 #include "../common/unictabl.inc"
  28 #else
  29 #include "unictabl.inc"
  30 #endif
  31
  32
  33 static wxUint16* LINKAGEMODE GetEncTable(wxFontEncoding enc)
  34 {
  35     for (int i = 0; encodings_list[i].table != NULL; i++)
  36     {
  37         if (encodings_list[i].encoding == enc)
  38             return encodings_list[i].table;
  39     }
  40     return NULL;
  41 }
  42
  43 typedef struct {
  44     wxUint16 u;
  45     wxUint8  c;
  46 } CharsetItem;
  47
  48
  49
  50 static int LINKAGEMODE CompareCharsetItems(const void *i1, const void *i2)
  51 {
  52     return ( ((CharsetItem*)i1) -> u - ((CharsetItem*)i2) -> u );
  53 }
  54
  55
  56 static CharsetItem* LINKAGEMODE BuildReverseTable(wxUint16 *tbl)
  57 {
  58     CharsetItem *rev = new CharsetItem[128];
  59
  60     for (int i = 0; i < 128; i++)
  61         rev[i].c = 128 + i, rev[i].u = tbl[i];
  62
  63     qsort(rev, 128, sizeof(CharsetItem), CompareCharsetItems);
  64
  65     return rev;
  66 }
  67
  68
  69
  70 wxEncodingConverter::wxEncodingConverter()
  71 {
  72     m_Table = NULL;
  73     m_UnicodeInput = m_UnicodeOutput = FALSE;
  74     m_JustCopy = FALSE;
  75 }
  76
  77
  78
  79 bool wxEncodingConverter::Init(wxFontEncoding input_enc, wxFontEncoding output_enc, int method)
  80 {
  81     unsigned i;
  82     wxUint16 *in_tbl = NULL, *out_tbl = NULL;
  83
  84     if (m_Table) {delete[] m_Table; m_Table = NULL;}
  85
  86 #if !wxUSE_UNICODE
  87     if (input_enc == wxFONTENCODING_UNICODE || output_enc == wxFONTENCODING_UNICODE) return FALSE;
  88 #endif
  89
  90     if (input_enc == output_enc) {m_JustCopy = TRUE; return TRUE;}
  91
  92     m_UnicodeOutput = (output_enc == wxFONTENCODING_UNICODE);
  93     m_JustCopy = FALSE;
  94
  95     if (input_enc == wxFONTENCODING_UNICODE)
  96     {
  97         if ((out_tbl = GetEncTable(output_enc)) == NULL) return FALSE;
  98
  99         m_Table = new wxChar[65536];
 100         for (i = 0; i < 128; i++)  m_Table[i] = (wxChar)i; // 7bit ASCII
 101         for (i = 128; i < 65536; i++)  m_Table[i] = (wxChar)'?';
 102                 // FIXME - this should be character that means `unicode to charset' impossible, not '?'
 103
 104         if (method == wxCONVERT_SUBSTITUTE)
 105         {
 106             for (i = 0; i < encoding_unicode_fallback_count; i++)
 107                 m_Table[encoding_unicode_fallback[i].c] = (wxChar) encoding_unicode_fallback[i].s;
 108         }
 109
 110         for (i = 0; i < 128; i++)
 111             m_Table[out_tbl[i]] = (wxChar)(128 + i);
 112
 113         m_UnicodeInput = TRUE;
 114         return TRUE;
 115     }
 116
 117     else
 118     {
 119         if ((in_tbl = GetEncTable(input_enc)) == NULL) return FALSE;
 120         if (output_enc != wxFONTENCODING_UNICODE)
 121             if ((out_tbl = GetEncTable(output_enc)) == NULL) return FALSE;
 122
 123         m_UnicodeInput = FALSE;
 124
 125         m_Table = new wxChar[256];
 126         for (i = 0; i < 128; i++)  m_Table[i] = (wxChar)i; // 7bit ASCII
 127
 128         if (output_enc == wxFONTENCODING_UNICODE)
 129         {
 130             for (i = 0; i < 128; i++)  m_Table[128 + i] = (wxChar)in_tbl[i]; // wxChar is 2byte now
 131             return TRUE;
 132         }
 133         else
 134         {
 135             CharsetItem *rev = BuildReverseTable(out_tbl);
 136             CharsetItem *item, key;
 137
 138             for (i = 0; i < 128; i++)
 139             {
 140                 key.u = in_tbl[i];
 141                 item = (CharsetItem*) bsearch(&key, rev, 128, sizeof(CharsetItem), CompareCharsetItems);
 142                 if (item == NULL && method == wxCONVERT_SUBSTITUTE)
 143                     item = (CharsetItem*) bsearch(&key, encoding_unicode_fallback,
 144                                 encoding_unicode_fallback_count, sizeof(CharsetItem), CompareCharsetItems);
 145                 if (item)
 146                     m_Table[128 + i] = (wxChar)item -> c;
 147                 else
 148                     m_Table[128 + i] = 128 + i; // don't know => don't touch
 149             }
 150
 151             delete[] rev;
 152             return TRUE;
 153         }
 154     }
 155 }
 156
 157
 158
 159 void wxEncodingConverter::Convert(const wxChar* input, wxChar* output)
 160 {
 161     if (m_JustCopy)
 162     {
 163         wxStrcpy(output, input);
 164         return;
 165     }
 166
 167     wxASSERT_MSG(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 168
 169     const wxChar *i;
 170     wxChar *o;
 171
 172     if (m_UnicodeInput)
 173         for (i = input, o = output; *i != 0; i++, o++)
 174             *o = (wxChar)(m_Table[(wxUint16)*i]);
 175     else
 176         for (i = input, o = output; *i != 0; i++, o++)
 177             *o = (wxChar)(m_Table[(wxUint8)*i]);
 178     *o = 0;
 179 }
 180
 181
 182 #if wxUSE_UNICODE // otherwise wxChar === char
 183
 184 void wxEncodingConverter::Convert(const char* input, wxChar* output)
 185 {
 186     wxASSERT_MSG(!m_UnicodeInput, wxT("You cannot convert from unicode if input is const char*!"));
 187
 188     const char *i;
 189     wxChar *o;
 190
 191     if (m_JustCopy)
 192     {
 193         for (i = input, o = output; *i != 0;)
 194             *(o++) = (wxChar)(*(i++));
 195         *o = 0;
 196         return;
 197     }
 198
 199     wxASSERT_MSG(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 200
 201     for (i = input, o = output; *i != 0;)
 202         *(o++) = (wxChar)(m_Table[(wxUint8)*(i++)]);
 203     *o = 0;
 204 }
 205
 206
 207
 208 void wxEncodingConverter::Convert(const wxChar* input, char* output)
 209 {
 210     wxASSERT_MSG(!m_UnicodeOutput, wxT("You cannot convert to unicode if output is const char*!"));
 211
 212     const wxChar *i;
 213     char *o;
 214
 215     if (m_JustCopy)
 216     {
 217         for (i = input, o = output; *i != 0;)
 218             *(o++) = (char)(*(i++));
 219         *o = 0;
 220         return;
 221     }
 222
 223     wxASSERT_MSG(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 224
 225     if (m_UnicodeInput)
 226         for (i = input, o = output; *i != 0; i++, o++)
 227             *o = (char)(m_Table[(wxUint16)*i]);
 228     else
 229         for (i = input, o = output; *i != 0; i++, o++)
 230             *o = (char)(m_Table[(wxUint8)*i]);
 231     *o = 0;
 232 }
 233
 234
 235
 236 void wxEncodingConverter::Convert(const char* input, char* output)
 237 {
 238     wxASSERT_MSG(!m_UnicodeOutput, wxT("You cannot convert to unicode if output is const char*!"));
 239     wxASSERT_MSG(!m_UnicodeInput, wxT("You cannot convert from unicode if input is const char*!"));
 240
 241     const char *i;
 242     char *o;
 243
 244     if (m_JustCopy)
 245     {
 246         strcpy(output, input);
 247         return;
 248     }
 249
 250     wxASSERT_MSG(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 251
 252     for (i = input, o = output; *i != 0;)
 253         *(o++) = (char)(m_Table[(wxUint8)*(i++)]);
 254     *o = 0;
 255 }
 256
 257 #endif // wxUSE_UNICODE
 258
 259
 260 wxString wxEncodingConverter::Convert(const wxString& input)
 261 {
 262     if (m_JustCopy) return input;
 263
 264     wxString s;
 265     const wxChar *i;
 266
 267     wxASSERT_MSG(m_Table != NULL, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
 268
 269     if (m_UnicodeInput)
 270         for (i = input.c_str(); *i != 0; i++)
 271             s << (wxChar)(m_Table[(wxUint16)*i]);
 272     else
 273         for (i = input.c_str(); *i != 0; i++)
 274             s << (wxChar)(m_Table[(wxUint8)*i]);
 275     return s;
 276 }
 277
 278
 279
 280
 281
 282
 283
 284 // The following table describes classes of equivalent encodings:
 285 // EquivalentEncodings[class][platform] is a STOP-terminated list of encodings.
 286 // STOP ends every per-platform list; a class made only of STOP ends the table.
 287 #define STOP wxFONTENCODING_SYSTEM
 288
 289 #define NUM_OF_PLATFORMS  4 /* must conform to enum wxPLATFORM_XXXX: the platform value indexes the table */
 290 #define ENC_PER_PLATFORM  5
 291            // max. number of encodings for one language used on one platform;
 292            // each list has room for that many entries plus the closing STOP
 293
 294 static wxFontEncoding
 295     EquivalentEncodings[][NUM_OF_PLATFORMS][ENC_PER_PLATFORM+1] = {
 296     // The lookup functions below stop at the first class whose first (unix) list
 297     // starts with STOP, so every real class needs at least one unix encoding.
 298     // Please put the more common encodings first.
 299     // West European
 300     {
 301         /* unix    */ {wxFONTENCODING_ISO8859_1, wxFONTENCODING_ISO8859_15, STOP},
 302         /* windows */ {wxFONTENCODING_CP1252, STOP},
 303         /* os2     */ {STOP},
 304         /* mac     */ {STOP}
 305     },
 306
 307     // Central European
 308     {
 309         /* unix    */ {wxFONTENCODING_ISO8859_2, STOP},
 310         /* windows */ {wxFONTENCODING_CP1250, STOP},
 311         /* os2     */ {STOP},
 312         /* mac     */ {STOP}
 313     },
 314
 315     // Baltic
 316     {
 317         /* unix    */ {wxFONTENCODING_ISO8859_13, STOP},
 318         /* windows */ {wxFONTENCODING_CP1257, STOP},
 319         /* os2     */ {STOP},
 320         /* mac     */ {STOP}
 321     },
 322
 323     // Hebrew
 324     {
 325         /* unix    */ {wxFONTENCODING_ISO8859_8, STOP},
 326         /* windows */ {wxFONTENCODING_CP1255, STOP},
 327         /* os2     */ {STOP},
 328         /* mac     */ {STOP}
 329     },
 330
 331     // Greek
 332     {
 333         /* unix    */ {wxFONTENCODING_ISO8859_7, STOP},
 334         /* windows */ {wxFONTENCODING_CP1253, STOP},
 335         /* os2     */ {STOP},
 336         /* mac     */ {STOP}
 337     },
 338
 339     // Arabic
 340     {
 341         /* unix    */ {wxFONTENCODING_ISO8859_6, STOP},
 342         /* windows */ {wxFONTENCODING_CP1256, STOP},
 343         /* os2     */ {STOP},
 344         /* mac     */ {STOP}
 345     },
 346
 347     // Turkish
 348     {
 349         /* unix    */ {wxFONTENCODING_ISO8859_9, STOP},
 350         /* windows */ {wxFONTENCODING_CP1254, STOP},
 351         /* os2     */ {STOP},
 352         /* mac     */ {STOP}
 353     },
 354
 355     // NOTE(review): labelled "Cyrillic", but every entry is a Baltic or West European encoding also listed in the classes above - confirm intent
 356     {
 357         /* unix    */ {wxFONTENCODING_ISO8859_13, wxFONTENCODING_ISO8859_4,
 358                        wxFONTENCODING_ISO8859_15, wxFONTENCODING_ISO8859_1, STOP},
 359         /* windows */ {wxFONTENCODING_CP1257, wxFONTENCODING_CP1252, STOP},
 360         /* os2     */ {STOP},
 361         /* mac     */ {STOP}
 362     },
 363
 364     // Russia and other KOI-8 users:
 365     {
 366         /* unix    */ {wxFONTENCODING_KOI8, wxFONTENCODING_ISO8859_5, STOP},
 367         /* windows */ {wxFONTENCODING_CP1251, STOP},
 368         /* os2     */ {STOP},
 369         /* mac     */ {STOP}
 370     },
 371
 372     {{STOP},{STOP},{STOP},{STOP}} /* Terminator: must stay the last class */
 373     /* no, _not_ Arnold! */
 374 };
 375
 376
 377
 378
 379 wxFontEncodingArray wxEncodingConverter::GetPlatformEquivalents(wxFontEncoding enc, int platform)
 380 {
 381     if (platform == wxPLATFORM_CURRENT)
 382     {
 383 #if defined(__WXMSW__)
 384         platform = wxPLATFORM_WINDOWS;
 385 #elif defined(__WXGTK__) || defined(__WXMOTIF__)
 386         platform = wxPLATFORM_UNIX;
 387 #elif defined(__WXOS2__)
 388         platform = wxPLATFORM_OS2;
 389 #elif defined(__WXMAC__)
 390         platform = wxPLATFORM_MAC;
 391 #endif
 392     }
 393
 394     int i, clas, e ;
 395     wxFontEncoding *f;
 396     wxFontEncodingArray arr;
 397
 398     clas = 0;
 399     while (EquivalentEncodings[clas][0][0] != STOP)
 400     {
 401         for (i = 0; i < NUM_OF_PLATFORMS; i++)
 402             for (e = 0; EquivalentEncodings[clas][i][e] != STOP; e++)
 403                 if (EquivalentEncodings[clas][i][e] == enc)
 404                 {
 405                     for (f = EquivalentEncodings[clas][platform]; *f != STOP; f++)
 406                         if (*f == enc) arr.Add(enc);
 407                     for (f = EquivalentEncodings[clas][platform]; *f != STOP; f++)
 408                         if (arr.Index(*f) == wxNOT_FOUND) arr.Add(*f);
 409                     i = NUM_OF_PLATFORMS/*hack*/; break;
 410                 }
 411         clas++;
 412     }
 413
 414     return arr;
 415 }
 416
 417
 418
 419 wxFontEncodingArray wxEncodingConverter::GetAllEquivalents(wxFontEncoding enc)
 420 {
 421     int i, clas, e, j ;
 422     wxFontEncoding *f;
 423     wxFontEncodingArray arr;
 424
 425     arr = GetPlatformEquivalents(enc); // we want them to be first items in array
 426
 427     clas = 0;
 428     while (EquivalentEncodings[clas][0][0] != STOP)
 429     {
 430         for (i = 0; i < NUM_OF_PLATFORMS; i++)
 431             for (e = 0; EquivalentEncodings[clas][i][e] != STOP; e++)
 432                 if (EquivalentEncodings[clas][i][e] == enc)
 433                 {
 434                     for (j = 0; j < NUM_OF_PLATFORMS; j++)
 435                         for (f = EquivalentEncodings[clas][j]; *f != STOP; f++)
 436                             if (arr.Index(*f) == wxNOT_FOUND) arr.Add(*f);
 437                     i = NUM_OF_PLATFORMS/*hack*/; break;
 438                 }
 439         clas++;
 440     }
 441
 442     return arr;
 443 }