1 ///////////////////////////////////////////////////////////////////////////// 
   3 // Purpose:     wxEncodingConverter class for converting between different 
   5 // Author:      Vaclav Slavik 
   6 // Copyright:   (c) 1999 Vaclav Slavik 
   7 // Licence:     wxWindows Licence 
   8 ///////////////////////////////////////////////////////////////////////////// 
  10 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA) 
  11 #pragma implementation "encconv.h" 
  14 // For compilers that support precompilation, includes "wx.h". 
  15 #include "wx/wxprec.h" 
  23 #include "wx/encconv.h" 
  27 // conversion tables, generated by scripts in $(WXWIN)/misc/unictabl: 
  29 #include "../common/unictabl.inc" 
  31 #include "unictabl.inc" 
  // Element type of the conversion table: Init() allocates m_Table as an
  // array of tchar.
  // NOTE(review): the listing skips lines around these two definitions, so
  // any preprocessor conditions selecting them are not visible here.
  35 typedef wchar_t tchar
; 
  // Qualifier written between the return type and the name of the file-local
  // helpers and of the comparator passed to qsort() and bsearch().
  42 #define LINKAGEMODE __cdecl 
  45 static wxUint16
* LINKAGEMODE 
GetEncTable(wxFontEncoding enc
) 
  47     for (int i 
= 0; encodings_list
[i
].table 
!= NULL
; i
++) 
  49         if (encodings_list
[i
].encoding 
== enc
) 
  50             return encodings_list
[i
].table
; 
  60 extern "C" int LINKAGEMODE 
CompareCharsetItems(const void *i1
, const void *i2
) 
  62     return ( ((CharsetItem
*)i1
) -> u 
- ((CharsetItem
*)i2
) -> u 
); 
  66 static CharsetItem
* LINKAGEMODE 
BuildReverseTable(wxUint16 
*tbl
) 
  68     CharsetItem 
*rev 
= new CharsetItem
[128]; 
  70     for (int i 
= 0; i 
< 128; i
++) 
  71         rev
[i
].c 
= 128 + i
, rev
[i
].u 
= tbl
[i
]; 
  73     qsort(rev
, 128, sizeof(CharsetItem
), CompareCharsetItems
); 
  // Default constructor: clears both Unicode direction flags.
  // NOTE(review): this listing skips lines inside the constructor body, so
  // the initialisation of other members (m_Table and m_JustCopy are read by
  // Init() and Convert()) is not visible here - confirm against the full file.
  80 wxEncodingConverter::wxEncodingConverter() 
  83     m_UnicodeInput 
= m_UnicodeOutput 
= FALSE
; 
  // Prepares the converter for translating text from input_enc to output_enc.
  //
  // Any table built by a previous call is freed first. When both encodings
  // are equal, m_JustCopy is set and TRUE is returned without building a
  // table. Otherwise a lookup table is allocated: 65536 entries when the
  // input is Unicode, 256 entries when the input is an 8-bit encoding.
  // Entries 0..127 are initialised to map to themselves.
  //
  // method: when equal to wxCONVERT_SUBSTITUTE, characters without an exact
  //         counterpart are looked up in encoding_unicode_fallback.
  // Returns FALSE when GetEncTable() has no table for a required encoding.
  //
  // NOTE(review): this listing skips source lines (see the embedded line
  // numbers), so braces, some declarations and statements, and preprocessor
  // conditionals are not visible. The comments below describe only the
  // statements that are shown.
  89 bool wxEncodingConverter::Init(wxFontEncoding input_enc
, wxFontEncoding output_enc
, int method
) 
  92     wxUint16 
*in_tbl
, *out_tbl 
= NULL
; 
  // Drop the table left over from an earlier Init() call.
  94     if (m_Table
) {delete[] m_Table
; m_Table 
= NULL
;} 
  // NOTE(review): as listed, this rejects every conversion involving Unicode,
  // which would make the Unicode branches below unreachable. Presumably the
  // check sits inside a preprocessor conditional that is not shown - confirm.
  97     if (input_enc 
== wxFONTENCODING_UNICODE 
|| output_enc 
== wxFONTENCODING_UNICODE
) return FALSE
; 
  // Identical encodings: nothing to translate, Convert() just copies.
 100     if (input_enc 
== output_enc
) {m_JustCopy 
= TRUE
; return TRUE
;} 
 102     m_UnicodeOutput 
= (output_enc 
== wxFONTENCODING_UNICODE
); 
  // Unicode input: the table is indexed by the 16-bit character value.
 105     if (input_enc 
== wxFONTENCODING_UNICODE
) 
 107         if ((out_tbl 
= GetEncTable(output_enc
)) == NULL
) return FALSE
; 
 109         m_Table 
= new tchar
[65536]; 
 110         for (i 
= 0; i 
< 128; i
++)  m_Table
[i
] = (tchar
)i
; // 7bit ASCII 
  // Default for every character above ASCII is a question mark.
 111         for (i 
= 128; i 
< 65536; i
++)  m_Table
[i
] = (tchar
)'?'; 
 112                 // FIXME - this should be character that means `unicode to charset' impossible, not '?' 
  // Optional approximations, written before the exact mappings so that the
  // exact mappings below take precedence.
 114         if (method 
== wxCONVERT_SUBSTITUTE
) 
 116             for (i 
= 0; i 
< encoding_unicode_fallback_count
; i
++) 
 117                 m_Table
[encoding_unicode_fallback
[i
].c
] = (tchar
) encoding_unicode_fallback
[i
].s
; 
  // Characters present in the output encoding map to its codes 128..255.
 120         for (i 
= 0; i 
< 128; i
++) 
 121             m_Table
[out_tbl
[i
]] = (tchar
)(128 + i
); 
 123         m_UnicodeInput 
= TRUE
; 
 125     else // input !Unicode 
 127         if ((in_tbl 
= GetEncTable(input_enc
)) == NULL
) return FALSE
; 
  // The output table is only needed when the output is an 8-bit encoding.
 128         if (output_enc 
!= wxFONTENCODING_UNICODE
) 
 129             if ((out_tbl 
= GetEncTable(output_enc
)) == NULL
) return FALSE
; 
 131         m_UnicodeInput 
= FALSE
; 
  // 8-bit input: the table is indexed by the byte value.
 133         m_Table 
= new tchar
[256]; 
 134         for (i 
= 0; i 
< 128; i
++)  m_Table
[i
] = (tchar
)i
; // 7bit ASCII 
  // 8-bit to Unicode: the upper half is copied from the input table.
 136         if (output_enc 
== wxFONTENCODING_UNICODE
) 
 138             for (i 
= 0; i 
< 128; i
++)  m_Table
[128 + i
] = (tchar
)in_tbl
[i
]; 
 141         // FIXME: write a substitute for bsearch 
 143         else // output !Unicode 
  // 8-bit to 8-bit: each upper-half character is searched for in a sorted
  // reverse table of the output encoding.
 145             CharsetItem 
*rev 
= BuildReverseTable(out_tbl
); 
 149             for (i 
= 0; i 
< 128; i
++) 
  // NOTE(review): the statement filling `key` is not visible in this listing.
 152                 item 
= (CharsetItem
*) bsearch(&key
, rev
, 128, sizeof(CharsetItem
), CompareCharsetItems
); 
  // No exact match: try the fallback list when substitution was requested.
 153                 if (item 
== NULL 
&& method 
== wxCONVERT_SUBSTITUTE
) 
 154                     item 
= (CharsetItem
*) bsearch(&key
, encoding_unicode_fallback
, 
 155                                 encoding_unicode_fallback_count
, sizeof(CharsetItem
), CompareCharsetItems
); 
 157                     m_Table
[128 + i
] = (tchar
)item 
-> c
; 
  // NOTE(review): the two assignments below differ only in the cast and keep
  // the character unchanged; presumably they are alternative preprocessor
  // branches of the no-match case - confirm against the full file.
 160                     m_Table
[128 + i
] = (wchar_t)(128 + i
); 
 162                     m_Table
[128 + i
] = (char)(128 + i
); 
 168 #endif // !__WXWINCE__ 
 // Converts the zero-terminated 8-bit string `input` into `output`.
 // Each input byte is used, as an unsigned 8-bit value, to index m_Table and
 // the table entry is stored as a char. Asserts that neither side of the
 // configured conversion is Unicode, and requires a table built by Init().
 // NOTE(review): the strcpy() call is presumably the m_JustCopy path; its
 // guarding condition, the declarations of i and o, and the write of the
 // terminator are not visible in this listing.
 // NOTE(review): no buffer length is passed; assumes `output` can hold the
 // whole input plus terminator - confirm with callers.
 176 void wxEncodingConverter::Convert(const char* input
, char* output
) const 
 178     wxASSERT_MSG(!m_UnicodeOutput
, wxT("You cannot convert to unicode if output is const char*!")); 
 179     wxASSERT_MSG(!m_UnicodeInput
, wxT("You cannot convert from unicode if input is const char*!")); 
 186         strcpy(output
, input
); 
 190     wxCHECK_RET(m_Table 
!= NULL
, wxT("You must call wxEncodingConverter::Init() before actually converting!")); 
 192     for (i 
= input
, o 
= output
; *i 
!= 0;) 
 193         *(o
++) = (char)(m_Table
[(wxUint8
)*(i
++)]); 
 // Converts the zero-terminated 8-bit string `input` into the wide string
 // `output`. Asserts that the configured output is Unicode and the input is
 // not, and requires a table built by Init() for the table-driven loop.
 // NOTE(review): the first loop widens each char without a table lookup and
 // is presumably the m_JustCopy path; its guard, the declarations of i and o,
 // and the write of the terminator are not visible in this listing.
 // NOTE(review): that first loop casts a plain char directly to wchar_t, so
 // bytes above 127 would be sign-extended where char is signed - confirm
 // whether this path is reachable with such input.
 200 void wxEncodingConverter::Convert(const char* input
, wchar_t* output
) const 
 202     wxASSERT_MSG(m_UnicodeOutput
, wxT("You cannot convert to 8-bit if output is const wchar_t*!")); 
 203     wxASSERT_MSG(!m_UnicodeInput
, wxT("You cannot convert from unicode if input is const char*!")); 
 210         for (i 
= input
, o 
= output
; *i 
!= 0;) 
 211             *(o
++) = (wchar_t)(*(i
++)); 
 216     wxCHECK_RET(m_Table 
!= NULL
, wxT("You must call wxEncodingConverter::Init() before actually converting!")); 
 // Table-driven path: the byte value (0..255) selects the wide character.
 218     for (i 
= input
, o 
= output
; *i 
!= 0;) 
 219         *(o
++) = (wchar_t)(m_Table
[(wxUint8
)*(i
++)]); 
 // Converts the zero-terminated wide string `input` into the 8-bit string
 // `output`. Asserts that the configured input is Unicode and the output is
 // not, and requires a table built by Init() for the table-driven loop.
 // NOTE(review): the first loop narrows each wide character with a plain
 // cast and is presumably the m_JustCopy path; its guard, the declarations of
 // i and o, and the write of the terminator are not visible in this listing.
 225 void wxEncodingConverter::Convert(const wchar_t* input
, char* output
) const 
 227     wxASSERT_MSG(!m_UnicodeOutput
, wxT("You cannot convert to unicode if output is const char*!")); 
 228     wxASSERT_MSG(m_UnicodeInput
, wxT("You cannot convert from 8-bit if input is const wchar_t*!")); 
 235         for (i 
= input
, o 
= output
; *i 
!= 0;) 
 236             *(o
++) = (char)(*(i
++)); 
 241     wxCHECK_RET(m_Table 
!= NULL
, wxT("You must call wxEncodingConverter::Init() before actually converting!")); 
 // Table-driven path: the character is reduced to 16 bits to index the
 // table, so any higher bits of a wider wchar_t are discarded.
 243     for (i 
= input
, o 
= output
; *i 
!= 0;) 
 244         *(o
++) = (char)(m_Table
[(wxUint16
)*(i
++)]); 
 // Converts the zero-terminated wide string `input` into the wide string
 // `output`. Asserts that both the configured input and output are Unicode.
 // NOTE(review): the first loop is a hand-written string copy (see the
 // wcscpy remark) and is presumably the m_JustCopy path; its body, its guard,
 // the declarations of i and o, and the write of the terminator are not
 // visible in this listing.
 250 void wxEncodingConverter::Convert(const wchar_t* input
, wchar_t* output
) const 
 252     wxASSERT_MSG(m_UnicodeOutput
, wxT("You cannot convert to 8-bit if output is const wchar_t*!")); 
 253     wxASSERT_MSG(m_UnicodeInput
, wxT("You cannot convert from 8-bit if input is const wchar_t*!")); 
 260         // wcscpy() is not guaranteed to exist 
 261         for (i 
= input
, o 
= output
; *i 
!= 0;) 
 267     wxCHECK_RET(m_Table 
!= NULL
, wxT("You must call wxEncodingConverter::Init() before actually converting!")); 
 // NOTE(review): the table index is truncated to 8 bits here although the
 // input is wide, whereas the wchar_t to char overload indexes with 16 bits.
 // Verify the intent before changing it: a wider index is only safe when the
 // table has 65536 entries.
 269     for (i 
= input
, o 
= output
; *i 
!= 0;) 
 270         *(o
++) = (wchar_t)(m_Table
[(wxUint8
)*(i
++)]); 
 274 #endif // wxUSE_WCHAR_T 
 // Returns a converted copy of `input`.
 // In just-copy mode the input is returned unchanged. Otherwise every
 // character is replaced by its m_Table entry and appended to the result
 // string `s`; `s` is also the value handed to wxCHECK_MSG for the case that
 // Init() has not built a table.
 // NOTE(review): two loops are visible, one indexing the table with a 16-bit
 // value and one with an 8-bit value; presumably m_UnicodeInput selects
 // between them. The condition, the declarations of s and i, and the final
 // return are not visible in this listing.
 277 wxString 
wxEncodingConverter::Convert(const wxString
& input
) const 
 279     if (m_JustCopy
) return input
; 
 284     wxCHECK_MSG(m_Table 
!= NULL
, s
, 
 285                 wxT("You must call wxEncodingConverter::Init() before actually converting!")); 
 289         for (i 
= input
.c_str(); *i 
!= 0; i
++) 
 290             s 
<< (wxChar
)(m_Table
[(wxUint16
)*i
]); 
 294         for (i 
= input
.c_str(); *i 
!= 0; i
++) 
 295             s 
<< (wxChar
)(m_Table
[(wxUint8
)*i
]); 
 307 // Following tables describe classes of encoding equivalence. 
 // STOP terminates every per-platform list and, as a whole row of STOP
 // lists, the table itself.
 310 #define STOP wxFONTENCODING_SYSTEM 
 312 #define NUM_OF_PLATFORMS  4 /*must conform to enum wxPLATFORM_XXXX !!!*/ 
 313 #define ENC_PER_PLATFORM  5 
 314            // max no. of encodings for one language used on one platform 
 315            // Anybody thinks 5 is not enough? ;-) 
 // EquivalentEncodings[class][platform][k]: for each class of mutually
 // equivalent encodings, the list of encodings used on each platform. Every
 // list has room for ENC_PER_PLATFORM entries plus the STOP terminator.
 // The platform index is presumably the wxPLATFORM_XXXX value, as the remark
 // on NUM_OF_PLATFORMS suggests - confirm against the enum.
 // NOTE(review): only the unix and windows rows of each class are visible in
 // this listing; the remaining platform rows and the braces around each class
 // are elided.
 317 static wxFontEncoding
 
 318     EquivalentEncodings
[][NUM_OF_PLATFORMS
][ENC_PER_PLATFORM
+1] = { 
 320     // *** Please put more common encodings as first! *** 
 324         /* unix    */ {wxFONTENCODING_ISO8859_1
, wxFONTENCODING_ISO8859_15
, STOP
}, 
 325         /* windows */ {wxFONTENCODING_CP1252
, STOP
}, 
 332         /* unix    */ {wxFONTENCODING_ISO8859_2
, STOP
}, 
 333         /* windows */ {wxFONTENCODING_CP1250
, STOP
}, 
 340         /* unix    */ {wxFONTENCODING_ISO8859_13
, wxFONTENCODING_ISO8859_4
, STOP
}, 
 341         /* windows */ {wxFONTENCODING_CP1257
, STOP
}, 
 348         /* unix    */ {wxFONTENCODING_ISO8859_8
, STOP
}, 
 349         /* windows */ {wxFONTENCODING_CP1255
, STOP
}, 
 356         /* unix    */ {wxFONTENCODING_ISO8859_7
, STOP
}, 
 357         /* windows */ {wxFONTENCODING_CP1253
, STOP
}, 
 364         /* unix    */ {wxFONTENCODING_ISO8859_6
, STOP
}, 
 365         /* windows */ {wxFONTENCODING_CP1256
, STOP
}, 
 372         /* unix    */ {wxFONTENCODING_ISO8859_9
, STOP
}, 
 373         /* windows */ {wxFONTENCODING_CP1254
, STOP
}, 
 380         /* unix    */ {wxFONTENCODING_KOI8
, wxFONTENCODING_ISO8859_5
, STOP
}, 
 381         /* windows */ {wxFONTENCODING_CP1251
, STOP
}, 
 386     {{STOP
},{STOP
},{STOP
},{STOP
}} /* Terminator */ 
 387     /* no, _not_ Arnold! */ 
 // Scans `arr` from begin() to end() looking for the encoding `f`.
 // The callers below use a false result to decide that an encoding still has
 // to be appended, so this presumably returns true when `f` is present.
 // NOTE(review): only the loop header is visible in this listing; the loop
 // condition, body and return statements are elided.
 391 static bool FindEncoding(const wxFontEncodingArray
& arr
, wxFontEncoding f
) 
 393     for (wxFontEncodingArray::const_iterator it 
= arr
.begin(), en 
= arr
.end(); 
 // Collects the encodings that `platform` uses for the equivalence class(es)
 // containing `enc`. When `enc` itself is used on that platform it is placed
 // first; the remaining encodings of the platform follow without duplicates.
 // NOTE(review): the declarations of clas, i, e and f, the advance of clas
 // and the final return are not visible in this listing.
 400 wxFontEncodingArray 
wxEncodingConverter::GetPlatformEquivalents(wxFontEncoding enc
, int platform
) 
 // wxPLATFORM_CURRENT is resolved at compile time from the port macro.
 // NOTE(review): no fallback branch is visible for the case that none of
 // these macros is defined; platform would then stay wxPLATFORM_CURRENT and
 // be used as an array index below - confirm whether a default exists.
 402     if (platform 
== wxPLATFORM_CURRENT
) 
 404 #if defined(__WXMSW__) 
 405         platform 
= wxPLATFORM_WINDOWS
; 
 406 #elif defined(__WXGTK__) || defined(__WXMOTIF__) 
 407         platform 
= wxPLATFORM_UNIX
; 
 408 #elif defined(__WXOS2__) 
 409         platform 
= wxPLATFORM_OS2
; 
 410 #elif defined(__WXMAC__) 
 411         platform 
= wxPLATFORM_MAC
; 
 417     wxFontEncodingArray arr
; 
 // Walk the classes until the all-STOP terminator entry.
 420     while (EquivalentEncodings
[clas
][0][0] != STOP
) 
 // Search every platform list of this class for enc.
 422         for (i 
= 0; i 
< NUM_OF_PLATFORMS
; i
++) 
 423             for (e 
= 0; EquivalentEncodings
[clas
][i
][e
] != STOP
; e
++) 
 424                 if (EquivalentEncodings
[clas
][i
][e
] == enc
) 
 // First pass: put enc itself at the front if the requested platform has it.
 426                     for (f 
= EquivalentEncodings
[clas
][platform
]; *f 
!= STOP
; f
++) 
 427                         if (*f 
== enc
) arr
.push_back(enc
); 
 // Second pass: append the other encodings of that platform, skipping
 // anything already collected.
 428                     for (f 
= EquivalentEncodings
[clas
][platform
]; *f 
!= STOP
; f
++) 
 429                         if (!FindEncoding(arr
, *f
)) arr
.push_back(*f
); 
 // Setting i past the limit ends the outer platform loop once the break
 // has left the inner one.
 430                     i 
= NUM_OF_PLATFORMS
/*hack*/; break; 
 // Collects every encoding, on any platform, that shares an equivalence
 // class with `enc`. The result starts with what GetPlatformEquivalents(enc)
 // returns; encodings from the other platform lists follow without
 // duplicates.
 // NOTE(review): the declarations of clas, i, e, j and f, the advance of clas
 // and the final return are not visible in this listing.
 440 wxFontEncodingArray 
wxEncodingConverter::GetAllEquivalents(wxFontEncoding enc
) 
 444     wxFontEncodingArray arr
; 
 446     arr 
= GetPlatformEquivalents(enc
); // we want them to be first items in array 
 // Walk the classes until the all-STOP terminator entry.
 449     while (EquivalentEncodings
[clas
][0][0] != STOP
) 
 // Search every platform list of this class for enc.
 451         for (i 
= 0; i 
< NUM_OF_PLATFORMS
; i
++) 
 452             for (e 
= 0; EquivalentEncodings
[clas
][i
][e
] != STOP
; e
++) 
 453                 if (EquivalentEncodings
[clas
][i
][e
] == enc
) 
 // The class matches: append the lists of all platforms, skipping anything
 // already collected.
 455                     for (j 
= 0; j 
< NUM_OF_PLATFORMS
; j
++) 
 456                         for (f 
= EquivalentEncodings
[clas
][j
]; *f 
!= STOP
; f
++) 
 457                             if (!FindEncoding(arr
, *f
)) arr
.push_back(*f
); 
 // Setting i past the limit ends the outer platform loop once the break
 // has left the inner one.
 458                     i 
= NUM_OF_PLATFORMS
/*hack*/; break; 
 466 #endif // wxUSE_FONTMAP