1 ///////////////////////////////////////////////////////////////////////////// 
   3 // Purpose:     wxEncodingConverter class for converting between different 
   5 // Author:      Vaclav Slavik 
   6 // Copyright:   (c) 1999 Vaclav Slavik 
   7 // Licence:     wxWindows licence 
   8 ///////////////////////////////////////////////////////////////////////////// 
  10 // For compilers that support precompilation, includes "wx.h". 
  11 #include "wx/wxprec.h" 
  17 #include "wx/encconv.h" 
  21 // conversion tables, generated by scripts in $(WXWIN)/misc/unictabl: 
  22 #if defined( __BORLANDC__ ) || defined(__DARWIN__) 
  23     #include "../common/unictabl.inc" 
  25     #include "unictabl.inc" 
  29     typedef wchar_t tchar
; 
  35     #include "wx/mac/corefoundation/cfstring.h" 
  36     #include <CoreFoundation/CFStringEncodingExt.h> 
  38     wxUint16 gMacEncodings
[wxFONTENCODING_MACMAX
-wxFONTENCODING_MACMIN
+1][128] ; 
  39     bool gMacEncodingsInited
[wxFONTENCODING_MACMAX
-wxFONTENCODING_MACMIN
+1] ; 
  43     #include "wx/msw/wince/missing.h"       // for bsearch() 
  46 static const wxUint16
* GetEncTable(wxFontEncoding enc
) 
  49     if( enc 
>= wxFONTENCODING_MACMIN 
&& enc 
<= wxFONTENCODING_MACMAX 
) 
  51         int i 
= enc
-wxFONTENCODING_MACMIN 
; 
  52         if ( gMacEncodingsInited
[i
] == false ) 
  55             CFStringEncoding cfencoding 
= wxMacGetSystemEncFromFontEnc( enc 
) ; 
  56             if( !CFStringIsEncodingAvailable( cfencoding 
) ) 
  59             memset( gMacEncodings
[i
] , 0 , 128 * 2 ); 
  60             char s
[2] = { 0 , 0 }; 
  61             CFRange firstchar 
= CFRangeMake( 0, 1 ); 
  62             for( unsigned char c 
= 255 ; c 
>= 128 ; --c 
) 
  65                 wxCFStringRef 
cfref( CFStringCreateWithCStringNoCopy( NULL
, s
, cfencoding 
, kCFAllocatorNull 
) ); 
  66                 CFStringGetCharacters( cfref
, firstchar
, (UniChar
*)  &gMacEncodings
[i
][c
-128] ); 
  68             gMacEncodingsInited
[i
]=true; 
  70         return gMacEncodings
[i
] ; 
  74     for (int i 
= 0; encodings_list
[i
].table 
!= NULL
; i
++) 
  76         if (encodings_list
[i
].encoding 
== enc
) 
  77             return encodings_list
[i
].table
; 
  87 extern "C" int wxCMPFUNC_CONV
 
  88 CompareCharsetItems(const void *i1
, const void *i2
) 
  90     return ( ((CharsetItem
*)i1
) -> u 
- ((CharsetItem
*)i2
) -> u 
); 
  94 static CharsetItem
* BuildReverseTable(const wxUint16 
*tbl
) 
  96     CharsetItem 
*rev 
= new CharsetItem
[128]; 
  98     for (int i 
= 0; i 
< 128; i
++) 
  99         rev
[i
].c 
= wxUint8(128 + i
), rev
[i
].u 
= tbl
[i
]; 
 101     qsort(rev
, 128, sizeof(CharsetItem
), CompareCharsetItems
); 
 108 wxEncodingConverter::wxEncodingConverter() 
 111     m_UnicodeInput 
= m_UnicodeOutput 
= false; 
 117 bool wxEncodingConverter::Init(wxFontEncoding input_enc
, wxFontEncoding output_enc
, int method
) 
 120     const wxUint16 
*in_tbl
; 
 121     const wxUint16 
*out_tbl 
= NULL
; 
 123     if (m_Table
) {delete[] m_Table
; m_Table 
= NULL
;} 
 126     if (input_enc 
== wxFONTENCODING_UNICODE 
|| output_enc 
== wxFONTENCODING_UNICODE
) return false; 
 129     if (input_enc 
== output_enc
) {m_JustCopy 
= true; return true;} 
 131     m_UnicodeOutput 
= (output_enc 
== wxFONTENCODING_UNICODE
); 
 134     if (input_enc 
== wxFONTENCODING_UNICODE
) 
 136         if ((out_tbl 
= GetEncTable(output_enc
)) == NULL
) return false; 
 138         m_Table 
= new tchar
[65536]; 
 139         for (i 
= 0; i 
< 128; i
++)  m_Table
[i
] = (tchar
)i
; // 7bit ASCII 
 140         for (i 
= 128; i 
< 65536; i
++)  m_Table
[i
] = (tchar
)0; 
 142         if (method 
== wxCONVERT_SUBSTITUTE
) 
 144             for (i 
= 0; i 
< encoding_unicode_fallback_count
; i
++) 
 145                 m_Table
[encoding_unicode_fallback
[i
].c
] = (tchar
) encoding_unicode_fallback
[i
].s
; 
 148         for (i 
= 0; i 
< 128; i
++) 
 149             m_Table
[out_tbl
[i
]] = (tchar
)(128 + i
); 
 151         m_UnicodeInput 
= true; 
 153     else // input !Unicode 
 155         if ((in_tbl 
= GetEncTable(input_enc
)) == NULL
) return false; 
 156         if (output_enc 
!= wxFONTENCODING_UNICODE
) 
 157             if ((out_tbl 
= GetEncTable(output_enc
)) == NULL
) return false; 
 159         m_UnicodeInput 
= false; 
 161         m_Table 
= new tchar
[256]; 
 162         for (i 
= 0; i 
< 128; i
++)  m_Table
[i
] = (tchar
)i
; // 7bit ASCII 
 164         if (output_enc 
== wxFONTENCODING_UNICODE
) 
 166             for (i 
= 0; i 
< 128; i
++)  m_Table
[128 + i
] = (tchar
)in_tbl
[i
]; 
 169         else // output !Unicode 
 171             CharsetItem 
*rev 
= BuildReverseTable(out_tbl
); 
 175             for (i 
= 0; i 
< 128; i
++) 
 178                 item 
= (CharsetItem
*) bsearch(&key
, rev
, 128, sizeof(CharsetItem
), CompareCharsetItems
); 
 179                 if (item 
== NULL 
&& method 
== wxCONVERT_SUBSTITUTE
) 
 180                     item 
= (CharsetItem
*) bsearch(&key
, encoding_unicode_fallback
, 
 181                                 encoding_unicode_fallback_count
, sizeof(CharsetItem
), CompareCharsetItems
); 
 183                     m_Table
[128 + i
] = (tchar
)item 
-> c
; 
 186                     m_Table
[128 + i
] = (wchar_t)(128 + i
); 
 188                     m_Table
[128 + i
] = (char)(128 + i
); 
 200 #define REPLACEMENT_CHAR  ((tchar)'?') 
 202 inline tchar 
GetTableValue(const tchar 
*table
, tchar value
, bool& repl
) 
 204     tchar r 
= table
[value
]; 
 205     if (r 
== 0 && value 
!= 0) 
 207         r 
= REPLACEMENT_CHAR
; 
 214 bool wxEncodingConverter::Convert(const char* input
, char* output
) const 
 216     wxASSERT_MSG(!m_UnicodeOutput
, wxT("You cannot convert to unicode if output is const char*!")); 
 217     wxASSERT_MSG(!m_UnicodeInput
, wxT("You cannot convert from unicode if input is const char*!")); 
 224         strcpy(output
, input
); 
 228     wxCHECK_MSG(m_Table 
!= NULL
, false, 
 229                 wxT("You must call wxEncodingConverter::Init() before actually converting!")); 
 231     bool replaced 
= false; 
 233     for (i 
= input
, o 
= output
; *i 
!= 0;) 
 234         *(o
++) = (char)(GetTableValue(m_Table
, (wxUint8
)*(i
++), replaced
)); 
 243 bool wxEncodingConverter::Convert(const char* input
, wchar_t* output
) const 
 245     wxASSERT_MSG(m_UnicodeOutput
, wxT("You cannot convert to 8-bit if output is const wchar_t*!")); 
 246     wxASSERT_MSG(!m_UnicodeInput
, wxT("You cannot convert from unicode if input is const char*!")); 
 253         for (i 
= input
, o 
= output
; *i 
!= 0;) 
 254             *(o
++) = (wchar_t)(*(i
++)); 
 259     wxCHECK_MSG(m_Table 
!= NULL
, false, 
 260                 wxT("You must call wxEncodingConverter::Init() before actually converting!")); 
 262     bool replaced 
= false; 
 264     for (i 
= input
, o 
= output
; *i 
!= 0;) 
 265         *(o
++) = (wchar_t)(GetTableValue(m_Table
, (wxUint8
)*(i
++), replaced
)); 
 273 bool wxEncodingConverter::Convert(const wchar_t* input
, char* output
) const 
 275     wxASSERT_MSG(!m_UnicodeOutput
, wxT("You cannot convert to unicode if output is const char*!")); 
 276     wxASSERT_MSG(m_UnicodeInput
, wxT("You cannot convert from 8-bit if input is const wchar_t*!")); 
 283         for (i 
= input
, o 
= output
; *i 
!= 0;) 
 284             *(o
++) = (char)(*(i
++)); 
 289     wxCHECK_MSG(m_Table 
!= NULL
, false, 
 290                 wxT("You must call wxEncodingConverter::Init() before actually converting!")); 
 292     bool replaced 
= false; 
 294     for (i 
= input
, o 
= output
; *i 
!= 0;) 
 295         *(o
++) = (char)(GetTableValue(m_Table
, (wxUint16
)*(i
++), replaced
)); 
 303 bool wxEncodingConverter::Convert(const wchar_t* input
, wchar_t* output
) const 
 305     wxASSERT_MSG(m_UnicodeOutput
, wxT("You cannot convert to 8-bit if output is const wchar_t*!")); 
 306     wxASSERT_MSG(m_UnicodeInput
, wxT("You cannot convert from 8-bit if input is const wchar_t*!")); 
 313         // wcscpy() is not guaranteed to exist 
 314         for (i 
= input
, o 
= output
; *i 
!= 0;) 
 320     wxCHECK_MSG(m_Table 
!= NULL
, false, 
 321                 wxT("You must call wxEncodingConverter::Init() before actually converting!")); 
 323     bool replaced 
= false; 
 325     for (i 
= input
, o 
= output
; *i 
!= 0;) 
 326         *(o
++) = (wchar_t)(GetTableValue(m_Table
, (wxUint8
)*(i
++), replaced
)); 
 332 #endif // wxUSE_WCHAR_T 
 335 wxString 
wxEncodingConverter::Convert(const wxString
& input
) const 
 337     if (m_JustCopy
) return input
; 
 342     wxCHECK_MSG(m_Table 
!= NULL
, s
, 
 343                 wxT("You must call wxEncodingConverter::Init() before actually converting!")); 
 347         for (i 
= input
.c_str(); *i 
!= 0; i
++) 
 348             s 
<< (wxChar
)(m_Table
[(wxUint16
)*i
]); 
 352         for (i 
= input
.c_str(); *i 
!= 0; i
++) 
 353             s 
<< (wxChar
)(m_Table
[(wxUint8
)*i
]); 
 365 // Following tables describe classes of encoding equivalence. 
 368 #define STOP wxFONTENCODING_SYSTEM 
 370 #define NUM_OF_PLATFORMS  4 /*must conform to enum wxPLATFORM_XXXX !!!*/ 
 371 #define ENC_PER_PLATFORM  3 
 372            // max no. of encodings for one language used on one platform. 
 373            // Using maximum of everything at the current moment to not make the 
 374            // library larger than necessary. Make larger only if necessary - MR 
 376 static const wxFontEncoding
 
 377     EquivalentEncodings
[][NUM_OF_PLATFORMS
][ENC_PER_PLATFORM
+1] = { 
 379     // *** Please put more common encodings as first! *** 
 383         /* unix    */ {wxFONTENCODING_ISO8859_1
, wxFONTENCODING_ISO8859_15
, STOP
}, 
 384         /* windows */ {wxFONTENCODING_CP1252
, STOP
}, 
 386         /* mac     */ {wxFONTENCODING_MACROMAN
, STOP
} 
 391         /* unix    */ {wxFONTENCODING_ISO8859_2
, STOP
}, 
 392         /* windows */ {wxFONTENCODING_CP1250
, STOP
}, 
 394         /* mac     */ {wxFONTENCODING_MACCENTRALEUR
, STOP
} 
 399         /* unix    */ {wxFONTENCODING_ISO8859_13
, wxFONTENCODING_ISO8859_4
, STOP
}, 
 400         /* windows */ {wxFONTENCODING_CP1257
, STOP
}, 
 407         /* unix    */ {wxFONTENCODING_ISO8859_8
, STOP
}, 
 408         /* windows */ {wxFONTENCODING_CP1255
, STOP
}, 
 410         /* mac     */ {wxFONTENCODING_MACHEBREW
, STOP
} 
 415         /* unix    */ {wxFONTENCODING_ISO8859_7
, STOP
}, 
 416         /* windows */ {wxFONTENCODING_CP1253
, STOP
}, 
 418         /* mac     */ {wxFONTENCODING_MACGREEK
, STOP
} 
 423         /* unix    */ {wxFONTENCODING_ISO8859_6
, STOP
}, 
 424         /* windows */ {wxFONTENCODING_CP1256
, STOP
}, 
 426         /* mac     */ {wxFONTENCODING_MACARABIC
, STOP
} 
 431         /* unix    */ {wxFONTENCODING_ISO8859_9
, STOP
}, 
 432         /* windows */ {wxFONTENCODING_CP1254
, STOP
}, 
 434         /* mac     */ {wxFONTENCODING_MACTURKISH
, STOP
} 
 439         /* unix    */ {wxFONTENCODING_KOI8
, wxFONTENCODING_KOI8_U
, wxFONTENCODING_ISO8859_5
, STOP
}, 
 440         /* windows */ {wxFONTENCODING_CP1251
, STOP
}, 
 442         /* mac     */ {wxFONTENCODING_MACCYRILLIC
, STOP
} 
 445     {{STOP
},{STOP
},{STOP
},{STOP
}} /* Terminator */ 
 446     /* no, _not_ Arnold! */ 
 450 static bool FindEncoding(const wxFontEncodingArray
& arr
, wxFontEncoding f
) 
 452     for (wxFontEncodingArray::const_iterator it 
= arr
.begin(), en 
= arr
.end(); 
 459 wxFontEncodingArray 
wxEncodingConverter::GetPlatformEquivalents(wxFontEncoding enc
, int platform
) 
 461     if (platform 
== wxPLATFORM_CURRENT
) 
 463 #if defined(__WXMSW__) 
 464         platform 
= wxPLATFORM_WINDOWS
; 
 465 #elif defined(__WXGTK__) || defined(__WXMOTIF__) 
 466         platform 
= wxPLATFORM_UNIX
; 
 467 #elif defined(__WXPM__) 
 468         platform 
= wxPLATFORM_OS2
; 
 469 #elif defined(__WXMAC__) 
 470         platform 
= wxPLATFORM_MAC
; 
 475     const wxFontEncoding 
*f
; 
 476     wxFontEncodingArray arr
; 
 479     while (EquivalentEncodings
[clas
][0][0] != STOP
) 
 481         for (i 
= 0; i 
< NUM_OF_PLATFORMS
; i
++) 
 482             for (e 
= 0; EquivalentEncodings
[clas
][i
][e
] != STOP
; e
++) 
 483                 if (EquivalentEncodings
[clas
][i
][e
] == enc
) 
 485                     for (f 
= EquivalentEncodings
[clas
][platform
]; *f 
!= STOP
; f
++) 
 486                         if (*f 
== enc
) arr
.push_back(enc
); 
 487                     for (f 
= EquivalentEncodings
[clas
][platform
]; *f 
!= STOP
; f
++) 
 488                         if (!FindEncoding(arr
, *f
)) arr
.push_back(*f
); 
 489                     i 
= NUM_OF_PLATFORMS
/*hack*/; break; 
 499 wxFontEncodingArray 
wxEncodingConverter::GetAllEquivalents(wxFontEncoding enc
) 
 502     const wxFontEncoding 
*f
; 
 503     wxFontEncodingArray arr
; 
 505     arr 
= GetPlatformEquivalents(enc
); // we want them to be first items in array 
 508     while (EquivalentEncodings
[clas
][0][0] != STOP
) 
 510         for (i 
= 0; i 
< NUM_OF_PLATFORMS
; i
++) 
 511             for (e 
= 0; EquivalentEncodings
[clas
][i
][e
] != STOP
; e
++) 
 512                 if (EquivalentEncodings
[clas
][i
][e
] == enc
) 
 514                     for (j 
= 0; j 
< NUM_OF_PLATFORMS
; j
++) 
 515                         for (f 
= EquivalentEncodings
[clas
][j
]; *f 
!= STOP
; f
++) 
 516                             if (!FindEncoding(arr
, *f
)) arr
.push_back(*f
); 
 517                     i 
= NUM_OF_PLATFORMS
/*hack*/; break;