1 /////////////////////////////////////////////////////////////////////////////
3 // Purpose: wxEncodingConverter class for converting between different font encodings
5 // Author: Vaclav Slavik
6 // Copyright: (c) 1999 Vaclav Slavik
7 // Licence: wxWindows Licence
8 /////////////////////////////////////////////////////////////////////////////
10 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
11 #pragma implementation "encconv.h"
14 // For compilers that support precompilation, includes "wx.h".
15 #include "wx/wxprec.h"
23 #include "wx/encconv.h"
27 // conversion tables, generated by scripts in $(WXWIN)/misc/unictabl:
29 #include "../common/unictabl.inc"
31 #include "unictabl.inc"
// Element type of the conversion table: Init() allocates m_Table as an array
// of tchar.
35 typedef wchar_t tchar
;
// Calling-convention qualifier applied to the static helpers below and to the
// comparison callback that is handed to qsort() and bsearch().
42 #define LINKAGEMODE __cdecl
47 #include "ATSUnicode.h"
48 #include "TextCommon.h"
49 #include "TextEncodingConverter.h"
51 #include "wx/mac/private.h" // includes mac headers
// Mac text-encoding identifier of a code page entry; GetEncTable() reads it as
// gMacCodePages[i].mac and matches entries through gMacCodePages[i].enc.
55 TextEncodingBase mac
;
// Pairs each wxFONTENCODING_MAC* value with the corresponding Mac
// kTextEncodingMac* constant.
58 MacCP gMacCodePages
[] =
60 wxFONTENCODING_MACROMAN
, kTextEncodingMacRoman
,
61 wxFONTENCODING_MACCENTRALEUR
, kTextEncodingMacCentralEurRoman
,
62 wxFONTENCODING_MACHEBREW
, kTextEncodingMacHebrew
,
63 wxFONTENCODING_MACGREEK
, kTextEncodingMacGreek
,
64 wxFONTENCODING_MACARABIC
, kTextEncodingMacArabic
,
65 wxFONTENCODING_MACTURKISH
, kTextEncodingMacTurkish
,
66 wxFONTENCODING_MACCYRILLIC
, kTextEncodingMacCyrillic
,
// One 128-entry table per gMacCodePages row; GetEncTable() stores the
// converted value of byte c at index c - 128.
69 wxUint16 gMacEncodings
[WXSIZEOF(gMacCodePages
)][128] ;
// Per-row flag: set to true by GetEncTable() once the matching gMacEncodings
// row has been filled.
70 bool gMacEncodingsInited
[WXSIZEOF(gMacCodePages
)] ;
74 static wxUint16
* LINKAGEMODE
GetEncTable(wxFontEncoding enc
)
77 for (int i
= 0 ; i
< WXSIZEOF(gMacCodePages
) ; ++i
)
79 if ( gMacCodePages
[i
].enc
== enc
)
81 if ( gMacEncodingsInited
[i
] == false )
83 TECObjectRef converter
;
84 TextEncodingBase code
= gMacCodePages
[i
].mac
;
85 TextEncodingBase unicode
= CreateTextEncoding(kTextEncodingUnicodeDefault
,0,kUnicode16BitFormat
) ;
86 OSStatus status
= TECCreateConverter(&converter
,code
,unicode
);
89 ByteCount byteInLen
, byteOutLen
;
90 for( char c
= 255 ; c
>= 128 ; --c
)
93 status
= TECConvertText(converter
, (ConstTextPtr
) &s
, 1, &byteInLen
,
94 (TextPtr
) &gMacEncodings
[i
][c
-128] , 2, &byteOutLen
);
96 status
= TECDisposeConverter(converter
);
97 gMacEncodingsInited
[i
]=true;
99 return gMacEncodings
[i
] ;
104 for (int i
= 0; encodings_list
[i
].table
!= NULL
; i
++)
106 if (encodings_list
[i
].encoding
== enc
)
107 return encodings_list
[i
].table
;
117 extern "C" int LINKAGEMODE
CompareCharsetItems(const void *i1
, const void *i2
)
119 return ( ((CharsetItem
*)i1
) -> u
- ((CharsetItem
*)i2
) -> u
);
123 static CharsetItem
* LINKAGEMODE
BuildReverseTable(wxUint16
*tbl
)
125 CharsetItem
*rev
= new CharsetItem
[128];
127 for (int i
= 0; i
< 128; i
++)
128 rev
[i
].c
= 128 + i
, rev
[i
].u
= tbl
[i
];
130 qsort(rev
, 128, sizeof(CharsetItem
), CompareCharsetItems
);
// Default constructor: clears both Unicode-direction flags, so a fresh
// converter treats neither its input nor its output as Unicode.
137 wxEncodingConverter::wxEncodingConverter()
140 m_UnicodeInput
= m_UnicodeOutput
= FALSE
;
// Prepares the converter for translating text from input_enc to output_enc.
// Builds m_Table, the lookup table read by the Convert() overloads, and records
// in m_UnicodeInput / m_UnicodeOutput whether either side is
// wxFONTENCODING_UNICODE.
// Returns TRUE straight away when input_enc == output_enc (plain-copy mode) and
// FALSE when GetEncTable() has no table for an encoding that is needed.
// With method == wxCONVERT_SUBSTITUTE the entries of encoding_unicode_fallback
// are used for characters that have no direct counterpart in the output
// encoding.
146 bool wxEncodingConverter::Init(wxFontEncoding input_enc
, wxFontEncoding output_enc
, int method
)
149 wxUint16
*in_tbl
, *out_tbl
= NULL
;
// Discard the table built by an earlier Init() call, if any.
151 if (m_Table
) {delete[] m_Table
; m_Table
= NULL
;}
// NOTE(review): as written this refuses every conversion that involves
// wxFONTENCODING_UNICODE, although the branches below handle it. Presumably
// the check is meant only for builds without wchar_t support -- confirm the
// surrounding conditional compilation.
154 if (input_enc
== wxFONTENCODING_UNICODE
|| output_enc
== wxFONTENCODING_UNICODE
) return FALSE
;
// Same encoding on both sides: nothing to translate, Convert() just copies.
157 if (input_enc
== output_enc
) {m_JustCopy
= TRUE
; return TRUE
;}
159 m_UnicodeOutput
= (output_enc
== wxFONTENCODING_UNICODE
);
// Unicode input: the table has 65536 entries and is indexed by the 16-bit
// character value.
162 if (input_enc
== wxFONTENCODING_UNICODE
)
164 if ((out_tbl
= GetEncTable(output_enc
)) == NULL
) return FALSE
;
166 m_Table
= new tchar
[65536];
167 for (i
= 0; i
< 128; i
++) m_Table
[i
] = (tchar
)i
; // 7bit ASCII
// Every other character defaults to '?' until a better mapping is stored below.
168 for (i
= 128; i
< 65536; i
++) m_Table
[i
] = (tchar
)'?';
169 // FIXME - this should be character that means `unicode to charset' impossible, not '?'
// Substitution mode: store the fallback replacement for each listed character.
171 if (method
== wxCONVERT_SUBSTITUTE
)
173 for (i
= 0; i
< encoding_unicode_fallback_count
; i
++)
174 m_Table
[encoding_unicode_fallback
[i
].c
] = (tchar
) encoding_unicode_fallback
[i
].s
;
// Exact mappings are written last, so they override any fallback entry stored
// above: Unicode value out_tbl[i] maps to the 8-bit character 128 + i.
177 for (i
= 0; i
< 128; i
++)
178 m_Table
[out_tbl
[i
]] = (tchar
)(128 + i
);
180 m_UnicodeInput
= TRUE
;
182 else // input !Unicode
184 if ((in_tbl
= GetEncTable(input_enc
)) == NULL
) return FALSE
;
// The output table is only needed when the output is an 8-bit encoding.
185 if (output_enc
!= wxFONTENCODING_UNICODE
)
186 if ((out_tbl
= GetEncTable(output_enc
)) == NULL
) return FALSE
;
188 m_UnicodeInput
= FALSE
;
// 8-bit input: 256 entries, one per input byte.
190 m_Table
= new tchar
[256];
191 for (i
= 0; i
< 128; i
++) m_Table
[i
] = (tchar
)i
; // 7bit ASCII
// 8-bit -> Unicode: the upper half is a copy of the input encoding's table.
193 if (output_enc
== wxFONTENCODING_UNICODE
)
195 for (i
= 0; i
< 128; i
++) m_Table
[128 + i
] = (tchar
)in_tbl
[i
];
198 // FIXME: write a substitute for bsearch
200 else // output !Unicode
// 8-bit -> 8-bit: each upper-half character is looked up in the sorted
// reverse table of the output encoding.
202 CharsetItem
*rev
= BuildReverseTable(out_tbl
);
206 for (i
= 0; i
< 128; i
++)
209 item
= (CharsetItem
*) bsearch(&key
, rev
, 128, sizeof(CharsetItem
), CompareCharsetItems
);
// Not found in the output encoding: try the fallback table when substitution
// is allowed.
210 if (item
== NULL
&& method
== wxCONVERT_SUBSTITUTE
)
211 item
= (CharsetItem
*) bsearch(&key
, encoding_unicode_fallback
,
212 encoding_unicode_fallback_count
, sizeof(CharsetItem
), CompareCharsetItems
);
214 m_Table
[128 + i
] = (tchar
)item
-> c
;
// NOTE(review): the two assignments below differ only in the cast (wchar_t vs
// char) and keep the character code unchanged. They presumably belong to
// alternative build configurations for the "no match" case -- confirm.
217 m_Table
[128 + i
] = (wchar_t)(128 + i
);
219 m_Table
[128 + i
] = (char)(128 + i
);
225 #endif // !__WXWINCE__
233 void wxEncodingConverter::Convert(const char* input
, char* output
) const
235 wxASSERT_MSG(!m_UnicodeOutput
, wxT("You cannot convert to unicode if output is const char*!"));
236 wxASSERT_MSG(!m_UnicodeInput
, wxT("You cannot convert from unicode if input is const char*!"));
243 strcpy(output
, input
);
247 wxCHECK_RET(m_Table
!= NULL
, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
249 for (i
= input
, o
= output
; *i
!= 0;)
250 *(o
++) = (char)(m_Table
[(wxUint8
)*(i
++)]);
257 void wxEncodingConverter::Convert(const char* input
, wchar_t* output
) const
259 wxASSERT_MSG(m_UnicodeOutput
, wxT("You cannot convert to 8-bit if output is const wchar_t*!"));
260 wxASSERT_MSG(!m_UnicodeInput
, wxT("You cannot convert from unicode if input is const char*!"));
267 for (i
= input
, o
= output
; *i
!= 0;)
268 *(o
++) = (wchar_t)(*(i
++));
273 wxCHECK_RET(m_Table
!= NULL
, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
275 for (i
= input
, o
= output
; *i
!= 0;)
276 *(o
++) = (wchar_t)(m_Table
[(wxUint8
)*(i
++)]);
282 void wxEncodingConverter::Convert(const wchar_t* input
, char* output
) const
284 wxASSERT_MSG(!m_UnicodeOutput
, wxT("You cannot convert to unicode if output is const char*!"));
285 wxASSERT_MSG(m_UnicodeInput
, wxT("You cannot convert from 8-bit if input is const wchar_t*!"));
292 for (i
= input
, o
= output
; *i
!= 0;)
293 *(o
++) = (char)(*(i
++));
298 wxCHECK_RET(m_Table
!= NULL
, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
300 for (i
= input
, o
= output
; *i
!= 0;)
301 *(o
++) = (char)(m_Table
[(wxUint16
)*(i
++)]);
307 void wxEncodingConverter::Convert(const wchar_t* input
, wchar_t* output
) const
309 wxASSERT_MSG(m_UnicodeOutput
, wxT("You cannot convert to 8-bit if output is const wchar_t*!"));
310 wxASSERT_MSG(m_UnicodeInput
, wxT("You cannot convert from 8-bit if input is const wchar_t*!"));
317 // wcscpy() is not guaranteed to exist
318 for (i
= input
, o
= output
; *i
!= 0;)
324 wxCHECK_RET(m_Table
!= NULL
, wxT("You must call wxEncodingConverter::Init() before actually converting!"));
326 for (i
= input
, o
= output
; *i
!= 0;)
327 *(o
++) = (wchar_t)(m_Table
[(wxUint8
)*(i
++)]);
331 #endif // wxUSE_WCHAR_T
334 wxString
wxEncodingConverter::Convert(const wxString
& input
) const
336 if (m_JustCopy
) return input
;
341 wxCHECK_MSG(m_Table
!= NULL
, s
,
342 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
346 for (i
= input
.c_str(); *i
!= 0; i
++)
347 s
<< (wxChar
)(m_Table
[(wxUint16
)*i
]);
351 for (i
= input
.c_str(); *i
!= 0; i
++)
352 s
<< (wxChar
)(m_Table
[(wxUint8
)*i
]);
364 // Following tables describe classes of encoding equivalence.
// STOP marks the end of every per-platform encoding list and, as the first
// entry of a class, the end of the whole table.
367 #define STOP wxFONTENCODING_SYSTEM
369 #define NUM_OF_PLATFORMS 4 /*must conform to enum wxPLATFORM_XXXX !!!*/
370 #define ENC_PER_PLATFORM 5
371 // max no. of encodings for one language used on one platform
372 // Anybody thinks 5 is not enough? ;-)
// Indexed as [class][platform][slot]: each class groups the encodings that are
// treated as equivalent, listed separately for each platform. Every list has
// ENC_PER_PLATFORM + 1 slots so that it can always end with STOP.
374 static wxFontEncoding
375 EquivalentEncodings
[][NUM_OF_PLATFORMS
][ENC_PER_PLATFORM
+1] = {
377 // *** Please put more common encodings as first! ***
381 /* unix */ {wxFONTENCODING_ISO8859_1
, wxFONTENCODING_ISO8859_15
, STOP
},
382 /* windows */ {wxFONTENCODING_CP1252
, STOP
},
384 /* mac */ {wxFONTENCODING_MACROMAN
, STOP
}
389 /* unix */ {wxFONTENCODING_ISO8859_2
, STOP
},
390 /* windows */ {wxFONTENCODING_CP1250
, STOP
},
392 /* mac */ {wxFONTENCODING_MACCENTRALEUR
, STOP
}
397 /* unix */ {wxFONTENCODING_ISO8859_13
, wxFONTENCODING_ISO8859_4
, STOP
},
398 /* windows */ {wxFONTENCODING_CP1257
, STOP
},
405 /* unix */ {wxFONTENCODING_ISO8859_8
, STOP
},
406 /* windows */ {wxFONTENCODING_CP1255
, STOP
},
408 /* mac */ {wxFONTENCODING_MACHEBREW
, STOP
}
413 /* unix */ {wxFONTENCODING_ISO8859_7
, STOP
},
414 /* windows */ {wxFONTENCODING_CP1253
, STOP
},
416 /* mac */ {wxFONTENCODING_MACGREEK
, STOP
}
421 /* unix */ {wxFONTENCODING_ISO8859_6
, STOP
},
422 /* windows */ {wxFONTENCODING_CP1256
, STOP
},
424 /* mac */ {wxFONTENCODING_MACARABIC
, STOP
}
429 /* unix */ {wxFONTENCODING_ISO8859_9
, STOP
},
430 /* windows */ {wxFONTENCODING_CP1254
, STOP
},
432 /* mac */ {wxFONTENCODING_MACTURKISH
, STOP
}
437 /* unix */ {wxFONTENCODING_KOI8
, wxFONTENCODING_ISO8859_5
, STOP
},
438 /* windows */ {wxFONTENCODING_CP1251
, STOP
},
440 /* mac */ {wxFONTENCODING_MACCYRILLIC
, STOP
}
// The lookup loops stop at the first class whose [0][0] entry is STOP.
443 {{STOP
},{STOP
},{STOP
},{STOP
}} /* Terminator */
444 /* no, _not_ Arnold! */
// Walks `arr` from begin() to end() looking for `f`.
// NOTE(review): callers add an encoding only when this returns false, so it
// presumably reports whether `f` is already present in `arr` -- confirm
// against the loop body.
448 static bool FindEncoding(const wxFontEncodingArray
& arr
, wxFontEncoding f
)
450 for (wxFontEncodingArray::const_iterator it
= arr
.begin(), en
= arr
.end();
// Collects the encodings of `platform` that are equivalent to `enc`, i.e. the
// given platform's list from each EquivalentEncodings class that contains
// `enc` on any platform. If `enc` itself is in that list it is placed first,
// and no encoding is added twice.
// Passing wxPLATFORM_CURRENT selects the platform this build targets.
457 wxFontEncodingArray
wxEncodingConverter::GetPlatformEquivalents(wxFontEncoding enc
, int platform
)
// Resolve "current platform" to a concrete table index at compile time.
459 if (platform
== wxPLATFORM_CURRENT
)
461 #if defined(__WXMSW__)
462 platform
= wxPLATFORM_WINDOWS
;
463 #elif defined(__WXGTK__) || defined(__WXMOTIF__)
464 platform
= wxPLATFORM_UNIX
;
465 #elif defined(__WXOS2__)
466 platform
= wxPLATFORM_OS2
;
467 #elif defined(__WXMAC__)
468 platform
= wxPLATFORM_MAC
;
474 wxFontEncodingArray arr
;
// Scan the classes up to the terminator (first entry STOP).
477 while (EquivalentEncodings
[clas
][0][0] != STOP
)
// Does any platform's list in this class contain enc?
479 for (i
= 0; i
< NUM_OF_PLATFORMS
; i
++)
480 for (e
= 0; EquivalentEncodings
[clas
][i
][e
] != STOP
; e
++)
481 if (EquivalentEncodings
[clas
][i
][e
] == enc
)
// First pass: put enc itself at the front if the requested platform has it.
483 for (f
= EquivalentEncodings
[clas
][platform
]; *f
!= STOP
; f
++)
484 if (*f
== enc
) arr
.push_back(enc
);
// Second pass: append the platform's remaining encodings, skipping duplicates.
485 for (f
= EquivalentEncodings
[clas
][platform
]; *f
!= STOP
; f
++)
486 if (!FindEncoding(arr
, *f
)) arr
.push_back(*f
);
// Setting i to its limit ends the platform loop once the inner loop is left.
487 i
= NUM_OF_PLATFORMS
/*hack*/; break;
// Collects every encoding equivalent to `enc` on any platform: the result
// starts with GetPlatformEquivalents(enc), followed by the encodings of all
// platforms from each EquivalentEncodings class that contains `enc`.
// No encoding is added twice.
497 wxFontEncodingArray
wxEncodingConverter::GetAllEquivalents(wxFontEncoding enc
)
501 wxFontEncodingArray arr
;
503 arr
= GetPlatformEquivalents(enc
); // we want them to be first items in array
// Scan the classes up to the terminator (first entry STOP).
506 while (EquivalentEncodings
[clas
][0][0] != STOP
)
// Does any platform's list in this class contain enc?
508 for (i
= 0; i
< NUM_OF_PLATFORMS
; i
++)
509 for (e
= 0; EquivalentEncodings
[clas
][i
][e
] != STOP
; e
++)
510 if (EquivalentEncodings
[clas
][i
][e
] == enc
)
// Append the lists of all platforms in this class, skipping duplicates.
512 for (j
= 0; j
< NUM_OF_PLATFORMS
; j
++)
513 for (f
= EquivalentEncodings
[clas
][j
]; *f
!= STOP
; f
++)
514 if (!FindEncoding(arr
, *f
)) arr
.push_back(*f
);
// Setting i to its limit ends the platform loop once the inner loop is left.
515 i
= NUM_OF_PLATFORMS
/*hack*/; break;
523 #endif // wxUSE_FONTMAP