1 /////////////////////////////////////////////////////////////////////////////
3 // Purpose: wxEncodingConverter class for converting between different font encodings
5 // Author: Vaclav Slavik
6 // Copyright: (c) 1999 Vaclav Slavik
7 // Licence: wxWindows licence
8 /////////////////////////////////////////////////////////////////////////////
10 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
11 #pragma implementation "encconv.h"
14 // For compilers that support precompilation, includes "wx.h".
15 #include "wx/wxprec.h"
21 #include "wx/encconv.h"
25 // conversion tables, generated by scripts in $(WXWIN)/misc/unictabl:
26 #if defined( __BORLANDC__ ) || defined(__DARWIN__)
27 #include "../common/unictabl.inc"
29 #include "unictabl.inc"
// Element type of the conversion table (m_Table) allocated by
// wxEncodingConverter::Init().
// NOTE(review): any preprocessor guard selecting this typedef is not visible
// in this listing -- confirm against the full file.
33 typedef wchar_t tchar
;
40 #include <Carbon/Carbon.h>
42 #include <ATSUnicode.h>
43 #include <TextCommon.h>
44 #include <TextEncodingConverter.h>
46 #include "wx/fontutil.h"
47 #include "wx/mac/private.h" // includes mac headers
// Cache of conversion tables for the Mac encodings: one row per encoding in
// [wxFONTENCODING_MACMIN, wxFONTENCODING_MACMAX], 128 entries per row.
// GetEncTable() fills row (enc - wxFONTENCODING_MACMIN) by converting the
// bytes 128..255 and storing each result at index (byte - 128).
49 wxUint16 gMacEncodings
[wxFONTENCODING_MACMAX
-wxFONTENCODING_MACMIN
+1][128] ;
// One flag per row of gMacEncodings; GetEncTable() sets it to true after the
// row has been filled so the conversion is done only once per encoding.
50 bool gMacEncodingsInited
[wxFONTENCODING_MACMAX
-wxFONTENCODING_MACMIN
+1] ;
54 #include "wx/msw/wince/missing.h" // for bsearch()
// Returns the 128-entry table of 16-bit values for encoding `enc`; entry k
// describes byte (128 + k) of that encoding.
//
// Encodings in [wxFONTENCODING_MACMIN, wxFONTENCODING_MACMAX] are built on
// first request with a Text Encoding Converter (target: 16-bit Unicode) and
// cached in gMacEncodings. Every other encoding is looked up in
// encodings_list, whose end is marked by an entry with a NULL table.
//
// NOTE(review): braces, preprocessor guards and some statements (notably the
// declaration/initialisation of `s` and whatever is returned when no entry
// matches) are missing from this listing -- confirm against the full file.
57 static wxUint16
* GetEncTable(wxFontEncoding enc
)
// Mac encodings are handled separately from the static tables.
60 if( enc
>= wxFONTENCODING_MACMIN
&& enc
<= wxFONTENCODING_MACMAX
)
// Row index of this encoding inside gMacEncodings / gMacEncodingsInited.
62 int i
= enc
-wxFONTENCODING_MACMIN
;
// Build the row only the first time this encoding is requested.
63 if ( gMacEncodingsInited
[i
] == false )
65 TECObjectRef converter
;
66 TextEncodingBase code
= wxMacGetSystemEncFromFontEnc( enc
) ;
67 TextEncodingBase unicode
= CreateTextEncoding(kTextEncodingUnicodeDefault
,0,kUnicode16BitFormat
) ;
// NOTE(review): `status` is assigned here and below but never inspected in
// the visible lines, so a failed converter creation or conversion would go
// unnoticed and the row would still be marked as initialised.
68 OSStatus status
= TECCreateConverter(&converter
,code
,unicode
);
71 ByteCount byteInLen
, byteOutLen
;
// Walk the upper half of the byte range from 255 down to 128; an unsigned
// char starting at 255 and decremented stops once it reaches 127.
72 for( unsigned char c
= 255 ; c
>= 128 ; --c
)
// Convert one input byte into a 2-byte result stored at index (c - 128).
75 status
= TECConvertText(converter
, (ConstTextPtr
) &s
, 1, &byteInLen
,
76 (TextPtr
) &gMacEncodings
[i
][c
-128] , 2, &byteOutLen
);
78 status
= TECDisposeConverter(converter
);
79 gMacEncodingsInited
[i
]=true;
81 return gMacEncodings
[i
] ;
// Non-Mac encodings: linear search of encodings_list for a matching entry.
85 for (int i
= 0; encodings_list
[i
].table
!= NULL
; i
++)
87 if (encodings_list
[i
].encoding
== enc
)
88 return encodings_list
[i
].table
;
98 extern "C" int wxCMPFUNC_CONV
99 CompareCharsetItems(const void *i1
, const void *i2
)
101 return ( ((CharsetItem
*)i1
) -> u
- ((CharsetItem
*)i2
) -> u
);
105 static CharsetItem
* BuildReverseTable(wxUint16
*tbl
)
107 CharsetItem
*rev
= new CharsetItem
[128];
109 for (int i
= 0; i
< 128; i
++)
110 rev
[i
].c
= wxUint8(128 + i
), rev
[i
].u
= tbl
[i
];
112 qsort(rev
, 128, sizeof(CharsetItem
), CompareCharsetItems
);
// Default constructor: marks both the input and the output side as
// non-Unicode until Init() is called.
// NOTE(review): initialisation of the other members used in this file
// (m_Table, m_JustCopy) is not visible in this listing -- confirm that they
// are set here, since Init() and Convert() read them.
119 wxEncodingConverter::wxEncodingConverter()
122 m_UnicodeInput
= m_UnicodeOutput
= false;
// Prepares the converter for translating text from `input_enc` to
// `output_enc`, (re)building the lookup table m_Table.
//
//   method == wxCONVERT_SUBSTITUTE additionally consults
//   encoding_unicode_fallback for characters that have no direct mapping.
//
// Returns true on success (including the trivial case of identical
// encodings, which only sets m_JustCopy) and false when a required encoding
// table cannot be obtained from GetEncTable().
//
// NOTE(review): braces, local declarations (i, item, key) and preprocessor
// guards are missing from this listing; the notes below flag places where the
// visible text alone looks suspicious.
128 bool wxEncodingConverter::Init(wxFontEncoding input_enc
, wxFontEncoding output_enc
, int method
)
131 wxUint16
*in_tbl
, *out_tbl
= NULL
;
// Drop the table left over from a previous Init() call.
133 if (m_Table
) {delete[] m_Table
; m_Table
= NULL
;}
// NOTE(review): as listed, this rejects every conversion that involves
// Unicode, which would make the Unicode branches below unreachable.
// Presumably it sits inside a preprocessor conditional that is not visible
// here (an "#endif // wxUSE_WCHAR_T" appears later in the file) -- confirm.
136 if (input_enc
== wxFONTENCODING_UNICODE
|| output_enc
== wxFONTENCODING_UNICODE
) return false;
// Same encoding on both sides: no table is needed, Convert() just copies.
139 if (input_enc
== output_enc
) {m_JustCopy
= true; return true;}
141 m_UnicodeOutput
= (output_enc
== wxFONTENCODING_UNICODE
);
// ---- Unicode input: table indexed by the 16-bit character value ----
144 if (input_enc
== wxFONTENCODING_UNICODE
)
146 if ((out_tbl
= GetEncTable(output_enc
)) == NULL
) return false;
148 m_Table
= new tchar
[65536];
149 for (i
= 0; i
< 128; i
++) m_Table
[i
] = (tchar
)i
; // 7bit ASCII
// Everything above ASCII starts out as 0, i.e. "no mapping".
150 for (i
= 128; i
< 65536; i
++) m_Table
[i
] = (tchar
)0;
// Optional approximations; these are written first so that the exact
// mappings stored by the next loop overwrite them where both exist.
152 if (method
== wxCONVERT_SUBSTITUTE
)
154 for (i
= 0; i
< encoding_unicode_fallback_count
; i
++)
155 m_Table
[encoding_unicode_fallback
[i
].c
] = (tchar
) encoding_unicode_fallback
[i
].s
;
// Exact mappings: the value out_tbl[i] maps back to byte (128 + i).
158 for (i
= 0; i
< 128; i
++)
159 m_Table
[out_tbl
[i
]] = (tchar
)(128 + i
);
161 m_UnicodeInput
= true;
// ---- 8-bit input: table indexed by the input byte ----
163 else // input !Unicode
165 if ((in_tbl
= GetEncTable(input_enc
)) == NULL
) return false;
166 if (output_enc
!= wxFONTENCODING_UNICODE
)
167 if ((out_tbl
= GetEncTable(output_enc
)) == NULL
) return false;
169 m_UnicodeInput
= false;
171 m_Table
= new tchar
[256];
172 for (i
= 0; i
< 128; i
++) m_Table
[i
] = (tchar
)i
; // 7bit ASCII
// 8-bit -> Unicode: the input table already holds the target values.
174 if (output_enc
== wxFONTENCODING_UNICODE
)
176 for (i
= 0; i
< 128; i
++) m_Table
[128 + i
] = (tchar
)in_tbl
[i
];
// 8-bit -> 8-bit: go through the 16-bit value of each input byte and look it
// up in the reversed output table.
179 else // output !Unicode
// NOTE(review): BuildReverseTable() returns new[] memory; its release is not
// visible in this listing -- confirm that `rev` is deleted after the loop.
181 CharsetItem
*rev
= BuildReverseTable(out_tbl
);
185 for (i
= 0; i
< 128; i
++)
188 item
= (CharsetItem
*) bsearch(&key
, rev
, 128, sizeof(CharsetItem
), CompareCharsetItems
);
// NOTE(review): this searches encoding_unicode_fallback using
// sizeof(CharsetItem) and later reads item->c, while the Unicode-input branch
// above reads the fields .c and .s of the same array. Confirm that the
// element layout of the fallback array really matches CharsetItem.
189 if (item
== NULL
&& method
== wxCONVERT_SUBSTITUTE
)
190 item
= (CharsetItem
*) bsearch(&key
, encoding_unicode_fallback
,
191 encoding_unicode_fallback_count
, sizeof(CharsetItem
), CompareCharsetItems
);
// NOTE(review): `item` may still be NULL here; the guarding condition is not
// visible in this listing -- confirm that it exists.
193 m_Table
[128 + i
] = (tchar
)item
-> c
;
// No mapping found: keep the byte unchanged. Two variants of the same
// assignment appear here, presumably selected by a preprocessor conditional
// that is not visible in this listing.
196 m_Table
[128 + i
] = (wchar_t)(128 + i
);
198 m_Table
[128 + i
] = (char)(128 + i
);
// Character stored in the output for input characters that have no mapping.
210 #define REPLACEMENT_CHAR ((tchar)'?')
// Looks `value` up in `table` and returns the mapped character. A table entry
// of 0 for a non-zero input means "no mapping"; in that case the result is
// REPLACEMENT_CHAR.
// NOTE(review): `repl` is presumably set to true when a replacement happens
// and the function presumably returns `r`; neither statement is visible in
// this listing -- confirm. `value` is used directly as an index, so callers
// must pass a value that is within the bounds of `table`.
212 inline tchar
GetTableValue(const tchar
*table
, tchar value
, bool& repl
)
214 tchar r
= table
[value
];
215 if (r
== 0 && value
!= 0)
217 r
= REPLACEMENT_CHAR
;
// Converts the NUL-terminated 8-bit string `input` into the 8-bit buffer
// `output`, one output character per input character. Only valid when
// neither side of the converter is Unicode (asserted below).
// NOTE(review): the local declarations, the m_JustCopy test around strcpy(),
// the terminating NUL write and the return statements are not visible in
// this listing. `output` is written without any length check in the visible
// lines, so the caller must provide room for strlen(input) + 1 characters.
224 bool wxEncodingConverter::Convert(const char* input
, char* output
) const
226 wxASSERT_MSG(!m_UnicodeOutput
, wxT("You cannot convert to unicode if output is const char*!"));
227 wxASSERT_MSG(!m_UnicodeInput
, wxT("You cannot convert from unicode if input is const char*!"));
// Plain copy; presumably taken when m_JustCopy is set (guard not visible).
234 strcpy(output
, input
);
// Without a table there is nothing to convert with: report failure.
238 wxCHECK_MSG(m_Table
!= NULL
, false,
239 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
// Passed to GetTableValue() by reference for every character.
241 bool replaced
= false;
// The input byte is widened through wxUint8 so that bytes >= 128 index the
// upper half of the table instead of becoming negative.
243 for (i
= input
, o
= output
; *i
!= 0;)
244 *(o
++) = (char)(GetTableValue(m_Table
, (wxUint8
)*(i
++), replaced
));
// Converts the NUL-terminated 8-bit string `input` into the wide buffer
// `output`, one output character per input character. Only valid when the
// converter was initialised for 8-bit input and Unicode output (asserted).
// NOTE(review): the local declarations, the m_JustCopy test around the first
// loop, the terminating NUL write and the return statements are not visible
// in this listing. `output` is written without any length check in the
// visible lines.
253 bool wxEncodingConverter::Convert(const char* input
, wchar_t* output
) const
255 wxASSERT_MSG(m_UnicodeOutput
, wxT("You cannot convert to 8-bit if output is const wchar_t*!"));
256 wxASSERT_MSG(!m_UnicodeInput
, wxT("You cannot convert from unicode if input is const char*!"));
// Plain widening copy; presumably the m_JustCopy path (guard not visible).
263 for (i
= input
, o
= output
; *i
!= 0;)
264 *(o
++) = (wchar_t)(*(i
++));
// Without a table there is nothing to convert with: report failure.
269 wxCHECK_MSG(m_Table
!= NULL
, false,
270 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
// Passed to GetTableValue() by reference for every character.
272 bool replaced
= false;
// Table-driven conversion; the input byte is indexed as an unsigned 8-bit
// value.
274 for (i
= input
, o
= output
; *i
!= 0;)
275 *(o
++) = (wchar_t)(GetTableValue(m_Table
, (wxUint8
)*(i
++), replaced
));
// Converts the NUL-terminated wide string `input` into the 8-bit buffer
// `output`, one output character per input character. Only valid when the
// converter was initialised for Unicode input and 8-bit output (asserted).
// NOTE(review): the local declarations, the m_JustCopy test around the first
// loop, the terminating NUL write and the return statements are not visible
// in this listing. `output` is written without any length check in the
// visible lines.
283 bool wxEncodingConverter::Convert(const wchar_t* input
, char* output
) const
285 wxASSERT_MSG(!m_UnicodeOutput
, wxT("You cannot convert to unicode if output is const char*!"));
286 wxASSERT_MSG(m_UnicodeInput
, wxT("You cannot convert from 8-bit if input is const wchar_t*!"));
// Plain narrowing copy; presumably the m_JustCopy path (guard not visible).
293 for (i
= input
, o
= output
; *i
!= 0;)
294 *(o
++) = (char)(*(i
++));
// Without a table there is nothing to convert with: report failure.
299 wxCHECK_MSG(m_Table
!= NULL
, false,
300 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
// Passed to GetTableValue() by reference for every character.
302 bool replaced
= false;
// Table-driven conversion; the wide character is reduced to 16 bits, which
// matches the 65536-entry table Init() allocates for Unicode input.
304 for (i
= input
, o
= output
; *i
!= 0;)
305 *(o
++) = (char)(GetTableValue(m_Table
, (wxUint16
)*(i
++), replaced
));
313 bool wxEncodingConverter::Convert(const wchar_t* input
, wchar_t* output
) const
315 wxASSERT_MSG(m_UnicodeOutput
, wxT("You cannot convert to 8-bit if output is const wchar_t*!"));
316 wxASSERT_MSG(m_UnicodeInput
, wxT("You cannot convert from 8-bit if input is const wchar_t*!"));
323 // wcscpy() is not guaranteed to exist
324 for (i
= input
, o
= output
; *i
!= 0;)
330 wxCHECK_MSG(m_Table
!= NULL
, false,
331 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
333 bool replaced
= false;
335 for (i
= input
, o
= output
; *i
!= 0;)
336 *(o
++) = (wchar_t)(GetTableValue(m_Table
, (wxUint8
)*(i
++), replaced
));
342 #endif // wxUSE_WCHAR_T
// Returns a converted copy of `input`.
// When the converter is in just-copy mode the input is returned unchanged;
// when no table has been built, the (still empty) result string `s` is
// returned by wxCHECK_MSG.
// NOTE(review): unlike the pointer overloads, the loops below index m_Table
// directly instead of going through GetTableValue(), so a table entry of 0 is
// appended as-is rather than being replaced. The declarations of `s` and `i`,
// the condition choosing between the two loops and the final return are not
// visible in this listing.
345 wxString
wxEncodingConverter::Convert(const wxString
& input
) const
347 if (m_JustCopy
) return input
;
352 wxCHECK_MSG(m_Table
!= NULL
, s
,
353 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
// Variant indexing the table with 16-bit character values.
357 for (i
= input
.c_str(); *i
!= 0; i
++)
358 s
<< (wxChar
)(m_Table
[(wxUint16
)*i
]);
// Variant indexing the table with 8-bit character values.
362 for (i
= input
.c_str(); *i
!= 0; i
++)
363 s
<< (wxChar
)(m_Table
[(wxUint8
)*i
]);
375 // Following tables describe classes of encoding equivalence.
// End-of-list marker used inside every per-platform encoding list below.
378 #define STOP wxFONTENCODING_SYSTEM
380 #define NUM_OF_PLATFORMS 4 /*must conform to enum wxPLATFORM_XXXX !!!*/
381 #define ENC_PER_PLATFORM 5
382 // max no. of encodings for one language used on one platform
383 // Anybody thinks 5 is not enough? ;-)
// Table of equivalence classes, indexed as [class][platform][n]. Each
// per-platform list holds at most ENC_PER_PLATFORM encodings followed by
// STOP; the table itself ends with a class whose lists all start with STOP.
// GetPlatformEquivalents() and GetAllEquivalents() scan it in order, so the
// order of entries decides the order of their results.
// NOTE(review): class-opening braces and some platform rows are missing from
// this listing.
385 static wxFontEncoding
386 EquivalentEncodings
[][NUM_OF_PLATFORMS
][ENC_PER_PLATFORM
+1] = {
388 // *** Please put more common encodings as first! ***
392 /* unix */ {wxFONTENCODING_ISO8859_1
, wxFONTENCODING_ISO8859_15
, STOP
},
393 /* windows */ {wxFONTENCODING_CP1252
, STOP
},
395 /* mac */ {wxFONTENCODING_MACROMAN
, STOP
}
400 /* unix */ {wxFONTENCODING_ISO8859_2
, STOP
},
401 /* windows */ {wxFONTENCODING_CP1250
, STOP
},
403 /* mac */ {wxFONTENCODING_MACCENTRALEUR
, STOP
}
408 /* unix */ {wxFONTENCODING_ISO8859_13
, wxFONTENCODING_ISO8859_4
, STOP
},
409 /* windows */ {wxFONTENCODING_CP1257
, STOP
},
416 /* unix */ {wxFONTENCODING_ISO8859_8
, STOP
},
417 /* windows */ {wxFONTENCODING_CP1255
, STOP
},
419 /* mac */ {wxFONTENCODING_MACHEBREW
, STOP
}
424 /* unix */ {wxFONTENCODING_ISO8859_7
, STOP
},
425 /* windows */ {wxFONTENCODING_CP1253
, STOP
},
427 /* mac */ {wxFONTENCODING_MACGREEK
, STOP
}
432 /* unix */ {wxFONTENCODING_ISO8859_6
, STOP
},
433 /* windows */ {wxFONTENCODING_CP1256
, STOP
},
435 /* mac */ {wxFONTENCODING_MACARABIC
, STOP
}
440 /* unix */ {wxFONTENCODING_ISO8859_9
, STOP
},
441 /* windows */ {wxFONTENCODING_CP1254
, STOP
},
443 /* mac */ {wxFONTENCODING_MACTURKISH
, STOP
}
448 /* unix */ {wxFONTENCODING_KOI8
, wxFONTENCODING_KOI8_U
, wxFONTENCODING_ISO8859_5
, STOP
},
449 /* windows */ {wxFONTENCODING_CP1251
, STOP
},
451 /* mac */ {wxFONTENCODING_MACCYRILLIC
, STOP
}
454 {{STOP
},{STOP
},{STOP
},{STOP
}} /* Terminator */
455 /* no, _not_ Arnold! */
// Helper used by the Get*Equivalents() functions to avoid adding an encoding
// to the result array twice: they push `*f` only when FindEncoding(arr, *f)
// is false.
// NOTE(review): only the start of the loop header is visible in this listing;
// presumably the function returns true when `f` is found in `arr` and false
// otherwise -- confirm against the full file.
459 static bool FindEncoding(const wxFontEncodingArray
& arr
, wxFontEncoding f
)
461 for (wxFontEncodingArray::const_iterator it
= arr
.begin(), en
= arr
.end();
// Collects the encodings that EquivalentEncodings lists for `platform` in the
// equivalence class containing `enc`. If `enc` itself is available on that
// platform it is placed first; the remaining encodings follow in table order
// without duplicates.
// NOTE(review): `platform` is used directly as an index into
// EquivalentEncodings in the visible lines, with no range check; the fallback
// for builds matching none of the platform macros below, the local
// declarations, the class counter update and the final return are not visible
// in this listing -- confirm against the full file.
468 wxFontEncodingArray
wxEncodingConverter::GetPlatformEquivalents(wxFontEncoding enc
, int platform
)
// Resolve "current platform" to a concrete platform id at compile time.
470 if (platform
== wxPLATFORM_CURRENT
)
472 #if defined(__WXMSW__)
473 platform
= wxPLATFORM_WINDOWS
;
474 #elif defined(__WXGTK__) || defined(__WXMOTIF__)
475 platform
= wxPLATFORM_UNIX
;
476 #elif defined(__WXOS2__)
477 platform
= wxPLATFORM_OS2
;
478 #elif defined(__WXMAC__)
479 platform
= wxPLATFORM_MAC
;
485 wxFontEncodingArray arr
;
// Walk the classes until the terminator class (first entry is STOP).
488 while (EquivalentEncodings
[clas
][0][0] != STOP
)
// Look for `enc` in any platform list of this class.
490 for (i
= 0; i
< NUM_OF_PLATFORMS
; i
++)
491 for (e
= 0; EquivalentEncodings
[clas
][i
][e
] != STOP
; e
++)
492 if (EquivalentEncodings
[clas
][i
][e
] == enc
)
// First pass: put `enc` itself at the front if the platform supports it.
494 for (f
= EquivalentEncodings
[clas
][platform
]; *f
!= STOP
; f
++)
495 if (*f
== enc
) arr
.push_back(enc
);
// Second pass: append the platform's other encodings, skipping duplicates.
496 for (f
= EquivalentEncodings
[clas
][platform
]; *f
!= STOP
; f
++)
497 if (!FindEncoding(arr
, *f
)) arr
.push_back(*f
);
// Setting i past the limit ends the outer platform loop as well as the
// inner one, so each class contributes at most once.
498 i
= NUM_OF_PLATFORMS
/*hack*/; break;
// Collects every encoding, across all platforms, that EquivalentEncodings
// lists in the equivalence class containing `enc`. The encodings for the
// current platform come first, followed by the others in table order without
// duplicates.
// NOTE(review): the local declarations, the class counter update and the
// final return are not visible in this listing -- confirm against the full
// file.
508 wxFontEncodingArray
wxEncodingConverter::GetAllEquivalents(wxFontEncoding enc
)
512 wxFontEncodingArray arr
;
514 arr
= GetPlatformEquivalents(enc
); // we want them to be first items in array
// Walk the classes until the terminator class (first entry is STOP).
517 while (EquivalentEncodings
[clas
][0][0] != STOP
)
// Look for `enc` in any platform list of this class.
519 for (i
= 0; i
< NUM_OF_PLATFORMS
; i
++)
520 for (e
= 0; EquivalentEncodings
[clas
][i
][e
] != STOP
; e
++)
521 if (EquivalentEncodings
[clas
][i
][e
] == enc
)
// Found: append every encoding of every platform in this class, skipping
// those already present in the result.
523 for (j
= 0; j
< NUM_OF_PLATFORMS
; j
++)
524 for (f
= EquivalentEncodings
[clas
][j
]; *f
!= STOP
; f
++)
525 if (!FindEncoding(arr
, *f
)) arr
.push_back(*f
);
// Setting i past the limit ends the outer platform loop as well as the
// inner one, so each class contributes at most once.
526 i
= NUM_OF_PLATFORMS
/*hack*/; break;