1 /////////////////////////////////////////////////////////////////////////////
3 // Purpose: wxEncodingConverter class for converting between different font encodings
5 // Author: Vaclav Slavik
6 // Copyright: (c) 1999 Vaclav Slavik
7 // Licence: wxWindows licence
8 /////////////////////////////////////////////////////////////////////////////
10 // For compilers that support precompilation, includes "wx.h".
11 #include "wx/wxprec.h"
17 #include "wx/encconv.h"
21 // conversion tables, generated by scripts in $(WXWIN)/misc/unictabl:
22 #if defined( __BORLANDC__ ) || defined(__DARWIN__)
23 #include "../common/unictabl.inc"
25 #include "unictabl.inc"
29 #include "wx/osx/core/cfstring.h"
30 #include <CoreFoundation/CFStringEncodingExt.h>
// Cache of conversion tables for the Mac encodings: one row of 128 16-bit
// entries for every encoding in [wxFONTENCODING_MACMIN, wxFONTENCODING_MACMAX].
// Rows are filled on demand by GetEncTable().
32 wxUint16 gMacEncodings
[wxFONTENCODING_MACMAX
-wxFONTENCODING_MACMIN
+1][128] ;
// One flag per Mac encoding; GetEncTable() sets it to true after it has
// filled the matching row of gMacEncodings.
33 bool gMacEncodingsInited
[wxFONTENCODING_MACMAX
-wxFONTENCODING_MACMIN
+1] ;
37 #include "wx/msw/wince/missing.h" // for bsearch()
// Returns the table of 128 16-bit codes describing the upper half (bytes
// 128..255) of the 8-bit encoding `enc`.
//
// Encodings in the Mac range are served from gMacEncodings, which is built on
// first use with CoreFoundation; all other encodings are looked up in
// encodings_list.
//
// NOTE(review): several lines of this function are not visible in this
// excerpt (in particular what is returned when no table is found); callers in
// Init() compare the result against NULL.
40 static const wxUint16
* GetEncTable(wxFontEncoding enc
)
// Mac encodings: use the lazily built per-encoding cache.
43 if( enc
>= wxFONTENCODING_MACMIN
&& enc
<= wxFONTENCODING_MACMAX
)
// Row index of this encoding inside gMacEncodings / gMacEncodingsInited.
45 int i
= enc
-wxFONTENCODING_MACMIN
;
// Build the row only the first time this encoding is requested.
46 if ( gMacEncodingsInited
[i
] == false )
49 CFStringEncoding cfencoding
= wxMacGetSystemEncFromFontEnc( enc
) ;
// NOTE(review): the statement guarded by this availability check is not
// visible in this excerpt - presumably it bails out early; confirm.
50 if( !CFStringIsEncodingAvailable( cfencoding
) )
// Clear the whole row: 128 entries of 2 bytes each.
53 memset( gMacEncodings
[i
] , 0 , 128 * 2 );
// One-character, NUL-terminated scratch string handed to CoreFoundation.
54 char s
[2] = { 0 , 0 };
// Range selecting only the first character of the decoded string.
55 CFRange firstchar
= CFRangeMake( 0, 1 );
// Walk the upper half from 255 down to 128; the slot for byte c is c-128.
56 for( unsigned char c
= 255 ; c
>= 128 ; --c
)
// Interpret `s` in `cfencoding` and store the first character of the result
// in the table slot for c.
// NOTE(review): the line that stores c into `s` is not visible here.
59 wxCFStringRef
cfref( CFStringCreateWithCStringNoCopy( NULL
, s
, cfencoding
, kCFAllocatorNull
) );
60 CFStringGetCharacters( cfref
, firstchar
, (UniChar
*) &gMacEncodings
[i
][c
-128] );
// Remember that this row is now valid so later calls skip the rebuild.
62 gMacEncodingsInited
[i
]=true;
64 return gMacEncodings
[i
] ;
// All other encodings: linear search of encodings_list, whose end is marked
// by an entry with a NULL table.
68 for (int i
= 0; encodings_list
[i
].table
!= NULL
; i
++)
70 if (encodings_list
[i
].encoding
== enc
)
71 return encodings_list
[i
].table
;
// Comparison callback used with qsort() and bsearch() in this file: orders
// CharsetItem records by their `u` member.
// Returns the difference of the two `u` values, i.e. negative, zero or
// positive when i1 sorts before, equal to or after i2.
// NOTE(review): CharsetItem is declared outside this excerpt; `u` presumably
// is a 16-bit unsigned value (it is filled from wxUint16 tables), in which
// case the subtraction cannot overflow int - confirm.
83 static int wxCMPFUNC_CONV
84 CompareCharsetItems(const void *i1
, const void *i2
)
86 return ( ((CharsetItem
*)i1
) -> u
- ((CharsetItem
*)i2
) -> u
);
// Builds the reverse of an encoding table: an array of 128 CharsetItem
// records pairing each code tbl[i] (member `u`) with the byte 128+i (member
// `c`), sorted by `u` so that it can be searched with bsearch().
//
// tbl: table of 128 codes for bytes 128..255.
// NOTE(review): the return statement is not visible in this excerpt; Init()
// uses the result as the sorted array. The array is allocated with new[], so
// presumably the caller is responsible for delete[] - confirm.
90 static CharsetItem
* BuildReverseTable(const wxUint16
*tbl
)
92 CharsetItem
*rev
= new CharsetItem
[128];
// Entry i describes byte 128+i and the code it maps to.
94 for (int i
= 0; i
< 128; i
++)
95 rev
[i
].c
= wxUint8(128 + i
), rev
[i
].u
= tbl
[i
];
// Sort by code using the same comparator bsearch() is later called with.
97 qsort(rev
, 128, sizeof(CharsetItem
), CompareCharsetItems
);
// Default constructor: marks both input and output as non-Unicode until
// Init() is called.
// NOTE(review): initialisation of the other members is not visible in this
// excerpt.
104 wxEncodingConverter::wxEncodingConverter()
107 m_UnicodeInput
= m_UnicodeOutput
= false;
// Prepares the converter for translating text from input_enc to output_enc
// by building the lookup table m_Table.
//
// input_enc, output_enc: source and target encodings; either may be
//     wxFONTENCODING_UNICODE.
// method: when equal to wxCONVERT_SUBSTITUTE, characters without an exact
//     counterpart are mapped through encoding_unicode_fallback where possible.
// Returns true immediately when both encodings are equal (plain copy mode),
// and false when GetEncTable() has no table for a required encoding.
// NOTE(review): some lines of this function (local declarations, some
// branches and the final return) are not visible in this excerpt.
113 bool wxEncodingConverter::Init(wxFontEncoding input_enc
, wxFontEncoding output_enc
, int method
)
116 const wxUint16
*in_tbl
;
117 const wxUint16
*out_tbl
= NULL
;
// Same encoding on both sides: no table is needed, Convert() just copies.
121 if (input_enc
== output_enc
) {m_JustCopy
= true; return true;}
123 m_UnicodeOutput
= (output_enc
== wxFONTENCODING_UNICODE
);
// Case 1: Unicode input, 8-bit output.
126 if (input_enc
== wxFONTENCODING_UNICODE
)
128 if ((out_tbl
= GetEncTable(output_enc
)) == NULL
) return false;
// The table is indexed by a 16-bit code, hence 65536 entries.
130 m_Table
= new wchar_t[65536];
// 0 marks "no mapping"; GetTableValue() substitutes REPLACEMENT_CHAR for it.
131 for (i
= 0; i
< 128; i
++) m_Table
[i
] = (wchar_t)i
; // 7bit ASCII
132 for (i
= 128; i
< 65536; i
++) m_Table
[i
] = (wchar_t)0;
// Seed the approximate substitutions first so that the exact mappings
// written afterwards take precedence over them.
134 if (method
== wxCONVERT_SUBSTITUTE
)
136 for (i
= 0; i
< encoding_unicode_fallback_count
; i
++)
137 m_Table
[encoding_unicode_fallback
[i
].c
] = (wchar_t) encoding_unicode_fallback
[i
].s
;
// Exact mappings: the code of output byte 128+i maps back to that byte.
140 for (i
= 0; i
< 128; i
++)
141 m_Table
[out_tbl
[i
]] = (wchar_t)(128 + i
);
143 m_UnicodeInput
= true;
// Case 2: 8-bit input. The input table is always required; the output table
// only when the output is 8-bit as well.
145 else // input !Unicode
147 if ((in_tbl
= GetEncTable(input_enc
)) == NULL
) return false;
148 if (output_enc
!= wxFONTENCODING_UNICODE
)
149 if ((out_tbl
= GetEncTable(output_enc
)) == NULL
) return false;
151 m_UnicodeInput
= false;
// The table is indexed by an input byte, hence 256 entries.
153 m_Table
= new wchar_t[256];
154 for (i
= 0; i
< 128; i
++) m_Table
[i
] = (wchar_t)i
; // 7bit ASCII
// 8-bit -> Unicode: the upper half comes straight from the input table.
156 if (output_enc
== wxFONTENCODING_UNICODE
)
158 for (i
= 0; i
< 128; i
++) m_Table
[128 + i
] = (wchar_t)in_tbl
[i
];
// 8-bit -> 8-bit: find each input byte's code in the output encoding's
// reverse table, which BuildReverseTable() sorts for bsearch().
161 else // output !Unicode
163 CharsetItem
*rev
= BuildReverseTable(out_tbl
);
167 for (i
= 0; i
< 128; i
++)
// NOTE(review): the declaration and setup of `key` and `item` are not
// visible in this excerpt; presumably key.u holds in_tbl[i] - confirm.
170 item
= (CharsetItem
*) bsearch(&key
, rev
, 128, sizeof(CharsetItem
), CompareCharsetItems
);
// No exact match and substitution allowed: search the fallback table.
// NOTE(review): this applies CompareCharsetItems and sizeof(CharsetItem) to
// encoding_unicode_fallback, so it relies on that table's elements having
// the same layout as CharsetItem and being sorted by code - confirm against
// unictabl.inc.
171 if (item
== NULL
&& method
== wxCONVERT_SUBSTITUTE
)
172 item
= (CharsetItem
*) bsearch(&key
, encoding_unicode_fallback
,
173 encoding_unicode_fallback_count
, sizeof(CharsetItem
), CompareCharsetItems
);
// NOTE(review): the condition choosing between the next two assignments is
// not visible here; presumably the first applies when a match was found and
// the second (byte kept unchanged) otherwise.
175 m_Table
[128 + i
] = (wchar_t)item
-> c
;
177 m_Table
[128 + i
] = (wchar_t)(128 + i
);
// Character stored in the output in place of an input character that has no
// entry in the conversion table.
188 #define REPLACEMENT_CHAR (L'?')
// Looks up `value` in `table`. A zero entry for a non-zero input means "no
// mapping" and is turned into REPLACEMENT_CHAR.
// repl: in/out flag that the Convert() overloads pass in as `replaced`.
// NOTE(review): the lines that update `repl` and return the result are not
// visible in this excerpt; presumably repl is set when a replacement happens.
190 inline wchar_t GetTableValue(const wchar_t *table
, wchar_t value
, bool& repl
)
192 wchar_t r
= table
[value
];
193 if (r
== 0 && value
!= 0)
195 r
= REPLACEMENT_CHAR
;
// Converts the NUL-terminated 8-bit string `input` into the 8-bit buffer
// `output`, one output character per input character.
// Only valid when Init() configured neither side as Unicode (asserted below).
// NOTE(review): local declarations, the condition guarding the strcpy(), the
// output terminator and the return statements are not visible in this
// excerpt; the caller presumably must provide a buffer at least as large as
// the input including its terminator.
202 bool wxEncodingConverter::Convert(const char* input
, char* output
) const
204 wxASSERT_MSG(!m_UnicodeOutput
, wxT("You cannot convert to unicode if output is const char*!"));
205 wxASSERT_MSG(!m_UnicodeInput
, wxT("You cannot convert from unicode if input is const char*!"));
// Plain copy - presumably the m_JustCopy path (its condition is elided).
212 strcpy(output
, input
);
216 wxCHECK_MSG(m_Table
!= NULL
, false,
217 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
// Passed to GetTableValue() for every character of the input.
219 bool replaced
= false;
// Translate byte by byte; the wxUint8 cast makes the byte a 0..255 index.
221 for (i
= input
, o
= output
; *i
!= 0;)
222 *(o
++) = (char)(GetTableValue(m_Table
, (wxUint8
)*(i
++), replaced
));
// Converts the NUL-terminated 8-bit string `input` into the wide-character
// buffer `output`, one output character per input byte.
// Only valid when Init() configured 8-bit input and Unicode output (asserted
// below).
// NOTE(review): local declarations, the condition guarding the first loop,
// the output terminator and the return statements are not visible in this
// excerpt.
229 bool wxEncodingConverter::Convert(const char* input
, wchar_t* output
) const
231 wxASSERT_MSG(m_UnicodeOutput
, wxT("You cannot convert to 8-bit if output is const wchar_t*!"));
232 wxASSERT_MSG(!m_UnicodeInput
, wxT("You cannot convert from unicode if input is const char*!"));
// Table-free copy that widens each character - presumably the m_JustCopy
// path.
// NOTE(review): if `i` is a plain `const char*` and char is signed, bytes
// above 127 are sign-extended by this cast - confirm whether that matters.
239 for (i
= input
, o
= output
; *i
!= 0;)
240 *(o
++) = (wchar_t)(*(i
++));
245 wxCHECK_MSG(m_Table
!= NULL
, false,
246 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
// Passed to GetTableValue() for every character of the input.
248 bool replaced
= false;
// Translate through the 256-entry table; wxUint8 makes the byte a 0..255
// index.
250 for (i
= input
, o
= output
; *i
!= 0;)
251 *(o
++) = (wchar_t)(GetTableValue(m_Table
, (wxUint8
)*(i
++), replaced
));
// Converts the NUL-terminated wide string `input` into the 8-bit buffer
// `output`, one output byte per input character.
// Only valid when Init() configured Unicode input and 8-bit output (asserted
// below).
// NOTE(review): local declarations, the condition guarding the first loop,
// the output terminator and the return statements are not visible in this
// excerpt.
259 bool wxEncodingConverter::Convert(const wchar_t* input
, char* output
) const
261 wxASSERT_MSG(!m_UnicodeOutput
, wxT("You cannot convert to unicode if output is const char*!"));
262 wxASSERT_MSG(m_UnicodeInput
, wxT("You cannot convert from 8-bit if input is const wchar_t*!"));
// Table-free copy that narrows each character to char - presumably the
// m_JustCopy path.
269 for (i
= input
, o
= output
; *i
!= 0;)
270 *(o
++) = (char)(*(i
++));
275 wxCHECK_MSG(m_Table
!= NULL
, false,
276 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
// Passed to GetTableValue() for every character of the input.
278 bool replaced
= false;
// Translate through the 65536-entry table Init() builds for Unicode input.
// NOTE(review): the wxUint16 cast keeps only the low 16 bits of each input
// character, so where wchar_t is wider, codes above 0xFFFF alias lower ones.
280 for (i
= input
, o
= output
; *i
!= 0;)
281 *(o
++) = (char)(GetTableValue(m_Table
, (wxUint16
)*(i
++), replaced
));
// Converts the NUL-terminated wide string `input` into the wide-character
// buffer `output`.
// Only valid when Init() configured both sides as Unicode (asserted below).
// NOTE(review): local declarations, the body of the first loop, the output
// terminator and the return statements are not visible in this excerpt.
289 bool wxEncodingConverter::Convert(const wchar_t* input
, wchar_t* output
) const
291 wxASSERT_MSG(m_UnicodeOutput
, wxT("You cannot convert to 8-bit if output is const wchar_t*!"));
292 wxASSERT_MSG(m_UnicodeInput
, wxT("You cannot convert from 8-bit if input is const wchar_t*!"));
// Hand-written copy loop used instead of wcscpy().
299 // wcscpy() is not guaranteed to exist
300 for (i
= input
, o
= output
; *i
!= 0;)
306 wxCHECK_MSG(m_Table
!= NULL
, false,
307 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
// Passed to GetTableValue() for every character of the input.
309 bool replaced
= false;
// NOTE(review): the index is cast to wxUint8 here, so only the low 8 bits of
// each wide character select the table entry, whereas the wchar_t -> char
// overload casts to wxUint16 - confirm this is intended.
311 for (i
= input
, o
= output
; *i
!= 0;)
312 *(o
++) = (wchar_t)(GetTableValue(m_Table
, (wxUint8
)*(i
++), replaced
));
// Returns a converted copy of `input`.
// In copy mode (m_JustCopy) the input is returned unchanged. Otherwise every
// character is looked up directly in m_Table and appended to the result `s`;
// unlike the pointer overloads, this one does not go through GetTableValue(),
// so no replacement character is substituted here.
// NOTE(review): the declarations of `s` and `i`, the condition selecting
// between the two loops and the final return are not visible in this excerpt.
319 wxString
wxEncodingConverter::Convert(const wxString
& input
) const
321 if (m_JustCopy
) return input
;
// When Init() has not built a table, `s` is returned as it stands.
326 wxCHECK_MSG(m_Table
!= NULL
, s
,
327 wxT("You must call wxEncodingConverter::Init() before actually converting!"));
// Variant indexing the table with a 16-bit value - presumably used when the
// converter was initialised for Unicode input (65536-entry table).
331 for (i
= input
.c_str(); *i
!= 0; i
++)
332 s
<< (wxChar
)(m_Table
[(wxUint16
)*i
]);
// Variant indexing the table with an 8-bit value - presumably used for 8-bit
// input (256-entry table).
336 for (i
= input
.c_str(); *i
!= 0; i
++)
337 s
<< (wxChar
)(m_Table
[(wxUint8
)*i
]);
349 // Following tables describe classes of encoding equivalence.
// STOP terminates every per-platform encoding list and, as the first entry
// of a class, marks the end of the whole table.
352 #define STOP wxFONTENCODING_SYSTEM
354 #define NUM_OF_PLATFORMS 4 /*must conform to enum wxPLATFORM_XXXX !!!*/
355 #define ENC_PER_PLATFORM 3
356 // max no. of encodings for one language used on one platform.
357 // Using maximum of everything at the current moment to not make the
358 // library larger than necessary. Make larger only if necessary - MR
// Indexed as EquivalentEncodings[class][platform][n]; the extra slot in the
// last dimension leaves room for the STOP terminator.
// NOTE(review): only three of the NUM_OF_PLATFORMS rows of each class are
// visible in this excerpt (and only two for one class); the remaining rows
// are elided.
360 static const wxFontEncoding
361 EquivalentEncodings
[][NUM_OF_PLATFORMS
][ENC_PER_PLATFORM
+1] = {
363 // *** Please put more common encodings as first! ***
367 /* unix */ {wxFONTENCODING_ISO8859_1
, wxFONTENCODING_ISO8859_15
, STOP
},
368 /* windows */ {wxFONTENCODING_CP1252
, STOP
},
370 /* mac */ {wxFONTENCODING_MACROMAN
, STOP
}
375 /* unix */ {wxFONTENCODING_ISO8859_2
, STOP
},
376 /* windows */ {wxFONTENCODING_CP1250
, STOP
},
378 /* mac */ {wxFONTENCODING_MACCENTRALEUR
, STOP
}
383 /* unix */ {wxFONTENCODING_ISO8859_13
, wxFONTENCODING_ISO8859_4
, STOP
},
384 /* windows */ {wxFONTENCODING_CP1257
, STOP
},
391 /* unix */ {wxFONTENCODING_ISO8859_8
, STOP
},
392 /* windows */ {wxFONTENCODING_CP1255
, STOP
},
394 /* mac */ {wxFONTENCODING_MACHEBREW
, STOP
}
399 /* unix */ {wxFONTENCODING_ISO8859_7
, STOP
},
400 /* windows */ {wxFONTENCODING_CP1253
, STOP
},
402 /* mac */ {wxFONTENCODING_MACGREEK
, STOP
}
407 /* unix */ {wxFONTENCODING_ISO8859_6
, STOP
},
408 /* windows */ {wxFONTENCODING_CP1256
, STOP
},
410 /* mac */ {wxFONTENCODING_MACARABIC
, STOP
}
415 /* unix */ {wxFONTENCODING_ISO8859_9
, STOP
},
416 /* windows */ {wxFONTENCODING_CP1254
, STOP
},
418 /* mac */ {wxFONTENCODING_MACTURKISH
, STOP
}
// This class uses all ENC_PER_PLATFORM slots of its unix row plus the STOP.
423 /* unix */ {wxFONTENCODING_KOI8
, wxFONTENCODING_KOI8_U
, wxFONTENCODING_ISO8859_5
, STOP
},
424 /* windows */ {wxFONTENCODING_CP1251
, STOP
},
426 /* mac */ {wxFONTENCODING_MACCYRILLIC
, STOP
}
// End-of-table marker: the scanning loops stop at the class whose [0][0]
// entry is STOP.
429 {{STOP
},{STOP
},{STOP
},{STOP
}} /* Terminator */
430 /* no, _not_ Arnold! */
// Membership test over `arr` for encoding `f`. The callers in this file use
// `!FindEncoding(arr, x)` as the guard before arr.push_back(x), i.e. to avoid
// adding duplicates.
// NOTE(review): the rest of the loop header and the loop body are not visible
// in this excerpt; presumably it returns true when `f` is found.
434 static bool FindEncoding(const wxFontEncodingArray
& arr
, wxFontEncoding f
)
// The end iterator is fetched once, in the loop initialiser.
436 for (wxFontEncodingArray::const_iterator it
= arr
.begin(), en
= arr
.end();
// Returns the encodings of the given platform that belong to the same
// equivalence class as `enc`, as listed in EquivalentEncodings.
//
// enc: encoding to find equivalents for.
// platform: one of the wxPLATFORM_XXX values; wxPLATFORM_CURRENT is resolved
//     from the build's port macro.
// If `enc` itself is listed for that platform it is placed first in the
// result, followed by the platform's other encodings without duplicates.
// NOTE(review): `platform` is used directly as an array index and no range
// check is visible in this excerpt. The declarations of clas/i/e, the advance
// of `clas` and the final return are also not visible.
443 wxFontEncodingArray
wxEncodingConverter::GetPlatformEquivalents(wxFontEncoding enc
, int platform
)
// Map "current platform" onto a concrete table row at compile time.
445 if (platform
== wxPLATFORM_CURRENT
)
447 #if defined(__WXMSW__)
448 platform
= wxPLATFORM_WINDOWS
;
449 #elif defined(__WXGTK__) || defined(__WXMOTIF__)
450 platform
= wxPLATFORM_UNIX
;
451 #elif defined(__WXPM__)
452 platform
= wxPLATFORM_OS2
;
453 #elif defined(__WXMAC__)
454 platform
= wxPLATFORM_MAC
;
459 const wxFontEncoding
*f
;
460 wxFontEncodingArray arr
;
// Scan the classes up to the terminator class, whose first entry is STOP.
463 while (EquivalentEncodings
[clas
][0][0] != STOP
)
// Look for `enc` in every platform row of the current class.
465 for (i
= 0; i
< NUM_OF_PLATFORMS
; i
++)
466 for (e
= 0; EquivalentEncodings
[clas
][i
][e
] != STOP
; e
++)
467 if (EquivalentEncodings
[clas
][i
][e
] == enc
)
// First pass: if the requested platform lists `enc` itself, put it first.
469 for (f
= EquivalentEncodings
[clas
][platform
]; *f
!= STOP
; f
++)
470 if (*f
== enc
) arr
.push_back(enc
);
// Second pass: append that platform's remaining encodings, skipping any
// already present in arr.
471 for (f
= EquivalentEncodings
[clas
][platform
]; *f
!= STOP
; f
++)
472 if (!FindEncoding(arr
, *f
)) arr
.push_back(*f
);
// Leave both loops: `break` ends the inner one, and forcing i to the limit
// ends the outer one.
473 i
= NUM_OF_PLATFORMS
/*hack*/; break;
483 wxFontEncodingArray
wxEncodingConverter::GetAllEquivalents(wxFontEncoding enc
)
486 const wxFontEncoding
*f
;
487 wxFontEncodingArray arr
;
489 arr
= GetPlatformEquivalents(enc
); // we want them to be first items in array
492 while (EquivalentEncodings
[clas
][0][0] != STOP
)
494 for (i
= 0; i
< NUM_OF_PLATFORMS
; i
++)
495 for (e
= 0; EquivalentEncodings
[clas
][i
][e
] != STOP
; e
++)
496 if (EquivalentEncodings
[clas
][i
][e
] == enc
)
498 for (j
= 0; j
< NUM_OF_PLATFORMS
; j
++)
499 for (f
= EquivalentEncodings
[clas
][j
]; *f
!= STOP
; f
++)
500 if (!FindEncoding(arr
, *f
)) arr
.push_back(*f
);
501 i
= NUM_OF_PLATFORMS
/*hack*/; break;