From: Vadim Zeitlin Date: Sun, 18 Sep 2005 14:01:14 +0000 (+0000) Subject: added GetAllEncodingNames(), use it to select the correct encoding name to pass to... X-Git-Url: https://git.saurik.com/wxWidgets.git/commitdiff_plain/8b3eb85d5ec8f68b168ae000bbbfa3af4b7fdb47 added GetAllEncodingNames(), use it to select the correct encoding name to pass to iconv_open() git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@35566 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775 --- diff --git a/include/wx/fontmap.h b/include/wx/fontmap.h index d7555d3b57..d3b0461e72 100644 --- a/include/wx/fontmap.h +++ b/include/wx/fontmap.h @@ -90,10 +90,13 @@ public: // get the n-th supported encoding static wxFontEncoding GetEncoding(size_t n); - // return internal string identifier for the encoding (see also - // GetEncodingDescription()) + // return canonical name of this encoding (this is a short string, + // GetEncodingDescription() returns a longer one) static wxString GetEncodingName(wxFontEncoding encoding); + // return a list of all names of this encoding (see GetEncodingName) + static const wxChar** GetAllEncodingNames(wxFontEncoding encoding); + // return user-readable string describing the given encoding // // NB: hard-coded now, but might change later (read it from config?) 
diff --git a/src/common/fmapbase.cpp b/src/common/fmapbase.cpp index 6e8e9467aa..e5a48a1602 100644 --- a/src/common/fmapbase.cpp +++ b/src/common/fmapbase.cpp @@ -96,6 +96,10 @@ static wxFontEncoding gs_encodings[] = wxFONTENCODING_UTF32BE, wxFONTENCODING_UTF32LE, wxFONTENCODING_EUC_JP, + wxFONTENCODING_DEFAULT, + wxFONTENCODING_BIG5, + wxFONTENCODING_SHIFT_JIS, + wxFONTENCODING_GB2312, }; // the descriptions for them @@ -141,56 +145,74 @@ static const wxChar* gs_encodingDescs[] = wxTRANSLATE( "Unicode 32 bit Big Endian (UTF-32BE)" ), wxTRANSLATE( "Unicode 32 bit Little Endian (UTF-32LE)" ), wxTRANSLATE( "Extended Unix Codepage for Japanese (EUC-JP)" ), + wxTRANSLATE( "US-ASCII" ), + wxTRANSLATE( "BIG5" ), + wxTRANSLATE( "SHIFT-JIS" ), + wxTRANSLATE( "GB-2312" ), }; // and the internal names (these are not translated on purpose!) -static const wxChar* gs_encodingNames[] = +static const wxChar* gs_encodingNames[WXSIZEOF(gs_encodingDescs)][7] = { - wxT( "iso-8859-1" ), - wxT( "iso-8859-2" ), - wxT( "iso-8859-3" ), - wxT( "iso-8859-4" ), - wxT( "iso-8859-5" ), - wxT( "iso-8859-6" ), - wxT( "iso-8859-7" ), - wxT( "iso-8859-8" ), - wxT( "iso-8859-9" ), - wxT( "iso-8859-10" ), - wxT( "iso-8859-11" ), - wxT( "iso-8859-12" ), - wxT( "iso-8859-13" ), - wxT( "iso-8859-14" ), - wxT( "iso-8859-15" ), - wxT( "koi8-r" ), - wxT( "koi8-u" ), - wxT( "windows-874" ), - wxT( "windows-932" ), - wxT( "windows-936" ), - wxT( "windows-949" ), - wxT( "windows-950" ), - wxT( "windows-1250" ), - wxT( "windows-1251" ), - wxT( "windows-1252" ), - wxT( "windows-1253" ), - wxT( "windows-1254" ), - wxT( "windows-1255" ), - wxT( "windows-1256" ), - wxT( "windows-1257" ), - wxT( "windows-437" ), - wxT( "utf-7" ), - wxT( "utf-8" ), - wxT( "utf-16" ), - wxT( "utf-16be" ), - wxT( "utf-16le" ), - wxT( "utf-32" ), - wxT( "utf-32be" ), - wxT( "utf-32le" ), - wxT( "euc-jp" ), + { wxT( "iso-8859-1" ), NULL }, + { wxT( "iso-8859-2" ), NULL }, + { wxT( "iso-8859-3" ), NULL }, + { wxT( "iso-8859-4" ), NULL 
}, + { wxT( "iso-8859-5" ), NULL }, + { wxT( "iso-8859-6" ), NULL }, + { wxT( "iso-8859-7" ), NULL }, + { wxT( "iso-8859-8" ), NULL }, + { wxT( "iso-8859-9" ), NULL }, + { wxT( "iso-8859-10" ), NULL }, + { wxT( "iso-8859-11" ), NULL }, + { wxT( "iso-8859-12" ), NULL }, + { wxT( "iso-8859-13" ), NULL }, + { wxT( "iso-8859-14" ), NULL }, + { wxT( "iso-8859-15" ), NULL }, + // although koi8-ru is not strictly speaking the same as koi8-r, + // they are similar enough to make mapping it to koi8 better than + // not recognizing it at all + { wxT( "koi8-r" ), wxT( "koi8-ru" ), NULL }, + { wxT( "koi8-u" ), NULL }, + { wxT( "windows-874" ), NULL }, + { wxT( "windows-932" ), NULL }, + { wxT( "windows-936" ), NULL }, + { wxT( "windows-949" ), wxT( "euc-kr" ), + wxT( "euckr" ), wxT( "euc_kr" ), NULL }, + { wxT( "windows-950" ), NULL }, + { wxT( "windows-1250" ), NULL }, + { wxT( "windows-1251" ), NULL }, + { wxT( "windows-1252" ), NULL }, + { wxT( "windows-1253" ), NULL }, + { wxT( "windows-1254" ), NULL }, + { wxT( "windows-1255" ), NULL }, + { wxT( "windows-1256" ), NULL }, + { wxT( "windows-1257" ), NULL }, + { wxT( "windows-437" ), NULL }, + { wxT( "UTF-7" ), NULL }, + { wxT( "UTF-8" ), NULL }, + { wxT( "UTF-16" ), NULL }, + { wxT( "UTF-16be" ), NULL }, + { wxT( "UTF-16le" ), NULL }, + { wxT( "UTF-32" ), wxT( "UCS-4" ), NULL }, + { wxT( "UTF-32be" ), wxT( "UCS-4be" ), NULL }, + { wxT( "UTF-32le" ), wxT( "UCS-4le" ), NULL }, + { wxT( "euc-jp" ), wxT( "eucJP" ), wxT( "euc_jp" ), wxT( "IBM-eucJP" ), NULL }, + { wxT( "us-ascii" ), wxT( "ascii" ), wxT("ANSI_X3.4-1968"), +#ifdef __SOLARIS__ + wxT("646"), +#endif +#ifdef __HPUX__ + wxT("roman8"), +#endif + wxT( "" ), NULL }, + { wxT( "big5" ), NULL }, + { wxT( "shift-jis" ), wxT( "shift_jis" ), wxT( "sjis" ), NULL }, + { wxT( "gb2312" ), NULL }, }; -wxCOMPILE_TIME_ASSERT( WXSIZEOF(gs_encodingDescs) == WXSIZEOF(gs_encodings) && - WXSIZEOF(gs_encodingNames) == WXSIZEOF(gs_encodings), - EncodingsArraysNotInSync ); 
+wxCOMPILE_TIME_ASSERT( WXSIZEOF(gs_encodingDescs) == WXSIZEOF(gs_encodings), EncodingsArraysNotInSync ); +wxCOMPILE_TIME_ASSERT( WXSIZEOF(gs_encodingNames) == WXSIZEOF(gs_encodings), EncodingsArraysNotInSync ); // ---------------------------------------------------------------------------- // private classes @@ -468,82 +490,18 @@ wxFontMapperBase::NonInteractiveCharsetToEncoding(const wxString& charset) } } - cs.MakeUpper(); - - if ( cs.empty() || cs == _T("US-ASCII") ) - { - encoding = wxFONTENCODING_DEFAULT; - } - else if ( cs == wxT("UTF-7") ) - { - encoding = wxFONTENCODING_UTF7; - } - else if ( cs == wxT("UTF-8") ) - { - encoding = wxFONTENCODING_UTF8; - } - else if ( cs == wxT("UTF-16") ) - { - encoding = wxFONTENCODING_UTF16; - } - else if ( cs == wxT("UTF-16BE") ) - { - encoding = wxFONTENCODING_UTF16BE; - } - else if ( cs == wxT("UTF-16LE") ) - { - encoding = wxFONTENCODING_UTF16LE; - } - else if ( cs == wxT("UTF-32") || cs == wxT("UCS-4") ) - { - encoding = wxFONTENCODING_UTF32; - } - else if ( cs == wxT("UTF-32BE") || cs == wxT("UCS-4BE") ) - { - encoding = wxFONTENCODING_UTF32BE; - } - else if ( cs == wxT("UTF-32LE") || cs == wxT("UCS-4LE") ) - { - encoding = wxFONTENCODING_UTF32LE; - } - else if ( cs == wxT("GB2312") ) - { - encoding = wxFONTENCODING_GB2312; - } - else if ( cs == wxT("BIG5") ) - { - encoding = wxFONTENCODING_BIG5; - } - else if ( cs == wxT("SJIS") || - cs == wxT("SHIFT_JIS") || - cs == wxT("SHIFT-JIS") ) + for ( size_t i = 0; i < WXSIZEOF(gs_encodingNames); ++i ) { - encoding = wxFONTENCODING_SHIFT_JIS; - } - else if ( cs == wxT("EUC-JP") || - cs == wxT("EUC_JP") || - cs == wxT("EUCJP") ) - { - encoding = wxFONTENCODING_EUC_JP; - } - else if ( cs == wxT("EUC-KR") || - cs == wxT("EUC_KR") ) - { - encoding = wxFONTENCODING_CP949; - } - else if ( cs == wxT("KOI8-R") || - cs == wxT("KOI8-RU") ) - { - // although koi8-ru is not strictly speaking the same as koi8-r, - // they are similar enough to make mapping it to koi8 better than - // 
not recognizing it at all - encoding = wxFONTENCODING_KOI8; - } - else if ( cs == wxT("KOI8-U") ) - { - encoding = wxFONTENCODING_KOI8_U; + for ( const wxChar** encName = gs_encodingNames[i]; *encName; ++encName ) + { + if ( cs.CmpNoCase(*encName) == 0 ) + return gs_encodings[i]; + } } - else if ( cs.Left(3) == wxT("ISO") ) + + cs.MakeUpper(); + + if ( cs.Left(3) == wxT("ISO") ) { // the dash is optional (or, to be exact, it is not, but // several brokenmails "forget" it) @@ -712,7 +670,7 @@ wxString wxFontMapperBase::GetEncodingName(wxFontEncoding encoding) { if ( gs_encodings[i] == encoding ) { - return gs_encodingNames[i]; + return gs_encodingNames[i][0]; } } @@ -722,6 +680,22 @@ wxString wxFontMapperBase::GetEncodingName(wxFontEncoding encoding) return str; } +/* static */ +const wxChar** wxFontMapperBase::GetAllEncodingNames(wxFontEncoding encoding) +{ + static const wxChar* dummy[] = { NULL }; + + for ( size_t i = 0; i < WXSIZEOF(gs_encodingNames); i++ ) + { + if ( gs_encodings[i] == encoding ) + { + return gs_encodingNames[i]; + } + } + + return dummy; +} + /* static */ wxFontEncoding wxFontMapperBase::GetEncodingFromName(const wxString& name) { @@ -729,9 +703,10 @@ wxFontEncoding wxFontMapperBase::GetEncodingFromName(const wxString& name) for ( size_t i = 0; i < count; i++ ) { - if ( gs_encodingNames[i] == name ) + for ( const wxChar** encName = gs_encodingNames[i]; *encName; ++encName ) { - return gs_encodings[i]; + if ( name.CmpNoCase(*encName) == 0 ) // case-insensitive: the names in gs_encodingNames are not all lower case (e.g. "UTF-8", "eucJP") + return gs_encodings[i]; } } diff --git a/src/common/strconv.cpp b/src/common/strconv.cpp index 3154d94232..4de3f0927a 100644 --- a/src/common/strconv.cpp +++ b/src/common/strconv.cpp @@ -2509,6 +2509,15 @@ void wxCSConv::SetName(const wxChar *charset) } } +#if wxUSE_FONTMAP +#include "wx/hashmap.h" + +WX_DECLARE_HASH_MAP( wxFontEncoding, wxString, wxIntegerHash, wxIntegerEqual, + wxEncodingNameCache ); + +static wxEncodingNameCache gs_nameCache; +#endif + wxMBConv *wxCSConv::DoCreate() const { // check for the special 
case of ASCII or ISO8859-1 charset: as we have @@ -2535,17 +2544,53 @@ wxMBConv *wxCSConv::DoCreate() const #endif // !wxUSE_FONTMAP { wxString name(m_name); + wxFontEncoding encoding(m_encoding); + + if ( !name.empty() ) + { + wxMBConv_iconv *conv = new wxMBConv_iconv(name); + if ( conv->IsOk() ) + return conv; + + delete conv; #if wxUSE_FONTMAP - if ( name.empty() ) - name = wxFontMapperBase::GetEncodingName(m_encoding); + encoding = + wxFontMapperBase::Get()->CharsetToEncoding(name, false); #endif // wxUSE_FONTMAP + } +#if wxUSE_FONTMAP + { + const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding); + if ( it != gs_nameCache.end() ) + { + if ( it->second.empty() ) + return NULL; - wxMBConv_iconv *conv = new wxMBConv_iconv(name); - if ( conv->IsOk() ) - return conv; + wxMBConv_iconv *conv = new wxMBConv_iconv(it->second); + if ( conv->IsOk() ) + return conv; - delete conv; + delete conv; + } + + const wxChar** names = wxFontMapperBase::GetAllEncodingNames(encoding); + + for ( ; *names; ++names ) + { + wxMBConv_iconv *conv = new wxMBConv_iconv(*names); + if ( conv->IsOk() ) + { + gs_nameCache[encoding] = *names; + return conv; + } + + delete conv; + } + + gs_nameCache[encoding] = wxT(""); // cache the failure: an empty cached name is what the it->second.empty() check above treats as "no usable iconv name" + } +#endif // wxUSE_FONTMAP } #endif // HAVE_ICONV