From 874dbd3a4a318bd7f34548b84cfe0aed9efa42d5 Mon Sep 17 00:00:00 2001 From: Vadim Zeitlin Date: Sat, 28 Nov 2009 14:37:03 +0000 Subject: [PATCH] Allow calling wxStrchr() with a narrow string and wide character. Calls to wxStrchr(char-string, wide-char) would previously fail if wide character couldn't be converted to a single character in the current locale encoding. Change it to simply return NULL in this case as it's a safe and useful generalization: a narrow string will never contain a wide character not representable in the current locale. Add wxUniChar::GetAsChar() to help with implementing this. Closes #11487. git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@62738 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775 --- include/wx/unichar.h | 25 +++++++++++++++++++++- include/wx/wxcrt.h | 48 +++++++++++++++++++++--------------------- interface/wx/unichar.h | 24 +++++++++++++++++++++ src/common/unichar.cpp | 32 ++++++++++++++++------------ tests/strings/crt.cpp | 15 +++++++++++++ 5 files changed, 105 insertions(+), 39 deletions(-) diff --git a/include/wx/unichar.h b/include/wx/unichar.h index 9d0b593bd7..5d019f52ba 100644 --- a/include/wx/unichar.h +++ b/include/wx/unichar.h @@ -68,6 +68,27 @@ public: // Returns true if the character is an ASCII character: bool IsAscii() const { return m_value < 0x80; } + // Returns true if the character is representable as a single byte in the + // current locale encoding and return this byte in output argument c (which + // must be non-NULL) + bool GetAsChar(char *c) const + { +#if wxUSE_UNICODE + if ( !IsAscii() ) + { +#if !wxUSE_UTF8_LOCALE_ONLY + if ( GetAsHi8bit(m_value, c) ) + return true; +#endif // !wxUSE_UTF8_LOCALE_ONLY + + return false; + } +#endif // wxUSE_UNICODE + + *c = wx_truncate_cast(char, m_value); + return true; + } + // Conversions to char and wchar_t types: all of those are needed to be // able to pass wxUniChars to verious standard narrow and wide character // functions @@ -165,7 +186,8 @@ private: // 
helpers of the functions above called to deal with non-ASCII chars static value_type FromHi8bit(char c); - static char ToHi8bit(value_type c); + static char ToHi8bit(value_type v); + static bool GetAsHi8bit(value_type v, char *c); private: value_type m_value; @@ -209,6 +231,7 @@ public: #endif // wxUSE_UNICODE_UTF8 bool IsAscii() const { return UniChar().IsAscii(); } + bool GetAsChar(char *c) const { return UniChar().GetAsChar(c); } // Assignment operators: #if wxUSE_UNICODE_UTF8 diff --git a/include/wx/wxcrt.h b/include/wx/wxcrt.h index 2b8fb3e9aa..a348b14711 100644 --- a/include/wx/wxcrt.h +++ b/include/wx/wxcrt.h @@ -611,20 +611,20 @@ inline const char *wxStrrchr(const char *s, char c) { return wxCRT_StrrchrA(s, c); } inline const wchar_t *wxStrrchr(const wchar_t *s, wchar_t c) { return wxCRT_StrrchrW(s, c); } -inline const char *wxStrchr(const char *s, const wxUniChar& c) - { return wxCRT_StrchrA(s, (char)c); } +inline const char *wxStrchr(const char *s, const wxUniChar& uc) + { char c; return uc.GetAsChar(&c) ? wxCRT_StrchrA(s, c) : NULL; } inline const wchar_t *wxStrchr(const wchar_t *s, const wxUniChar& c) { return wxCRT_StrchrW(s, (wchar_t)c); } -inline const char *wxStrrchr(const char *s, const wxUniChar& c) - { return wxCRT_StrrchrA(s, (char)c); } +inline const char *wxStrrchr(const char *s, const wxUniChar& uc) + { char c; return uc.GetAsChar(&c) ? wxCRT_StrrchrA(s, c) : NULL; } inline const wchar_t *wxStrrchr(const wchar_t *s, const wxUniChar& c) { return wxCRT_StrrchrW(s, (wchar_t)c); } -inline const char *wxStrchr(const char *s, const wxUniCharRef& c) - { return wxCRT_StrchrA(s, (char)c); } +inline const char *wxStrchr(const char *s, const wxUniCharRef& uc) + { char c; return uc.GetAsChar(&c) ? 
wxCRT_StrchrA(s, c) : NULL; } inline const wchar_t *wxStrchr(const wchar_t *s, const wxUniCharRef& c) { return wxCRT_StrchrW(s, (wchar_t)c); } -inline const char *wxStrrchr(const char *s, const wxUniCharRef& c) - { return wxCRT_StrrchrA(s, (char)c); } +inline const char *wxStrrchr(const char *s, const wxUniCharRef& uc) + { char c; return uc.GetAsChar(&c) ? wxCRT_StrrchrA(s, c) : NULL; } inline const wchar_t *wxStrrchr(const wchar_t *s, const wxUniCharRef& c) { return wxCRT_StrrchrW(s, (wchar_t)c); } template <typename T> @@ -655,14 +655,14 @@ inline const char* wxStrchr(const wxString& s, int c) { return wxCRT_StrchrA((const char*)s.c_str(), c); } inline const char* wxStrrchr(const wxString& s, int c) { return wxCRT_StrrchrA((const char*)s.c_str(), c); } -inline const char* wxStrchr(const wxString& s, const wxUniChar& c) - { return wxCRT_StrchrA((const char*)s.c_str(), (char)c); } -inline const char* wxStrrchr(const wxString& s, const wxUniChar& c) - { return wxCRT_StrrchrA((const char*)s.c_str(), (char)c); } -inline const char* wxStrchr(const wxString& s, const wxUniCharRef& c) - { return wxCRT_StrchrA((const char*)s.c_str(), (char)c); } -inline const char* wxStrrchr(const wxString& s, const wxUniCharRef& c) - { return wxCRT_StrrchrA((const char*)s.c_str(), (char)c); } +inline const char* wxStrchr(const wxString& s, const wxUniChar& uc) + { char c; return uc.GetAsChar(&c) ? wxCRT_StrchrA(s, c) : NULL; } +inline const char* wxStrrchr(const wxString& s, const wxUniChar& uc) + { char c; return uc.GetAsChar(&c) ? wxCRT_StrrchrA(s, c) : NULL; } +inline const char* wxStrchr(const wxString& s, const wxUniCharRef& uc) + { char c; return uc.GetAsChar(&c) ? wxCRT_StrchrA(s, c) : NULL; } +inline const char* wxStrrchr(const wxString& s, const wxUniCharRef& uc) + { char c; return uc.GetAsChar(&c) ? 
wxCRT_StrrchrA(s, c) : NULL; } inline const wchar_t* wxStrchr(const wxString& s, wchar_t c) { return wxCRT_StrchrW((const wchar_t*)s.c_str(), c); } inline const wchar_t* wxStrrchr(const wxString& s, wchar_t c) @@ -675,14 +675,14 @@ inline const char* wxStrchr(const wxCStrData& s, int c) { return wxCRT_StrchrA(s.AsChar(), c); } inline const char* wxStrrchr(const wxCStrData& s, int c) { return wxCRT_StrrchrA(s.AsChar(), c); } -inline const char* wxStrchr(const wxCStrData& s, const wxUniChar& c) - { return wxCRT_StrchrA(s.AsChar(), (char)c); } -inline const char* wxStrrchr(const wxCStrData& s, const wxUniChar& c) - { return wxCRT_StrrchrA(s.AsChar(), (char)c); } -inline const char* wxStrchr(const wxCStrData& s, const wxUniCharRef& c) - { return wxCRT_StrchrA(s.AsChar(), (char)c); } -inline const char* wxStrrchr(const wxCStrData& s, const wxUniCharRef& c) - { return wxCRT_StrrchrA(s.AsChar(), (char)c); } +inline const char* wxStrchr(const wxCStrData& s, const wxUniChar& uc) + { char c; return uc.GetAsChar(&c) ? wxCRT_StrchrA(s, c) : NULL; } +inline const char* wxStrrchr(const wxCStrData& s, const wxUniChar& uc) + { char c; return uc.GetAsChar(&c) ? wxCRT_StrrchrA(s, c) : NULL; } +inline const char* wxStrchr(const wxCStrData& s, const wxUniCharRef& uc) + { char c; return uc.GetAsChar(&c) ? wxCRT_StrchrA(s, c) : NULL; } +inline const char* wxStrrchr(const wxCStrData& s, const wxUniCharRef& uc) + { char c; return uc.GetAsChar(&c) ? wxCRT_StrrchrA(s, c) : NULL; } inline const wchar_t* wxStrchr(const wxCStrData& s, wchar_t c) { return wxCRT_StrchrW(s.AsWChar(), c); } inline const wchar_t* wxStrrchr(const wxCStrData& s, wchar_t c) diff --git a/interface/wx/unichar.h b/interface/wx/unichar.h index 41994dfde3..776c64486e 100644 --- a/interface/wx/unichar.h +++ b/interface/wx/unichar.h @@ -58,6 +58,30 @@ public: */ bool IsAscii() const; + /** + Returns true if the character is representable as a single byte in the + current locale encoding. 
+ + This function only returns true if the character can be converted in + exactly one byte, e.g. it only returns true for 7 bit ASCII characters + when the encoding used is UTF-8. + + It is mostly useful to test if the character can be passed to functions + taking a char and is used by wxWidgets itself for this purpose. + + @param c + An output pointer to the value of this Unicode character as a @c + char. Must be non-@NULL. + @return + @true if the object is an 8 bit char and @a c was filled with its + value as char or @false otherwise (@a c won't be modified then). + + @see IsAscii() + + @since 2.9.1 + */ + bool GetAsChar(char *c) const; + //@{ /** Conversions to char and wchar_t types: all of those are needed to be diff --git a/src/common/unichar.cpp b/src/common/unichar.cpp index e1cd0bf418..645435641f 100644 --- a/src/common/unichar.cpp +++ b/src/common/unichar.cpp @@ -58,27 +58,31 @@ wxUniChar::value_type wxUniChar::FromHi8bit(char c) } /* static */ -char wxUniChar::ToHi8bit(wxUniChar::value_type c) +char wxUniChar::ToHi8bit(wxUniChar::value_type v) { -#if wxUSE_UTF8_LOCALE_ONLY - wxFAIL_MSG( "character cannot be converted to single UTF-8 byte" ); - wxUnusedVar(c); + char c; + if ( !GetAsHi8bit(v, &c) ) + { + wxFAIL_MSG( "character cannot be converted to single byte" ); + c = '?'; // FIXME-UTF8: what to use as failure character? + } - return '?'; // FIXME-UTF8: what to use as failure character? -#else + return c; +} + +/* static */ +bool wxUniChar::GetAsHi8bit(value_type v, char *c) +{ wchar_t wbuf[2]; - wbuf[0] = c; + wbuf[0] = v; wbuf[1] = L'\0'; char cbuf[2]; if ( wxConvLibc.FromWChar(cbuf, 2, wbuf, 2) != 2 ) - { - wxFAIL_MSG( "character cannot be converted to single byte" ); - return '?'; // FIXME-UTF8: what to use as failure character? 
- } - return cbuf[0]; -#endif -} + return false; + *c = cbuf[0]; + return true; +} // --------------------------------------------------------------------------- // wxUniCharRef diff --git a/tests/strings/crt.cpp b/tests/strings/crt.cpp index c0c6600a2f..e0623f6ac5 100644 --- a/tests/strings/crt.cpp +++ b/tests/strings/crt.cpp @@ -44,6 +44,7 @@ public: private: CPPUNIT_TEST_SUITE( CrtTestCase ); CPPUNIT_TEST( SetGetEnv ); + CPPUNIT_TEST( Strchr ); CPPUNIT_TEST( Strcmp ); CPPUNIT_TEST( Strspn ); CPPUNIT_TEST( Strcspn ); @@ -52,6 +53,7 @@ private: CPPUNIT_TEST_SUITE_END(); void SetGetEnv(); + void Strchr(); void Strcmp(); void Strspn(); void Strcspn(); @@ -89,6 +91,19 @@ void CrtTestCase::SetGetEnv() #undef TESTVAR_NAME } +void CrtTestCase::Strchr() +{ + // test that searching for a wide character in a narrow string simply + // doesn't find it but doesn't fail with an assert (#11487) + const wxUniChar smiley = *wxString::FromUTF8("\xe2\x98\xba").begin(); + + CPPUNIT_ASSERT( !wxStrchr("hello", smiley) ); + + // but searching for an explicitly wide character does find it + CPPUNIT_ASSERT( wxStrchr(wxString::FromUTF8(":-) == \xe2\x98\xba"), + static_cast<wchar_t>(smiley)) ); +} + void CrtTestCase::Strcmp() { // this code tests if all possible ways of calling wxStrcmp() compile: -- 2.45.2