X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/817270659e986de1b243586d8eb6ad3a76c87480..fb2281054c60ade3dcd7bf26cc896dd4d7451674:/src/common/strvararg.cpp diff --git a/src/common/strvararg.cpp b/src/common/strvararg.cpp index 81288c6f89..a4c2256dfb 100644 --- a/src/common/strvararg.cpp +++ b/src/common/strvararg.cpp @@ -24,116 +24,693 @@ #endif #include "wx/strvararg.h" -#include "wx/buffer.h" -#include "wx/strconv.h" #include "wx/string.h" +#include "wx/crt.h" +#include "wx/private/wxprintf.h" // ============================================================================ // implementation // ============================================================================ -const wxChar *wxArgNormalizer::get() const -{ - // FIXME-UTF8: use some way that doesn't involve implicit conversion, - // so that we deallocate any converted buffer immediately; - // can't use AsString() because it returns wxString and not - // const wxString&, unfortunately; use As[W]CharBuf() when - // available. - return m_value; -} +// ---------------------------------------------------------------------------- +// wxArgNormalizer<> +// ---------------------------------------------------------------------------- -const wxChar *wxArgNormalizer::get() const +const wxStringCharType *wxArgNormalizerNative::get() const { -#if wxUSE_UNICODE_UTF8 // FIXME-UTF8 - return (const wxChar*)m_value; -#else return m_value.wx_str(); -#endif } -#if wxUSE_UNICODE // FIXME-UTF8: should be wxUSE_UNICODE_WCHAR -wxArgNormalizer::wxArgNormalizer(const char *value) +const wxStringCharType *wxArgNormalizerNative::get() const { - // FIXME-UTF8: move this to the header so that m_value doesn't have - // to be dynamically allocated - m_value = new wxWCharBuffer(wxConvLibc.cMB2WC(value)); + return m_value.AsInternal(); } -wxArgNormalizer::~wxArgNormalizer() +#if wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY +wxArgNormalizerWchar::wxArgNormalizerWchar( + const wxString& s, + const wxFormatString *fmt, unsigned index) + : wxArgNormalizerWithBuffer(s.wc_str(), fmt, index) { - delete m_value; } -const wchar_t *wxArgNormalizer::get() const +wxArgNormalizerWchar::wxArgNormalizerWchar( + const wxCStrData& s, + const wxFormatString *fmt, unsigned index) + : wxArgNormalizerWithBuffer(s.AsWCharBuf(), fmt, index) { - return m_value->data(); } -#endif // wxUSE_UNICODE_WCHAR +#endif // wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY +// ---------------------------------------------------------------------------- +// wxArgNormalizedString +// ---------------------------------------------------------------------------- -#if /*wxUSE_UNICODE_UTF8 ||*/ !wxUSE_UNICODE // FIXME-UTF8 -wxArgNormalizer::wxArgNormalizer(const wchar_t *value) +wxString wxArgNormalizedString::GetString() const { -#if wxUSE_UNICODE_UTF8 // FIXME-UTF8: this will be the only case - m_value = new wxCharBuffer(wxConvUTF8.cWC2MB(value)); + if ( !IsValid() ) + return wxEmptyString; + +#if wxUSE_UTF8_LOCALE_ONLY + return wxString(reinterpret_cast(m_ptr)); #else - m_value = new wxCharBuffer(wxConvLibc.cWC2MB(value)); -#endif + #if wxUSE_UNICODE_UTF8 + if ( wxLocaleIsUtf8 ) + return wxString(reinterpret_cast(m_ptr)); + else + #endif + return wxString(reinterpret_cast(m_ptr)); +#endif // !wxUSE_UTF8_LOCALE_ONLY } -wxArgNormalizer::~wxArgNormalizer() +wxArgNormalizedString::operator wxString() const { - delete m_value; + return GetString(); } -const char *wxArgNormalizer::get() const +// ---------------------------------------------------------------------------- +// wxFormatConverter: class doing the "%s" and "%c" normalization +// ---------------------------------------------------------------------------- + +/* + There are four problems with wxPrintf() etc. format strings: + + 1) The printf vararg macros convert all forms of strings into + wxStringCharType* representation. This may make the format string + incorrect: for example, if %ls was used together with a wchar_t* + variadic argument, this would no longer work, because the templates + would change wchar_t* argument to wxStringCharType* and %ls would now + be incorrect in e.g. UTF-8 build. We need make sure only one specifier + form is used. + + 2) To complicate matters further, the meaning of %s and %c is different + under Windows and on Unix. The Windows/MS convention is as follows: + + In ANSI mode: + + format specifier results in + ----------------------------------- + %s, %hs, %hS char* + %ls, %S, %lS wchar_t* + + In Unicode mode: + + format specifier results in + ----------------------------------- + %hs, %S, %hS char* + %s, %ls, %lS wchar_t* + + (While on POSIX systems we have %C identical to %lc and %c always means + char (in any mode) while %lc always means wchar_t.) + + In other words, we should _only_ use %s on Windows and %ls on Unix for + wxUSE_UNICODE_WCHAR build. + + 3) To make things even worse, we need two forms in UTF-8 build: one for + passing strings to ANSI functions under UTF-8 locales (this one should + use %s) and one for widechar functions used under non-UTF-8 locales + (this one should use %ls). + + And, of course, the same should be done for %c as well. + + + wxScanf() family of functions is simpler, because we don't normalize their + variadic arguments and we only have to handle 2) above and only for widechar + versions. +*/ + +template +class wxFormatConverterBase { - return m_value->data(); +public: + typedef T CharType; + + wxFormatConverterBase() + { + m_fmtOrig = NULL; + m_fmtLast = NULL; + m_nCopied = 0; + } + + wxScopedCharTypeBuffer Convert(const CharType *format) + { + // this is reset to NULL if we modify the format string + m_fmtOrig = format; + + while ( *format ) + { + if ( CopyFmtChar(*format++) == wxT('%') ) + { + // skip any flags + while ( IsFlagChar(*format) ) + CopyFmtChar(*format++); + + // and possible width + if ( *format == wxT('*') ) + CopyFmtChar(*format++); + else + SkipDigits(&format); + + // precision? + if ( *format == wxT('.') ) + { + CopyFmtChar(*format++); + if ( *format == wxT('*') ) + CopyFmtChar(*format++); + else + SkipDigits(&format); + } + + // next we can have a size modifier + SizeModifier size; + + switch ( *format ) + { + case 'h': + size = Size_Short; + format++; + break; + + case 'l': + // "ll" has a different meaning! + if ( format[1] != 'l' ) + { + size = Size_Long; + format++; + break; + } + //else: fall through + + default: + size = Size_Default; + } + + CharType outConv = *format; + SizeModifier outSize = size; + + // and finally we should have the type + switch ( *format ) + { + case wxT('S'): + case wxT('s'): + // all strings were converted into the same form by + // wxArgNormalizer, this form depends on the context + // in which the value is used (scanf/printf/wprintf): + HandleString(*format, size, outConv, outSize); + break; + + case wxT('C'): + case wxT('c'): + HandleChar(*format, size, outConv, outSize); + break; + + default: + // nothing special to do + break; + } + + if ( outConv == *format && outSize == size ) // no change + { + if ( size != Size_Default ) + CopyFmtChar(*(format - 1)); + CopyFmtChar(*format); + } + else // something changed + { + switch ( outSize ) + { + case Size_Long: + InsertFmtChar(wxT('l')); + break; + + case Size_Short: + InsertFmtChar(wxT('h')); + break; + + case Size_Default: + // nothing to do + break; + } + InsertFmtChar(outConv); + } + + format++; + } + } + + // notice that we only translated the string if m_fmtOrig == NULL (as + // set by CopyAllBefore()), otherwise we should simply use the original + // format + if ( m_fmtOrig ) + { + return wxScopedCharTypeBuffer::CreateNonOwned(m_fmtOrig); + } + else + { + // shrink converted format string to actual size (instead of + // over-sized allocation from CopyAllBefore()) and NUL-terminate + // it: + m_fmt.shrink(m_fmtLast - m_fmt.data()); + return m_fmt; + } + } + + virtual ~wxFormatConverterBase() {} + +protected: + enum SizeModifier + { + Size_Default, + Size_Short, + Size_Long + }; + + // called to handle %S or %s; 'conv' is conversion specifier ('S' or 's' + // respectively), 'size' is the preceding size modifier; the new values of + // conversion and size specifiers must be written to outConv and outSize + virtual void HandleString(CharType conv, SizeModifier size, + CharType& outConv, SizeModifier& outSize) = 0; + + // ditto for %C or %c + virtual void HandleChar(CharType conv, SizeModifier size, + CharType& outConv, SizeModifier& outSize) = 0; + +private: + // copy another character to the translated format: this function does the + // copy if we are translating but doesn't do anything at all if we don't, + // so we don't create the translated format string at all unless we really + // need to (i.e. InsertFmtChar() is called) + CharType CopyFmtChar(CharType ch) + { + if ( !m_fmtOrig ) + { + // we're translating, do copy + *(m_fmtLast++) = ch; + } + else + { + // simply increase the count which should be copied by + // CopyAllBefore() later if needed + m_nCopied++; + } + + return ch; + } + + // insert an extra character + void InsertFmtChar(CharType ch) + { + if ( m_fmtOrig ) + { + // so far we haven't translated anything yet + CopyAllBefore(); + } + + *(m_fmtLast++) = ch; + } + + void CopyAllBefore() + { + wxASSERT_MSG( m_fmtOrig && m_fmt.data() == NULL, "logic error" ); + + // the modified format string is guaranteed to be no longer than + // 3/2 of the original (worst case: the entire format string consists + // of "%s" repeated and is expanded to "%ls" on Unix), so we can + // allocate the buffer now and not worry about running out of space if + // we over-allocate a bit: + size_t fmtLen = wxStrlen(m_fmtOrig); + // worst case is of even length, so there's no rounding error in *3/2: + m_fmt.extend(fmtLen * 3 / 2); + + if ( m_nCopied > 0 ) + wxStrncpy(m_fmt.data(), m_fmtOrig, m_nCopied); + m_fmtLast = m_fmt.data() + m_nCopied; + + // we won't need it any longer and resetting it also indicates that we + // modified the format + m_fmtOrig = NULL; + } + + static bool IsFlagChar(CharType ch) + { + return ch == wxT('-') || ch == wxT('+') || + ch == wxT('0') || ch == wxT(' ') || ch == wxT('#'); + } + + void SkipDigits(const CharType **ptpc) + { + while ( **ptpc >= wxT('0') && **ptpc <= wxT('9') ) + CopyFmtChar(*(*ptpc)++); + } + + // the translated format + wxCharTypeBuffer m_fmt; + CharType *m_fmtLast; + + // the original format + const CharType *m_fmtOrig; + + // the number of characters already copied (i.e. already parsed, but left + // unmodified) + size_t m_nCopied; +}; + +#if defined(__WINDOWS__) && !defined(__CYGWIN__) + +// on Windows, we should use %s and %c regardless of the build: +class wxPrintfFormatConverterWchar : public wxFormatConverterBase +{ + virtual void HandleString(CharType WXUNUSED(conv), + SizeModifier WXUNUSED(size), + CharType& outConv, SizeModifier& outSize) + { + outConv = 's'; + outSize = Size_Default; + } + + virtual void HandleChar(CharType WXUNUSED(conv), + SizeModifier WXUNUSED(size), + CharType& outConv, SizeModifier& outSize) + { + outConv = 'c'; + outSize = Size_Default; + } +}; + +#else // !__WINDOWS__ + +// on Unix, it's %s for ANSI functions and %ls for widechar: + +#if !wxUSE_UTF8_LOCALE_ONLY +class wxPrintfFormatConverterWchar : public wxFormatConverterBase +{ + virtual void HandleString(CharType WXUNUSED(conv), + SizeModifier WXUNUSED(size), + CharType& outConv, SizeModifier& outSize) + { + outConv = 's'; + outSize = Size_Long; + } + + virtual void HandleChar(CharType WXUNUSED(conv), + SizeModifier WXUNUSED(size), + CharType& outConv, SizeModifier& outSize) + { + outConv = 'c'; + outSize = Size_Long; + } +}; +#endif // !wxUSE_UTF8_LOCALE_ONLY + +#if wxUSE_UNICODE_UTF8 +class wxPrintfFormatConverterUtf8 : public wxFormatConverterBase +{ + virtual void HandleString(CharType WXUNUSED(conv), + SizeModifier WXUNUSED(size), + CharType& outConv, SizeModifier& outSize) + { + outConv = 's'; + outSize = Size_Default; + } + + virtual void HandleChar(CharType WXUNUSED(conv), + SizeModifier WXUNUSED(size), + CharType& outConv, SizeModifier& outSize) + { + // chars are represented using wchar_t in both builds, so this is + // the same as above + outConv = 'c'; + outSize = Size_Long; + } +}; +#endif // wxUSE_UNICODE_UTF8 + +#endif // __WINDOWS__/!__WINDOWS__ + +#if !wxUSE_UNICODE // FIXME-UTF8: remove +class wxPrintfFormatConverterANSI : public wxFormatConverterBase +{ + virtual void HandleString(CharType WXUNUSED(conv), + SizeModifier WXUNUSED(size), + CharType& outConv, SizeModifier& outSize) + { + outConv = 's'; + outSize = Size_Default; + } + + virtual void HandleChar(CharType WXUNUSED(conv), + SizeModifier WXUNUSED(size), + CharType& outConv, SizeModifier& outSize) + { + outConv = 'c'; + outSize = Size_Default; + } +}; +#endif // ANSI + +#ifndef __WINDOWS__ +/* + + wxScanf() format translation is different, we need to translate %s to %ls + and %c to %lc on Unix (but not Windows and for widechar functions only!). + + So to use native functions in order to get our semantics we must do the + following translations in Unicode mode: + + wxWidgets specifier POSIX specifier + ---------------------------------------- + + %hc, %C, %hC %c + %c %lc + + */ +class wxScanfFormatConverterWchar : public wxFormatConverterBase +{ + virtual void HandleString(CharType conv, SizeModifier size, + CharType& outConv, SizeModifier& outSize) + { + outConv = 's'; + outSize = GetOutSize(conv == 'S', size); + } + + virtual void HandleChar(CharType conv, SizeModifier size, + CharType& outConv, SizeModifier& outSize) + { + outConv = 'c'; + outSize = GetOutSize(conv == 'C', size); + } + + SizeModifier GetOutSize(bool convIsUpper, SizeModifier size) + { + // %S and %hS -> %s and %lS -> %ls + if ( convIsUpper ) + { + if ( size == Size_Long ) + return Size_Long; + else + return Size_Default; + } + else // %s or %c + { + if ( size == Size_Default ) + return Size_Long; + else + return size; + } + } +}; + +const wxScopedWCharBuffer wxScanfConvertFormatW(const wchar_t *format) +{ + return wxScanfFormatConverterWchar().Convert(format); +} +#endif // !__WINDOWS__ + + +// ---------------------------------------------------------------------------- +// wxFormatString +// ---------------------------------------------------------------------------- + +#if !wxUSE_UNICODE_WCHAR +const char* wxFormatString::InputAsChar() +{ + if ( m_char ) + return m_char.data(); + + // in ANSI build, wx_str() returns char*, in UTF-8 build, this function + // is only called under UTF-8 locales, so we should return UTF-8 string, + // which is, again, what wx_str() returns: + if ( m_str ) + return m_str->wx_str(); + + // ditto wxCStrData: + if ( m_cstr ) + return m_cstr->AsInternal(); + + // the last case is that wide string was passed in: in that case, we need + // to convert it: + wxASSERT( m_wchar ); + + m_char = wxConvLibc.cWC2MB(m_wchar.data()); + + return m_char.data(); } -#endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE -#if 0 // wxUSE_UNICODE_UTF8 - FIXME-UTF8 -wxArgNormalizer::wxArgNormalizer(const char *value) +const char* wxFormatString::AsChar() { - // FIXME-UTF8: move this to the header so that m_value doesn't have - // to be dynamically allocated - // FIXME-UTF8: optimize this if current locale is UTF-8 one + if ( !m_convertedChar ) +#if !wxUSE_UNICODE // FIXME-UTF8: remove this + m_convertedChar = wxPrintfFormatConverterANSI().Convert(InputAsChar()); +#else + m_convertedChar = wxPrintfFormatConverterUtf8().Convert(InputAsChar()); +#endif - // convert to widechar string first: - wxWCharBuffer buf(wxConvLibc.cMB2WC(value)); + return m_convertedChar.data(); +} +#endif // !wxUSE_UNICODE_WCHAR + +#if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY +const wchar_t* wxFormatString::InputAsWChar() +{ + if ( m_wchar ) + return m_wchar.data(); - if ( buf ) +#if wxUSE_UNICODE_WCHAR + if ( m_str ) + return m_str->wc_str(); + if ( m_cstr ) + return m_cstr->AsInternal(); +#else // wxUSE_UNICODE_UTF8 + if ( m_str ) { - // then to UTF-8: - m_value = new wxCharBuffer(wxConvUTF8.cWC2MB(value)); + m_wchar = m_str->wc_str(); + return m_wchar.data(); } - else + if ( m_cstr ) { - m_value = new wxCharBuffer(); + m_wchar = m_cstr->AsWCharBuf(); + return m_wchar.data(); } +#endif // wxUSE_UNICODE_WCHAR/UTF8 + + // the last case is that narrow string was passed in: in that case, we need + // to convert it: + wxASSERT( m_char ); + + m_wchar = wxConvLibc.cMB2WC(m_char.data()); + + return m_wchar.data(); } -wxArgNormalizer::~wxArgNormalizer() +const wchar_t* wxFormatString::AsWChar() { - delete m_value; + if ( !m_convertedWChar ) + m_convertedWChar = wxPrintfFormatConverterWchar().Convert(InputAsWChar()); + + return m_convertedWChar.data(); } +#endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY -const char *wxArgNormalizer::get() const +wxString wxFormatString::InputAsString() const { - return m_value->data(); + if ( m_str ) + return *m_str; + if ( m_cstr ) + return m_cstr->AsString(); + if ( m_wchar ) + return wxString(m_wchar); + if ( m_char ) + return wxString(m_char); + + wxFAIL_MSG( "invalid wxFormatString - not initialized?" ); + return wxString(); } -#endif // wxUSE_UNICODE_UTF8 +// ---------------------------------------------------------------------------- +// wxFormatString::GetArgumentType() +// ---------------------------------------------------------------------------- +namespace +{ -// FIXME-UTF8: move this to the header once it's possible to include buffer.h -// without including wxcrt.h -wxArgNormalizer::wxArgNormalizer(const wxCharBuffer& buf) - : wxArgNormalizer(buf.data()) +template +wxFormatString::ArgumentType DoGetArgumentType(const CharType *format, + unsigned n) { + wxCHECK_MSG( format, wxFormatString::Arg_Unknown, + "empty format string not allowed here" ); + + wxPrintfConvSpecParser parser(format); + + wxCHECK_MSG( n <= parser.nargs, wxFormatString::Arg_Unknown, + "more arguments than format string specifiers?" ); + + wxCHECK_MSG( parser.pspec[n-1] != NULL, wxFormatString::Arg_Unknown, + "requested argument not found - invalid format string?" ); + + switch ( parser.pspec[n-1]->m_type ) + { + case wxPAT_CHAR: + case wxPAT_WCHAR: + return wxFormatString::Arg_Char; + + case wxPAT_PCHAR: + case wxPAT_PWCHAR: + return wxFormatString::Arg_String; + + case wxPAT_INT: + return wxFormatString::Arg_Int; + case wxPAT_LONGINT: + return wxFormatString::Arg_LongInt; +#ifdef wxLongLong_t + case wxPAT_LONGLONGINT: + return wxFormatString::Arg_LongLongInt; +#endif + case wxPAT_SIZET: + return wxFormatString::Arg_Size_t; + + case wxPAT_DOUBLE: + return wxFormatString::Arg_Double; + case wxPAT_LONGDOUBLE: + return wxFormatString::Arg_LongDouble; + + case wxPAT_POINTER: + return wxFormatString::Arg_Pointer; + + case wxPAT_NINT: + return wxFormatString::Arg_IntPtr; + case wxPAT_NSHORTINT: + return wxFormatString::Arg_ShortIntPtr; + case wxPAT_NLONGINT: + return wxFormatString::Arg_LongIntPtr; + + case wxPAT_STAR: + // "*" requires argument of type int + return wxFormatString::Arg_Int; + + case wxPAT_INVALID: + // (handled after the switch statement) + break; + } + + // silence warning + wxFAIL_MSG( "unexpected argument type" ); + return wxFormatString::Arg_Unknown; } -wxArgNormalizer::wxArgNormalizer(const wxWCharBuffer& buf) - : wxArgNormalizer(buf.data()) +} // anonymous namespace + +wxFormatString::ArgumentType wxFormatString::GetArgumentType(unsigned n) const { + if ( m_char ) + return DoGetArgumentType(m_char.data(), n); + else if ( m_wchar ) + return DoGetArgumentType(m_wchar.data(), n); + else if ( m_str ) + return DoGetArgumentType(m_str->wx_str(), n); + else if ( m_cstr ) + return DoGetArgumentType(m_cstr->AsInternal(), n); + + wxFAIL_MSG( "unreachable code" ); + return Arg_Unknown; }