src/common/strvararg.cpp

///////////////////////////////////////////////////////////////////////////////
// Name:        src/common/strvararg.cpp
// Purpose:     macros for implementing type-safe vararg passing of strings
// Author:      Vaclav Slavik
// Created:     2007-02-19
// RCS-ID:      $Id$
// Copyright:   (c) 2007 REA Elektronik GmbH
// Licence:     wxWindows licence
///////////////////////////////////////////////////////////////////////////////

// ============================================================================
// declarations
// ============================================================================

// ----------------------------------------------------------------------------
// headers
// ----------------------------------------------------------------------------

// for compilers that support precompilation, includes "wx.h".
#include "wx/wxprec.h"

#ifdef __BORLANDC__
    #pragma hdrstop
#endif

#include "wx/strvararg.h"
#include "wx/string.h"
#include "wx/crt.h"
#include "wx/private/wxprintf.h"

// ============================================================================
// implementation
// ============================================================================

// ----------------------------------------------------------------------------
// wxArgNormalizer<>
// ----------------------------------------------------------------------------

// Returns the string contents in the form produced by wxString::wx_str(),
// i.e. as a pointer to wxStringCharType characters.
const wxStringCharType *wxArgNormalizerNative<const wxString&>::get() const
{
    const wxStringCharType * const native = m_value.wx_str();
    return native;
}

// Returns the wrapped wxCStrData in the form produced by its AsInternal()
// method, i.e. as a pointer to wxStringCharType characters.
const wxStringCharType *wxArgNormalizerNative<const wxCStrData&>::get() const
{
    const wxStringCharType * const native = m_value.AsInternal();
    return native;
}

#if wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY
// Passes the result of s.wc_str(), together with the format and the argument
// index, on to the buffer-holding base class; nothing else to do here.
wxArgNormalizerWchar<const wxString&>::wxArgNormalizerWchar(
        const wxString& s,
        const wxFormatString *fmt,
        unsigned index)
    : wxArgNormalizerWithBuffer<wchar_t>(s.wc_str(), fmt, index)
{
}

// Same as the wxString overload, but the wide buffer is obtained from
// wxCStrData::AsWCharBuf().
wxArgNormalizerWchar<const wxCStrData&>::wxArgNormalizerWchar(
        const wxCStrData& s,
        const wxFormatString *fmt,
        unsigned index)
    : wxArgNormalizerWithBuffer<wchar_t>(s.AsWCharBuf(), fmt, index)
{
}
#endif // wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY

// ----------------------------------------------------------------------------
// wxArgNormalizedString
// ----------------------------------------------------------------------------

// Builds a wxString from the stored pointer.
//
// An invalid object yields the empty string. Otherwise the pointer is
// interpreted as char* when building for UTF-8 locales only or when, in the
// UTF-8 build, wxLocaleIsUtf8 is set at run time, and as wxChar* in all
// other cases.
wxString wxArgNormalizedString::GetString() const
{
    if ( IsValid() )
    {
#if wxUSE_UTF8_LOCALE_ONLY
        return wxString(reinterpret_cast<const char*>(m_ptr));
#else // !wxUSE_UTF8_LOCALE_ONLY
    #if wxUSE_UNICODE_UTF8
        if ( wxLocaleIsUtf8 )
            return wxString(reinterpret_cast<const char*>(m_ptr));
    #endif // wxUSE_UNICODE_UTF8

        return wxString(reinterpret_cast<const wxChar*>(m_ptr));
#endif // wxUSE_UTF8_LOCALE_ONLY/!wxUSE_UTF8_LOCALE_ONLY
    }

    return wxEmptyString;
}

// Implicit conversion to wxString: gives exactly what GetString() returns.
wxArgNormalizedString::operator wxString() const
{
    wxString str = GetString();
    return str;
}

// ----------------------------------------------------------------------------
// wxFormatConverter: class doing the "%s" and "%c" normalization
// ----------------------------------------------------------------------------

/*
   There are four problems with wxPrintf() etc. format strings:

   1) The printf vararg macros convert all forms of strings into
      wxStringCharType* representation. This may make the format string
      incorrect: for example, if %ls was used together with a wchar_t*
      variadic argument, this would no longer work, because the templates
      would change wchar_t* argument to wxStringCharType* and %ls would now
      be incorrect in e.g. UTF-8 build. We need to make sure that only one
      specifier form is used.

   2) To complicate matters further, the meaning of %s and %c is different
      under Windows and on Unix. The Windows/MS convention is as follows:

       In ANSI mode:

       format specifier         results in
       -----------------------------------
       %s, %hs, %hS             char*
       %ls, %S, %lS             wchar_t*

       In Unicode mode:

       format specifier         results in
       -----------------------------------
       %hs, %S, %hS             char*
       %s, %ls, %lS             wchar_t*

       (While on POSIX systems we have %C identical to %lc and %c always means
       char (in any mode) while %lc always means wchar_t.)

      In other words, we should _only_ use %s on Windows and %ls on Unix for
      wxUSE_UNICODE_WCHAR build.

   3) To make things even worse, we need two forms in UTF-8 build: one for
      passing strings to ANSI functions under UTF-8 locales (this one should
      use %s) and one for widechar functions used under non-UTF-8 locales
      (this one should use %ls).

   And, of course, the same should be done for %c as well.


   wxScanf() family of functions is simpler, because we don't normalize their
   variadic arguments and we only have to handle 2) above and only for widechar
   versions.
*/

// Common implementation of the format string translation.
//
// Convert() scans a format string and, for every %s/%S/%c/%C conversion
// specification found in it, asks the derived class (via HandleString() or
// HandleChar()) which size modifier and conversion character should be used
// instead. Everything else is passed through unchanged. The translated copy
// is created lazily, i.e. only once the first specification really needs to
// be changed; until then only the number of characters parsed is tracked.
template<typename T>
class wxFormatConverterBase
{
public:
    typedef T CharType;

    wxFormatConverterBase()
    {
        m_fmtOrig = NULL;
        m_fmtLast = NULL;
        m_nCopied = 0;
    }

    // Translate the NUL-terminated 'format'.
    //
    // Returns a non-owning buffer referring to 'format' itself if no
    // specification had to be changed, or the translated copy otherwise.
    //
    // NOTE(review): the members are not reset here (and CopyAllBefore()
    // asserts that no translated buffer exists yet), so each converter
    // object appears to be meant for a single Convert() call.
    wxScopedCharTypeBuffer<CharType> Convert(const CharType *format)
    {
        // this is reset to NULL if we modify the format string
        m_fmtOrig = format;

        while ( *format )
        {
            if ( CopyFmtChar(*format++) == wxT('%') )
            {
#if wxUSE_PRINTF_POS_PARAMS
                if ( *format >= '0' && *format <= '9' )
                {
                    SkipDigits(&format);
                    if ( *format == '$' )
                    {
                        // It was a positional argument specification.
                        CopyFmtChar(*format++);
                    }
                    //else: it was a width specification, nothing else to do.
                }
#endif // wxUSE_PRINTF_POS_PARAMS

                // skip any flags
                while ( IsFlagChar(*format) )
                    CopyFmtChar(*format++);

                // and possible width
                if ( *format == wxT('*') )
                    CopyFmtChar(*format++);
                else
                    SkipDigits(&format);

                // precision?
                if ( *format == wxT('.') )
                {
                    CopyFmtChar(*format++);
                    if ( *format == wxT('*') )
                        CopyFmtChar(*format++);
                    else
                        SkipDigits(&format);
                }

                // next we can have a size modifier: notice that it is only
                // skipped here and not copied yet as it may be replaced by
                // a different one below
                SizeModifier size;

                switch ( *format )
                {
                    case 'h':
                        size = Size_Short;
                        format++;
                        break;

                    case 'l':
                        // "ll" has a different meaning!
                        if ( format[1] != 'l' )
                        {
                            size = Size_Long;
                            format++;
                            break;
                        }
                        //else: fall through

                    default:
                        size = Size_Default;
                }

                if ( !*format )
                {
                    // the format string ends in the middle of a conversion
                    // specification (e.g. "100%" or "%l"): there is nothing
                    // to translate, so just keep the size modifier skipped
                    // above, if any, and stop here -- we must neither copy
                    // the terminating NUL as if it were a conversion
                    // character nor advance past it, as this would result
                    // in reading beyond the end of the string
                    if ( size != Size_Default )
                        CopyFmtChar(*(format - 1));
                    break;
                }

                CharType outConv = *format;
                SizeModifier outSize = size;

                // and finally we should have the type
                switch ( *format )
                {
                    case wxT('S'):
                    case wxT('s'):
                        // all strings were converted into the same form by
                        // wxArgNormalizer<T>, this form depends on the context
                        // in which the value is used (scanf/printf/wprintf):
                        HandleString(*format, size, outConv, outSize);
                        break;

                    case wxT('C'):
                    case wxT('c'):
                        HandleChar(*format, size, outConv, outSize);
                        break;

                    default:
                        // nothing special to do
                        break;
                }

                if ( outConv == *format && outSize == size ) // no change
                {
                    // the size modifier, if any, is the character just
                    // before the conversion one
                    if ( size != Size_Default )
                        CopyFmtChar(*(format - 1));
                    CopyFmtChar(*format);
                }
                else // something changed
                {
                    // InsertFmtChar() switches to the translating mode if
                    // this is the first modification of the format
                    switch ( outSize )
                    {
                        case Size_Long:
                            InsertFmtChar(wxT('l'));
                            break;

                        case Size_Short:
                            InsertFmtChar(wxT('h'));
                            break;

                        case Size_Default:
                            // nothing to do
                            break;
                    }
                    InsertFmtChar(outConv);
                }

                format++;
            }
        }

        // notice that we only translated the string if m_fmtOrig == NULL (as
        // set by CopyAllBefore()), otherwise we should simply use the original
        // format
        if ( m_fmtOrig )
        {
            return wxScopedCharTypeBuffer<CharType>::CreateNonOwned(m_fmtOrig);
        }
        else
        {
            // shrink converted format string to actual size (instead of
            // over-sized allocation from CopyAllBefore()) and NUL-terminate
            // it:
            m_fmt.shrink(m_fmtLast - m_fmt.data());
            return m_fmt;
        }
    }

    virtual ~wxFormatConverterBase() {}

protected:
    enum SizeModifier
    {
        Size_Default,   // no size modifier
        Size_Short,     // 'h'
        Size_Long       // single 'l' ("ll" is not handled as a modifier)
    };

    // called to handle %S or %s; 'conv' is conversion specifier ('S' or 's'
    // respectively), 'size' is the preceding size modifier; the new values of
    // conversion and size specifiers must be written to outConv and outSize
    virtual void HandleString(CharType conv, SizeModifier size,
                              CharType& outConv, SizeModifier& outSize) = 0;

    // ditto for %C or %c
    virtual void HandleChar(CharType conv, SizeModifier size,
                            CharType& outConv, SizeModifier& outSize) = 0;

private:
    // copy another character to the translated format: this function does the
    // copy if we are translating but doesn't do anything at all if we don't,
    // so we don't create the translated format string at all unless we really
    // need to (i.e. InsertFmtChar() is called)
    //
    // returns the character passed to it
    CharType CopyFmtChar(CharType ch)
    {
        if ( !m_fmtOrig )
        {
            // we're translating, do copy
            *(m_fmtLast++) = ch;
        }
        else
        {
            // simply increase the count which should be copied by
            // CopyAllBefore() later if needed
            m_nCopied++;
        }

        return ch;
    }

    // insert an extra character, starting the translation if it hadn't been
    // started yet
    void InsertFmtChar(CharType ch)
    {
        if ( m_fmtOrig )
        {
            // so far we haven't translated anything yet
            CopyAllBefore();
        }

        *(m_fmtLast++) = ch;
    }

    // switch to the translating mode: allocate the buffer for the translated
    // format and copy the m_nCopied characters parsed so far to it
    void CopyAllBefore()
    {
        wxASSERT_MSG( m_fmtOrig && m_fmt.data() == NULL, "logic error" );

        // the modified format string is guaranteed to be no longer than
        // 3/2 of the original (worst case: the entire format string consists
        // of "%s" repeated and is expanded to "%ls" on Unix), so we can
        // allocate the buffer now and not worry about running out of space if
        // we over-allocate a bit:
        size_t fmtLen = wxStrlen(m_fmtOrig);
        // worst case is of even length, so there's no rounding error in *3/2:
        m_fmt.extend(fmtLen * 3 / 2);

        if ( m_nCopied > 0 )
            wxStrncpy(m_fmt.data(), m_fmtOrig, m_nCopied);
        m_fmtLast = m_fmt.data() + m_nCopied;

        // we won't need it any longer and resetting it also indicates that we
        // modified the format
        m_fmtOrig = NULL;
    }

    // check if the character is one of the printf() flags
    static bool IsFlagChar(CharType ch)
    {
        return ch == wxT('-') || ch == wxT('+') ||
               ch == wxT('0') || ch == wxT(' ') || ch == wxT('#');
    }

    // copy all the decimal digits at the current position, advancing the
    // pointer passed by address past them
    void SkipDigits(const CharType **ptpc)
    {
        while ( **ptpc >= wxT('0') && **ptpc <= wxT('9') )
            CopyFmtChar(*(*ptpc)++);
    }

    // the translated format and the position at which its next character
    // should be written
    wxCharTypeBuffer<CharType> m_fmt;
    CharType *m_fmtLast;

    // the original format, NULL once we have started translating it
    const CharType *m_fmtOrig;

    // the number of characters already copied (i.e. already parsed, but left
    // unmodified)
    size_t m_nCopied;
};

#if defined(__WINDOWS__) && !defined(__CYGWIN__)

// on Windows, we should use %s and %c regardless of the build:
class wxPrintfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
{
    // any string specification, whatever its original form, becomes "%s"
    virtual void HandleString(CharType /* conv */,
                              SizeModifier /* size */,
                              CharType& outConv,
                              SizeModifier& outSize)
    {
        outSize = Size_Default;
        outConv = 's';
    }

    // and, in the same way, any character specification becomes "%c"
    virtual void HandleChar(CharType /* conv */,
                            SizeModifier /* size */,
                            CharType& outConv,
                            SizeModifier& outSize)
    {
        outSize = Size_Default;
        outConv = 'c';
    }
};

#else // !__WINDOWS__

// on Unix, it's %s for ANSI functions and %ls for widechar:

#if !wxUSE_UTF8_LOCALE_ONLY
class wxPrintfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
{
    // any string specification, whatever its original form, becomes "%ls"
    virtual void HandleString(CharType /* conv */,
                              SizeModifier /* size */,
                              CharType& outConv,
                              SizeModifier& outSize)
    {
        outSize = Size_Long;
        outConv = 's';
    }

    // and, in the same way, any character specification becomes "%lc"
    virtual void HandleChar(CharType /* conv */,
                            SizeModifier /* size */,
                            CharType& outConv,
                            SizeModifier& outSize)
    {
        outSize = Size_Long;
        outConv = 'c';
    }
};
#endif // !wxUSE_UTF8_LOCALE_ONLY

#if wxUSE_UNICODE_UTF8
class wxPrintfFormatConverterUtf8 : public wxFormatConverterBase<char>
{
    // strings are always output using plain "%s" here
    virtual void HandleString(CharType /* conv */,
                              SizeModifier /* size */,
                              CharType& outConv,
                              SizeModifier& outSize)
    {
        outSize = Size_Default;
        outConv = 's';
    }

    // characters, on the other hand, use "%lc": they are represented by
    // wchar_t in both builds, so this is the same as in the wide char
    // converter
    virtual void HandleChar(CharType /* conv */,
                            SizeModifier /* size */,
                            CharType& outConv,
                            SizeModifier& outSize)
    {
        outSize = Size_Long;
        outConv = 'c';
    }
};
#endif // wxUSE_UNICODE_UTF8

#endif // __WINDOWS__/!__WINDOWS__

#if !wxUSE_UNICODE // FIXME-UTF8: remove
class wxPrintfFormatConverterANSI : public wxFormatConverterBase<char>
{
    // any string specification, whatever its original form, becomes "%s"
    virtual void HandleString(CharType /* conv */,
                              SizeModifier /* size */,
                              CharType& outConv,
                              SizeModifier& outSize)
    {
        outSize = Size_Default;
        outConv = 's';
    }

    // and, in the same way, any character specification becomes "%c"
    virtual void HandleChar(CharType /* conv */,
                            SizeModifier /* size */,
                            CharType& outConv,
                            SizeModifier& outSize)
    {
        outSize = Size_Default;
        outConv = 'c';
    }
};
#endif // ANSI

#ifndef __WINDOWS__
/*

   wxScanf() format translation is different, we need to translate %s to %ls
   and %c to %lc on Unix (but not Windows and for widechar functions only!).

   So to use native functions in order to get our semantics we must do the
   following translations in Unicode mode:

   wxWidgets specifier      POSIX specifier
   ----------------------------------------

   %hc, %C, %hC             %c
   %c                       %lc

 */
class wxScanfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
{
    // the conversion is always lower case 's' on output, only the size
    // modifier depends on the original specification
    virtual void HandleString(CharType conv, SizeModifier size,
                              CharType& outConv, SizeModifier& outSize)
    {
        const bool isUpper = conv == 'S';

        outSize = GetOutSize(isUpper, size);
        outConv = 's';
    }

    // exactly the same logic as for the strings, but with 'c' and 'C'
    virtual void HandleChar(CharType conv, SizeModifier size,
                            CharType& outConv, SizeModifier& outSize)
    {
        const bool isUpper = conv == 'C';

        outSize = GetOutSize(isUpper, size);
        outConv = 'c';
    }

    // determine the size modifier to use in the translated specification
    // from the case of the original conversion and its size modifier
    SizeModifier GetOutSize(bool convIsUpper, SizeModifier size)
    {
        // upper case conversion: only an explicit 'l' is preserved, i.e.
        // %S and %hS -> %s and %lS -> %ls
        if ( convIsUpper )
            return size == Size_Long ? Size_Long : Size_Default;

        // lower case conversion: 'l' is added if there was no modifier at
        // all, an explicit modifier is kept unchanged
        return size == Size_Default ? Size_Long : size;
    }
};

// Translates a wide char scanf() format string using
// wxScanfFormatConverterWchar and returns the buffer produced by it.
const wxScopedWCharBuffer wxScanfConvertFormatW(const wchar_t *format)
{
    wxScanfFormatConverterWchar converter;

    return converter.Convert(format);
}
#endif // !__WINDOWS__


// ----------------------------------------------------------------------------
// wxFormatString
// ----------------------------------------------------------------------------

#if !wxUSE_UNICODE_WCHAR
// Returns the original (not yet translated) format as a narrow string,
// converting it from the wide one and caching the result in m_char if
// necessary.
const char* wxFormatString::InputAsChar()
{
    if ( !m_char )
    {
        // wx_str() returns char* in ANSI build; in UTF-8 build this function
        // is only called under UTF-8 locales, where the UTF-8 string
        // returned by wx_str() is exactly what is needed too
        if ( m_str )
            return m_str->wx_str();

        // and the same is true for wxCStrData
        if ( m_cstr )
            return m_cstr->AsInternal();

        // so the only remaining possibility is that we were given a wide
        // string, which has to be converted
        wxASSERT( m_wchar );

        m_char = wxConvLibc.cWC2MB(m_wchar.data());
    }

    return m_char.data();
}

const char* wxFormatString::AsChar()
{
    if ( !m_convertedChar )
#if !wxUSE_UNICODE // FIXME-UTF8: remove this
        m_convertedChar = wxPrintfFormatConverterANSI().Convert(InputAsChar());
#else
        m_convertedChar = wxPrintfFormatConverterUtf8().Convert(InputAsChar());
#endif

    return m_convertedChar.data();
}
#endif // !wxUSE_UNICODE_WCHAR

#if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
// Returns the original (not yet translated) format as a wide string,
// converting it and caching the result in m_wchar if necessary.
const wchar_t* wxFormatString::InputAsWChar()
{
    if ( m_wchar )
        return m_wchar.data();

#if wxUSE_UNICODE_WCHAR
    // the string objects can be used directly in this build
    if ( m_str )
        return m_str->wc_str();
    if ( m_cstr )
        return m_cstr->AsInternal();

    // otherwise we must have been given a narrow string: convert it
    wxASSERT( m_char );

    m_wchar = wxConvLibc.cMB2WC(m_char.data());
#else // wxUSE_UNICODE_UTF8
    // here the result of the conversion is stored in m_wchar in all cases
    if ( m_str )
    {
        m_wchar = m_str->wc_str();
    }
    else if ( m_cstr )
    {
        m_wchar = m_cstr->AsWCharBuf();
    }
    else
    {
        // we must have been given a narrow string: convert it
        wxASSERT( m_char );

        m_wchar = wxConvLibc.cMB2WC(m_char.data());
    }
#endif // wxUSE_UNICODE_WCHAR/UTF8

    return m_wchar.data();
}

const wchar_t* wxFormatString::AsWChar()
{
    if ( !m_convertedWChar )
        m_convertedWChar = wxPrintfFormatConverterWchar().Convert(InputAsWChar());

    return m_convertedWChar.data();
}
#endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY

// Returns the original format as wxString, whichever form it was given in.
//
// The sources are tried in the order m_str, m_cstr, m_wchar, m_char; if none
// of them is set, a failure is reported and an empty string is returned.
wxString wxFormatString::InputAsString() const
{
    wxString input;

    if ( m_str )
    {
        input = *m_str;
    }
    else if ( m_cstr )
    {
        input = m_cstr->AsString();
    }
    else if ( m_wchar )
    {
        input = wxString(m_wchar);
    }
    else if ( m_char )
    {
        input = wxString(m_char);
    }
    else
    {
        wxFAIL_MSG( "invalid wxFormatString - not initialized?" );
    }

    return input;
}

// ----------------------------------------------------------------------------
// wxFormatString::GetArgumentType()
// ----------------------------------------------------------------------------

namespace
{

// Determine the type of the argument expected by the n-th conversion
// specification of the given format string.
//
// format: the format string to parse with wxPrintfConvSpecParser, must not
//         be NULL
// n:      1-based index of the argument (the specification looked up is
//         parser.pspec[n-1])
//
// Returns the wxFormatString::ArgumentType corresponding to the type found
// by the parser, or Arg_Unknown if any of the checks below fails or the
// type is wxPAT_INVALID.
template<typename CharType>
wxFormatString::ArgumentType DoGetArgumentType(const CharType *format,
                                               unsigned n)
{
    wxCHECK_MSG( format, wxFormatString::Arg_Unknown,
                 "empty format string not allowed here" );

    // the index is 1-based and unsigned, so 0 must be rejected explicitly:
    // it would pass the upper bound check below and "n-1" would then wrap
    // around, indexing far beyond the end of the specifications array
    wxCHECK_MSG( n > 0, wxFormatString::Arg_Unknown,
                 "argument index must be 1-based" );

    wxPrintfConvSpecParser<CharType> parser(format);

    wxCHECK_MSG( n <= parser.nargs, wxFormatString::Arg_Unknown,
                 "more arguments than format string specifiers?" );

    wxCHECK_MSG( parser.pspec[n-1] != NULL, wxFormatString::Arg_Unknown,
                 "requested argument not found - invalid format string?" );

    switch ( parser.pspec[n-1]->m_type )
    {
        // narrow and wide variants are not distinguished in the result
        case wxPAT_CHAR:
        case wxPAT_WCHAR:
            return wxFormatString::Arg_Char;

        case wxPAT_PCHAR:
        case wxPAT_PWCHAR:
            return wxFormatString::Arg_String;

        case wxPAT_INT:
            return wxFormatString::Arg_Int;
        case wxPAT_LONGINT:
            return wxFormatString::Arg_LongInt;
#ifdef wxLongLong_t
        case wxPAT_LONGLONGINT:
            return wxFormatString::Arg_LongLongInt;
#endif
        case wxPAT_SIZET:
            return wxFormatString::Arg_Size_t;

        case wxPAT_DOUBLE:
            return wxFormatString::Arg_Double;
        case wxPAT_LONGDOUBLE:
            return wxFormatString::Arg_LongDouble;

        case wxPAT_POINTER:
            return wxFormatString::Arg_Pointer;

        case wxPAT_NINT:
            return wxFormatString::Arg_IntPtr;
        case wxPAT_NSHORTINT:
            return wxFormatString::Arg_ShortIntPtr;
        case wxPAT_NLONGINT:
            return wxFormatString::Arg_LongIntPtr;

        case wxPAT_STAR:
            // "*" requires argument of type int
            return wxFormatString::Arg_Int;

        case wxPAT_INVALID:
            // (handled after the switch statement)
            break;
    }

    // we only get here for wxPAT_INVALID; the return statement is also
    // needed to avoid warnings about not returning a value
    wxFAIL_MSG( "unexpected argument type" );
    return wxFormatString::Arg_Unknown;
}

} // anonymous namespace

// Returns the type of the n-th argument expected by this format string.
//
// The format is parsed by DoGetArgumentType() in whichever form it is
// available: m_char, m_wchar, m_str and m_cstr are tried in this order.
wxFormatString::ArgumentType wxFormatString::GetArgumentType(unsigned n) const
{
    if ( m_char )
        return DoGetArgumentType(m_char.data(), n);

    if ( m_wchar )
        return DoGetArgumentType(m_wchar.data(), n);

    if ( m_str )
        return DoGetArgumentType(m_str->wx_str(), n);

    if ( m_cstr )
        return DoGetArgumentType(m_cstr->AsInternal(), n);

    // one of the members above is expected to be always set
    wxFAIL_MSG( "unreachable code" );
    return Arg_Unknown;
}
Commit	Line	Data
	1	///////////////////////////////////////////////////////////////////////////////
	2	// Name: src/common/strvararg.cpp
	3	// Purpose: macros for implementing type-safe vararg passing of strings
	4	// Author: Vaclav Slavik
	5	// Created: 2007-02-19
	6	// RCS-ID: $Id$
	7	// Copyright: (c) 2007 REA Elektronik GmbH
	8	// Licence: wxWindows licence
	9	///////////////////////////////////////////////////////////////////////////////
	10
	11	// ============================================================================
	12	// declarations
	13	// ============================================================================
	14
	15	// ----------------------------------------------------------------------------
	16	// headers
	17	// ----------------------------------------------------------------------------
	18
	19	// for compilers that support precompilation, includes "wx.h".
	20	#include "wx/wxprec.h"
	21
	22	#ifdef __BORLANDC__
	23	#pragma hdrstop
	24	#endif
	25
	26	#include "wx/strvararg.h"
	27	#include "wx/string.h"
	28	#include "wx/crt.h"
	29	#include "wx/private/wxprintf.h"
	30
	31	// ============================================================================
	32	// implementation
	33	// ============================================================================
	34
	35	// ----------------------------------------------------------------------------
	36	// wxArgNormalizer<>
	37	// ----------------------------------------------------------------------------
	38
	39	const wxStringCharType *wxArgNormalizerNative<const wxString&>::get() const
	40	{
	41	return m_value.wx_str();
	42	}
	43
	44	const wxStringCharType *wxArgNormalizerNative<const wxCStrData&>::get() const
	45	{
	46	return m_value.AsInternal();
	47	}
	48
	49	#if wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY
	50	wxArgNormalizerWchar<const wxString&>::wxArgNormalizerWchar(
	51	const wxString& s,
	52	const wxFormatString *fmt, unsigned index)
	53	: wxArgNormalizerWithBuffer<wchar_t>(s.wc_str(), fmt, index)
	54	{
	55	}
	56
	57	wxArgNormalizerWchar<const wxCStrData&>::wxArgNormalizerWchar(
	58	const wxCStrData& s,
	59	const wxFormatString *fmt, unsigned index)
	60	: wxArgNormalizerWithBuffer<wchar_t>(s.AsWCharBuf(), fmt, index)
	61	{
	62	}
	63	#endif // wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY
	64
	65	// ----------------------------------------------------------------------------
	66	// wxArgNormalizedString
	67	// ----------------------------------------------------------------------------
	68
	69	wxString wxArgNormalizedString::GetString() const
	70	{
	71	if ( !IsValid() )
	72	return wxEmptyString;
	73
	74	#if wxUSE_UTF8_LOCALE_ONLY
	75	return wxString(reinterpret_cast<const char*>(m_ptr));
	76	#else
	77	#if wxUSE_UNICODE_UTF8
	78	if ( wxLocaleIsUtf8 )
	79	return wxString(reinterpret_cast<const char*>(m_ptr));
	80	else
	81	#endif
	82	return wxString(reinterpret_cast<const wxChar*>(m_ptr));
	83	#endif // !wxUSE_UTF8_LOCALE_ONLY
	84	}
	85
	86	wxArgNormalizedString::operator wxString() const
	87	{
	88	return GetString();
	89	}
	90
	91	// ----------------------------------------------------------------------------
	92	// wxFormatConverter: class doing the "%s" and "%c" normalization
	93	// ----------------------------------------------------------------------------
	94
	95	/*
	96	There are four problems with wxPrintf() etc. format strings:
	97
	98	1) The printf vararg macros convert all forms of strings into
	99	wxStringCharType* representation. This may make the format string
	100	incorrect: for example, if %ls was used together with a wchar_t*
	101	variadic argument, this would no longer work, because the templates
	102	would change wchar_t* argument to wxStringCharType* and %ls would now
	103	be incorrect in e.g. UTF-8 build. We need make sure only one specifier
	104	form is used.
	105
	106	2) To complicate matters further, the meaning of %s and %c is different
	107	under Windows and on Unix. The Windows/MS convention is as follows:
	108
	109	In ANSI mode:
	110
	111	format specifier results in
	112	-----------------------------------
	113	%s, %hs, %hS char*
	114	%ls, %S, %lS wchar_t*
	115
	116	In Unicode mode:
	117
	118	format specifier results in
	119	-----------------------------------
	120	%hs, %S, %hS char*
	121	%s, %ls, %lS wchar_t*
	122
	123	(While on POSIX systems we have %C identical to %lc and %c always means
	124	char (in any mode) while %lc always means wchar_t.)
	125
	126	In other words, we should _only_ use %s on Windows and %ls on Unix for
	127	wxUSE_UNICODE_WCHAR build.
	128
	129	3) To make things even worse, we need two forms in UTF-8 build: one for
	130	passing strings to ANSI functions under UTF-8 locales (this one should
	131	use %s) and one for widechar functions used under non-UTF-8 locales
	132	(this one should use %ls).
	133
	134	And, of course, the same should be done for %c as well.
	135
	136
	137	wxScanf() family of functions is simpler, because we don't normalize their
	138	variadic arguments and we only have to handle 2) above and only for widechar
	139	versions.
	140	*/
	141
	142	template<typename T>
	143	class wxFormatConverterBase
	144	{
	145	public:
	146	typedef T CharType;
	147
	148	wxFormatConverterBase()
	149	{
	150	m_fmtOrig = NULL;
	151	m_fmtLast = NULL;
	152	m_nCopied = 0;
	153	}
	154
	155	wxScopedCharTypeBuffer<CharType> Convert(const CharType *format)
	156	{
	157	// this is reset to NULL if we modify the format string
	158	m_fmtOrig = format;
	159
	160	while ( *format )
	161	{
	162	if ( CopyFmtChar(*format++) == wxT('%') )
	163	{
	164	#if wxUSE_PRINTF_POS_PARAMS
	165	if ( format >= '0' && format <= '9' )
	166	{
	167	SkipDigits(&format);
	168	if ( *format == '$' )
	169	{
	170	// It was a positional argument specification.
	171	CopyFmtChar(*format++);
	172	}
	173	//else: it was a width specification, nothing else to do.
	174	}
	175	#endif // wxUSE_PRINTF_POS_PARAMS
	176
	177	// skip any flags
	178	while ( IsFlagChar(*format) )
	179	CopyFmtChar(*format++);
	180
	181	// and possible width
	182	if ( format == wxT('') )
	183	CopyFmtChar(*format++);
	184	else
	185	SkipDigits(&format);
	186
	187	// precision?
	188	if ( *format == wxT('.') )
	189	{
	190	CopyFmtChar(*format++);
	191	if ( format == wxT('') )
	192	CopyFmtChar(*format++);
	193	else
	194	SkipDigits(&format);
	195	}
	196
	197	// next we can have a size modifier
	198	SizeModifier size;
	199
	200	switch ( *format )
	201	{
	202	case 'h':
	203	size = Size_Short;
	204	format++;
	205	break;
	206
	207	case 'l':
	208	// "ll" has a different meaning!
	209	if ( format[1] != 'l' )
	210	{
	211	size = Size_Long;
	212	format++;
	213	break;
	214	}
	215	//else: fall through
	216
	217	default:
	218	size = Size_Default;
	219	}
	220
	221	CharType outConv = *format;
	222	SizeModifier outSize = size;
	223
	224	// and finally we should have the type
	225	switch ( *format )
	226	{
	227	case wxT('S'):
	228	case wxT('s'):
	229	// all strings were converted into the same form by
	230	// wxArgNormalizer<T>, this form depends on the context
	231	// in which the value is used (scanf/printf/wprintf):
	232	HandleString(*format, size, outConv, outSize);
	233	break;
	234
	235	case wxT('C'):
	236	case wxT('c'):
	237	HandleChar(*format, size, outConv, outSize);
	238	break;
	239
	240	default:
	241	// nothing special to do
	242	break;
	243	}
	244
	245	if ( outConv == *format && outSize == size ) // no change
	246	{
	247	if ( size != Size_Default )
	248	CopyFmtChar(*(format - 1));
	249	CopyFmtChar(*format);
	250	}
	251	else // something changed
	252	{
	253	switch ( outSize )
	254	{
	255	case Size_Long:
	256	InsertFmtChar(wxT('l'));
	257	break;
	258
	259	case Size_Short:
	260	InsertFmtChar(wxT('h'));
	261	break;
	262
	263	case Size_Default:
	264	// nothing to do
	265	break;
	266	}
	267	InsertFmtChar(outConv);
	268	}
	269
	270	format++;
	271	}
	272	}
	273
	274	// notice that we only translated the string if m_fmtOrig == NULL (as
	275	// set by CopyAllBefore()), otherwise we should simply use the original
	276	// format
	277	if ( m_fmtOrig )
	278	{
	279	return wxScopedCharTypeBuffer<CharType>::CreateNonOwned(m_fmtOrig);
	280	}
	281	else
	282	{
	283	// shrink converted format string to actual size (instead of
	284	// over-sized allocation from CopyAllBefore()) and NUL-terminate
	285	// it:
	286	m_fmt.shrink(m_fmtLast - m_fmt.data());
	287	return m_fmt;
	288	}
	289	}
	290
	291	virtual ~wxFormatConverterBase() {}
	292
	293	protected:
	294	enum SizeModifier
	295	{
	296	Size_Default,
	297	Size_Short,
	298	Size_Long
	299	};
	300
	301	// called to handle %S or %s; 'conv' is conversion specifier ('S' or 's'
	302	// respectively), 'size' is the preceding size modifier; the new values of
	303	// conversion and size specifiers must be written to outConv and outSize
	304	virtual void HandleString(CharType conv, SizeModifier size,
	305	CharType& outConv, SizeModifier& outSize) = 0;
	306
	307	// ditto for %C or %c
	308	virtual void HandleChar(CharType conv, SizeModifier size,
	309	CharType& outConv, SizeModifier& outSize) = 0;
	310
	311	private:
	312	// copy another character to the translated format: this function does the
	313	// copy if we are translating but doesn't do anything at all if we don't,
	314	// so we don't create the translated format string at all unless we really
	315	// need to (i.e. InsertFmtChar() is called)
	316	CharType CopyFmtChar(CharType ch)
	317	{
	318	if ( !m_fmtOrig )
	319	{
	320	// we're translating, do copy
	321	*(m_fmtLast++) = ch;
	322	}
	323	else
	324	{
	325	// simply increase the count which should be copied by
	326	// CopyAllBefore() later if needed
	327	m_nCopied++;
	328	}
	329
	330	return ch;
	331	}
	332
	333	// insert an extra character
	334	void InsertFmtChar(CharType ch)
	335	{
	336	if ( m_fmtOrig )
	337	{
	338	// so far we haven't translated anything yet
	339	CopyAllBefore();
	340	}
	341
	342	*(m_fmtLast++) = ch;
	343	}
	344
	345	void CopyAllBefore()
	346	{
	347	wxASSERT_MSG( m_fmtOrig && m_fmt.data() == NULL, "logic error" );
	348
	349	// the modified format string is guaranteed to be no longer than
	350	// 3/2 of the original (worst case: the entire format string consists
	351	// of "%s" repeated and is expanded to "%ls" on Unix), so we can
	352	// allocate the buffer now and not worry about running out of space if
	353	// we over-allocate a bit:
	354	size_t fmtLen = wxStrlen(m_fmtOrig);
	355	// worst case is of even length, so there's no rounding error in *3/2:
	356	m_fmt.extend(fmtLen * 3 / 2);
	357
	358	if ( m_nCopied > 0 )
	359	wxStrncpy(m_fmt.data(), m_fmtOrig, m_nCopied);
	360	m_fmtLast = m_fmt.data() + m_nCopied;
	361
	362	// we won't need it any longer and resetting it also indicates that we
	363	// modified the format
	364	m_fmtOrig = NULL;
	365	}
	366
	367	static bool IsFlagChar(CharType ch)
	368	{
	369	return ch == wxT('-') \|\| ch == wxT('+') \|\|
	370	ch == wxT('0') \|\| ch == wxT(' ') \|\| ch == wxT('#');
	371	}
	372
	373	void SkipDigits(const CharType **ptpc)
	374	{
	375	while ( ptpc >= wxT('0') && ptpc <= wxT('9') )
	376	CopyFmtChar((ptpc)++);
	377	}
	378
	379	// the translated format
	380	wxCharTypeBuffer<CharType> m_fmt;
	381	CharType *m_fmtLast;
	382
	383	// the original format
	384	const CharType *m_fmtOrig;
	385
	386	// the number of characters already copied (i.e. already parsed, but left
	387	// unmodified)
	388	size_t m_nCopied;
	389	};
	390
	391	#if defined(__WINDOWS__) && !defined(__CYGWIN__)
	392
	393	// on Windows, we should use %s and %c regardless of the build:
	394	class wxPrintfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
	395	{
	396	virtual void HandleString(CharType WXUNUSED(conv),
	397	SizeModifier WXUNUSED(size),
	398	CharType& outConv, SizeModifier& outSize)
	399	{
	400	outConv = 's';
	401	outSize = Size_Default;
	402	}
	403
	404	virtual void HandleChar(CharType WXUNUSED(conv),
	405	SizeModifier WXUNUSED(size),
	406	CharType& outConv, SizeModifier& outSize)
	407	{
	408	outConv = 'c';
	409	outSize = Size_Default;
	410	}
	411	};
	412
	413	#else // !__WINDOWS__
	414
	415	// on Unix, it's %s for ANSI functions and %ls for widechar:
	416
	417	#if !wxUSE_UTF8_LOCALE_ONLY
	418	class wxPrintfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
	419	{
	420	virtual void HandleString(CharType WXUNUSED(conv),
	421	SizeModifier WXUNUSED(size),
	422	CharType& outConv, SizeModifier& outSize)
	423	{
	424	outConv = 's';
	425	outSize = Size_Long;
	426	}
	427
	428	virtual void HandleChar(CharType WXUNUSED(conv),
	429	SizeModifier WXUNUSED(size),
	430	CharType& outConv, SizeModifier& outSize)
	431	{
	432	outConv = 'c';
	433	outSize = Size_Long;
	434	}
	435	};
	436	#endif // !wxUSE_UTF8_LOCALE_ONLY
	437
	438	#if wxUSE_UNICODE_UTF8
	439	class wxPrintfFormatConverterUtf8 : public wxFormatConverterBase<char>
	440	{
	441	virtual void HandleString(CharType WXUNUSED(conv),
	442	SizeModifier WXUNUSED(size),
	443	CharType& outConv, SizeModifier& outSize)
	444	{
	445	outConv = 's';
	446	outSize = Size_Default;
	447	}
	448
	449	virtual void HandleChar(CharType WXUNUSED(conv),
	450	SizeModifier WXUNUSED(size),
	451	CharType& outConv, SizeModifier& outSize)
	452	{
	453	// chars are represented using wchar_t in both builds, so this is
	454	// the same as above
	455	outConv = 'c';
	456	outSize = Size_Long;
	457	}
	458	};
	459	#endif // wxUSE_UNICODE_UTF8
	460
	461	#endif // __WINDOWS__/!__WINDOWS__
	462
	463	#if !wxUSE_UNICODE // FIXME-UTF8: remove
	464	class wxPrintfFormatConverterANSI : public wxFormatConverterBase<char>
	465	{
	466	virtual void HandleString(CharType WXUNUSED(conv),
	467	SizeModifier WXUNUSED(size),
	468	CharType& outConv, SizeModifier& outSize)
	469	{
	470	outConv = 's';
	471	outSize = Size_Default;
	472	}
	473
	474	virtual void HandleChar(CharType WXUNUSED(conv),
	475	SizeModifier WXUNUSED(size),
	476	CharType& outConv, SizeModifier& outSize)
	477	{
	478	outConv = 'c';
	479	outSize = Size_Default;
	480	}
	481	};
	482	#endif // ANSI
	483
	484	#ifndef __WINDOWS__
/*

   wxScanf() format translation is different, we need to translate %s to %ls
   and %c to %lc on Unix (but not Windows and for widechar functions only!).

   So to use native functions in order to get our semantics we must do the
   following translations in Unicode mode:

   wxWidgets specifier         POSIX specifier
   ----------------------------------------

   %hc, %C, %hC                %c
   %c                          %lc

 */
	500	class wxScanfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
	501	{
	502	virtual void HandleString(CharType conv, SizeModifier size,
	503	CharType& outConv, SizeModifier& outSize)
	504	{
	505	outConv = 's';
	506	outSize = GetOutSize(conv == 'S', size);
	507	}
	508
	509	virtual void HandleChar(CharType conv, SizeModifier size,
	510	CharType& outConv, SizeModifier& outSize)
	511	{
	512	outConv = 'c';
	513	outSize = GetOutSize(conv == 'C', size);
	514	}
	515
	516	SizeModifier GetOutSize(bool convIsUpper, SizeModifier size)
	517	{
	518	// %S and %hS -> %s and %lS -> %ls
	519	if ( convIsUpper )
	520	{
	521	if ( size == Size_Long )
	522	return Size_Long;
	523	else
	524	return Size_Default;
	525	}
	526	else // %s or %c
	527	{
	528	if ( size == Size_Default )
	529	return Size_Long;
	530	else
	531	return size;
	532	}
	533	}
	534	};
	535
	536	const wxScopedWCharBuffer wxScanfConvertFormatW(const wchar_t *format)
	537	{
	538	return wxScanfFormatConverterWchar().Convert(format);
	539	}
	540	#endif // !__WINDOWS__
	541
	542
	543	// ----------------------------------------------------------------------------
	544	// wxFormatString
	545	// ----------------------------------------------------------------------------
	546
	547	#if !wxUSE_UNICODE_WCHAR
	548	const char* wxFormatString::InputAsChar()
	549	{
	550	if ( m_char )
	551	return m_char.data();
	552
	553	// in ANSI build, wx_str() returns char*, in UTF-8 build, this function
	554	// is only called under UTF-8 locales, so we should return UTF-8 string,
	555	// which is, again, what wx_str() returns:
	556	if ( m_str )
	557	return m_str->wx_str();
	558
	559	// ditto wxCStrData:
	560	if ( m_cstr )
	561	return m_cstr->AsInternal();
	562
	563	// the last case is that wide string was passed in: in that case, we need
	564	// to convert it:
	565	wxASSERT( m_wchar );
	566
	567	m_char = wxConvLibc.cWC2MB(m_wchar.data());
	568
	569	return m_char.data();
	570	}
	571
	572	const char* wxFormatString::AsChar()
	573	{
	574	if ( !m_convertedChar )
	575	#if !wxUSE_UNICODE // FIXME-UTF8: remove this
	576	m_convertedChar = wxPrintfFormatConverterANSI().Convert(InputAsChar());
	577	#else
	578	m_convertedChar = wxPrintfFormatConverterUtf8().Convert(InputAsChar());
	579	#endif
	580
	581	return m_convertedChar.data();
	582	}
	583	#endif // !wxUSE_UNICODE_WCHAR
	584
	585	#if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
	586	const wchar_t* wxFormatString::InputAsWChar()
	587	{
	588	if ( m_wchar )
	589	return m_wchar.data();
	590
	591	#if wxUSE_UNICODE_WCHAR
	592	if ( m_str )
	593	return m_str->wc_str();
	594	if ( m_cstr )
	595	return m_cstr->AsInternal();
	596	#else // wxUSE_UNICODE_UTF8
	597	if ( m_str )
	598	{
	599	m_wchar = m_str->wc_str();
	600	return m_wchar.data();
	601	}
	602	if ( m_cstr )
	603	{
	604	m_wchar = m_cstr->AsWCharBuf();
	605	return m_wchar.data();
	606	}
	607	#endif // wxUSE_UNICODE_WCHAR/UTF8
	608
	609	// the last case is that narrow string was passed in: in that case, we need
	610	// to convert it:
	611	wxASSERT( m_char );
	612
	613	m_wchar = wxConvLibc.cMB2WC(m_char.data());
	614
	615	return m_wchar.data();
	616	}
	617
	618	const wchar_t* wxFormatString::AsWChar()
	619	{
	620	if ( !m_convertedWChar )
	621	m_convertedWChar = wxPrintfFormatConverterWchar().Convert(InputAsWChar());
	622
	623	return m_convertedWChar.data();
	624	}
	625	#endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
	626
	627	wxString wxFormatString::InputAsString() const
	628	{
	629	if ( m_str )
	630	return *m_str;
	631	if ( m_cstr )
	632	return m_cstr->AsString();
	633	if ( m_wchar )
	634	return wxString(m_wchar);
	635	if ( m_char )
	636	return wxString(m_char);
	637
	638	wxFAIL_MSG( "invalid wxFormatString - not initialized?" );
	639	return wxString();
	640	}
	641
	642	// ----------------------------------------------------------------------------
	643	// wxFormatString::GetArgumentType()
	644	// ----------------------------------------------------------------------------
	645
	646	namespace
	647	{
	648
	649	template<typename CharType>
	650	wxFormatString::ArgumentType DoGetArgumentType(const CharType *format,
	651	unsigned n)
	652	{
	653	wxCHECK_MSG( format, wxFormatString::Arg_Unknown,
	654	"empty format string not allowed here" );
	655
	656	wxPrintfConvSpecParser<CharType> parser(format);
	657
	658	wxCHECK_MSG( n <= parser.nargs, wxFormatString::Arg_Unknown,
	659	"more arguments than format string specifiers?" );
	660
	661	wxCHECK_MSG( parser.pspec[n-1] != NULL, wxFormatString::Arg_Unknown,
	662	"requested argument not found - invalid format string?" );
	663
	664	switch ( parser.pspec[n-1]->m_type )
	665	{
	666	case wxPAT_CHAR:
	667	case wxPAT_WCHAR:
	668	return wxFormatString::Arg_Char;
	669
	670	case wxPAT_PCHAR:
	671	case wxPAT_PWCHAR:
	672	return wxFormatString::Arg_String;
	673
	674	case wxPAT_INT:
	675	return wxFormatString::Arg_Int;
	676	case wxPAT_LONGINT:
	677	return wxFormatString::Arg_LongInt;
	678	#ifdef wxLongLong_t
	679	case wxPAT_LONGLONGINT:
	680	return wxFormatString::Arg_LongLongInt;
	681	#endif
	682	case wxPAT_SIZET:
	683	return wxFormatString::Arg_Size_t;
	684
	685	case wxPAT_DOUBLE:
	686	return wxFormatString::Arg_Double;
	687	case wxPAT_LONGDOUBLE:
	688	return wxFormatString::Arg_LongDouble;
	689
	690	case wxPAT_POINTER:
	691	return wxFormatString::Arg_Pointer;
	692
	693	case wxPAT_NINT:
	694	return wxFormatString::Arg_IntPtr;
	695	case wxPAT_NSHORTINT:
	696	return wxFormatString::Arg_ShortIntPtr;
	697	case wxPAT_NLONGINT:
	698	return wxFormatString::Arg_LongIntPtr;
	699
	700	case wxPAT_STAR:
	701	// "*" requires argument of type int
	702	return wxFormatString::Arg_Int;
	703
	704	case wxPAT_INVALID:
	705	// (handled after the switch statement)
	706	break;
	707	}
	708
	709	// silence warning
	710	wxFAIL_MSG( "unexpected argument type" );
	711	return wxFormatString::Arg_Unknown;
	712	}
	713
	714	} // anonymous namespace
	715
	716	wxFormatString::ArgumentType wxFormatString::GetArgumentType(unsigned n) const
	717	{
	718	if ( m_char )
	719	return DoGetArgumentType(m_char.data(), n);
	720	else if ( m_wchar )
	721	return DoGetArgumentType(m_wchar.data(), n);
	722	else if ( m_str )
	723	return DoGetArgumentType(m_str->wx_str(), n);
	724	else if ( m_cstr )
	725	return DoGetArgumentType(m_cstr->AsInternal(), n);
	726
	727	wxFAIL_MSG( "unreachable code" );
	728	return Arg_Unknown;
	729	}