[wxWidgets.git] / src / common / strvararg.cpp

///////////////////////////////////////////////////////////////////////////////
// Name:        src/common/strvararg.cpp
// Purpose:     macros for implementing type-safe vararg passing of strings
// Author:      Vaclav Slavik
// Created:     2007-02-19
// RCS-ID:      $Id$
// Copyright:   (c) 2007 REA Elektronik GmbH
// Licence:     wxWindows licence
///////////////////////////////////////////////////////////////////////////////

// ============================================================================
// declarations
// ============================================================================

// ----------------------------------------------------------------------------
// headers
// ----------------------------------------------------------------------------

// for compilers that support precompilation, includes "wx.h".
#include "wx/wxprec.h"

#ifdef __BORLANDC__
    #pragma hdrstop
#endif

#include "wx/strvararg.h"
#include "wx/string.h"
#include "wx/crt.h"
#include "wx/private/wxprintf.h"

// ============================================================================
// implementation
// ============================================================================

// ----------------------------------------------------------------------------
// wxArgNormalizer<>
// ----------------------------------------------------------------------------

const wxStringCharType *wxArgNormalizerNative<const wxString&>::get() const
{
    return m_value.wx_str();
}

const wxStringCharType *wxArgNormalizerNative<const wxCStrData&>::get() const
{
    return m_value.AsInternal();
}

#if wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY
wxArgNormalizerWchar<const wxString&>::wxArgNormalizerWchar(
                            const wxString& s,
                            const wxFormatString *fmt, unsigned index)
    : wxArgNormalizerWithBuffer<wchar_t>(s.wc_str(), fmt, index)
{
}

wxArgNormalizerWchar<const wxCStrData&>::wxArgNormalizerWchar(
                            const wxCStrData& s,
                            const wxFormatString *fmt, unsigned index)
    : wxArgNormalizerWithBuffer<wchar_t>(s.AsWCharBuf(), fmt, index)
{
}
#endif // wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY

// ----------------------------------------------------------------------------
// wxArgNormalizedString
// ----------------------------------------------------------------------------

wxString wxArgNormalizedString::GetString() const
{
    if ( !IsValid() )
        return wxEmptyString;

#if wxUSE_UTF8_LOCALE_ONLY
    return wxString(reinterpret_cast<const char*>(m_ptr));
#else
    #if wxUSE_UNICODE_UTF8
        if ( wxLocaleIsUtf8 )
            return wxString(reinterpret_cast<const char*>(m_ptr));
        else
    #endif
        return wxString(reinterpret_cast<const wxChar*>(m_ptr));
#endif // !wxUSE_UTF8_LOCALE_ONLY
}

wxArgNormalizedString::operator wxString() const
{
    return GetString();
}

// ----------------------------------------------------------------------------
// wxFormatConverter: class doing the "%s" and "%c" normalization
// ----------------------------------------------------------------------------

/*
   There are four problems with wxPrintf() etc. format strings:

   1) The printf vararg macros convert all forms of strings into
      wxStringCharType* representation. This may make the format string
      incorrect: for example, if %ls was used together with a wchar_t*
      variadic argument, this would no longer work, because the templates
      would change wchar_t* argument to wxStringCharType* and %ls would now
      be incorrect in e.g. UTF-8 build. We need make sure only one specifier
      form is used.

   2) To complicate matters further, the meaning of %s and %c is different
      under Windows and on Unix. The Windows/MS convention is as follows:

       In ANSI mode:

       format specifier         results in
       -----------------------------------
       %s, %hs, %hS             char*
       %ls, %S, %lS             wchar_t*

       In Unicode mode:

       format specifier         results in
       -----------------------------------
       %hs, %S, %hS             char*
       %s, %ls, %lS             wchar_t*

       (While on POSIX systems we have %C identical to %lc and %c always means
       char (in any mode) while %lc always means wchar_t.)

      In other words, we should _only_ use %s on Windows and %ls on Unix for
      wxUSE_UNICODE_WCHAR build.

   3) To make things even worse, we need two forms in UTF-8 build: one for
      passing strings to ANSI functions under UTF-8 locales (this one should
      use %s) and one for widechar functions used under non-UTF-8 locales
      (this one should use %ls).

   And, of course, the same should be done for %c as well.


   wxScanf() family of functions is simpler, because we don't normalize their
   variadic arguments and we only have to handle 2) above and only for widechar
   versions.
*/

template<typename T>
class wxFormatConverterBase
{
public:
    typedef T CharType;

    wxFormatConverterBase()
    {
        m_fmtOrig = NULL;
        m_fmtLast = NULL;
        m_nCopied = 0;
    }

    wxCharTypeBuffer<CharType> Convert(const CharType *format)
    {
        // this is reset to NULL if we modify the format string
        m_fmtOrig = format;

        while ( *format )
        {
            if ( CopyFmtChar(*format++) == _T('%') )
            {
                // skip any flags
                while ( IsFlagChar(*format) )
                    CopyFmtChar(*format++);

                // and possible width
                if ( *format == _T('*') )
                    CopyFmtChar(*format++);
                else
                    SkipDigits(&format);

                // precision?
                if ( *format == _T('.') )
                {
                    CopyFmtChar(*format++);
                    if ( *format == _T('*') )
                        CopyFmtChar(*format++);
                    else
                        SkipDigits(&format);
                }

                // next we can have a size modifier
                SizeModifier size;

                switch ( *format )
                {
                    case 'h':
                        size = Size_Short;
                        format++;
                        break;

                    case 'l':
                        // "ll" has a different meaning!
                        if ( format[1] != 'l' )
                        {
                            size = Size_Long;
                            format++;
                            break;
                        }
                        //else: fall through

                    default:
                        size = Size_Default;
                }

                CharType outConv = *format;
                SizeModifier outSize = size;

                // and finally we should have the type
                switch ( *format )
                {
                    case _T('S'):
                    case _T('s'):
                        // all strings were converted into the same form by
                        // wxArgNormalizer<T>, this form depends on the context
                        // in which the value is used (scanf/printf/wprintf):
                        HandleString(*format, size, outConv, outSize);
                        break;

                    case _T('C'):
                    case _T('c'):
                        HandleChar(*format, size, outConv, outSize);
                        break;

                    default:
                        // nothing special to do
                        break;
                }

                if ( outConv == *format && outSize == size ) // no change
                {
                    if ( size != Size_Default )
                        CopyFmtChar(*(format - 1));
                    CopyFmtChar(*format);
                }
                else // something changed
                {
                    switch ( outSize )
                    {
                        case Size_Long:
                            InsertFmtChar(_T('l'));
                            break;

                        case Size_Short:
                            InsertFmtChar(_T('h'));
                            break;

                        case Size_Default:
                            // nothing to do
                            break;
                    }
                    InsertFmtChar(outConv);
                }

                format++;
            }
        }

        // notice that we only translated the string if m_fmtOrig == NULL (as
        // set by CopyAllBefore()), otherwise we should simply use the original
        // format
        if ( m_fmtOrig )
        {
            return wxCharTypeBuffer<CharType>::CreateNonOwned(m_fmtOrig);
        }
        else
        {
            // NULL-terminate converted format string:
            *m_fmtLast = 0;
            return m_fmt;
        }
    }

    virtual ~wxFormatConverterBase() {}

protected:
    enum SizeModifier
    {
        Size_Default,
        Size_Short,
        Size_Long
    };

    // called to handle %S or %s; 'conv' is conversion specifier ('S' or 's'
    // respectively), 'size' is the preceding size modifier; the new values of
    // conversion and size specifiers must be written to outConv and outSize
    virtual void HandleString(CharType conv, SizeModifier size,
                              CharType& outConv, SizeModifier& outSize) = 0;

    // ditto for %C or %c
    virtual void HandleChar(CharType conv, SizeModifier size,
                            CharType& outConv, SizeModifier& outSize) = 0;

private:
    // copy another character to the translated format: this function does the
    // copy if we are translating but doesn't do anything at all if we don't,
    // so we don't create the translated format string at all unless we really
    // need to (i.e. InsertFmtChar() is called)
    CharType CopyFmtChar(CharType ch)
    {
        if ( !m_fmtOrig )
        {
            // we're translating, do copy
            *(m_fmtLast++) = ch;
        }
        else
        {
            // simply increase the count which should be copied by
            // CopyAllBefore() later if needed
            m_nCopied++;
        }

        return ch;
    }

    // insert an extra character
    void InsertFmtChar(CharType ch)
    {
        if ( m_fmtOrig )
        {
            // so far we haven't translated anything yet
            CopyAllBefore();
        }

        *(m_fmtLast++) = ch;
    }

    void CopyAllBefore()
    {
        wxASSERT_MSG( m_fmtOrig && m_fmt.data() == NULL, "logic error" );

        // the modified format string is guaranteed to be no longer than
        // 3/2 of the original (worst case: the entire format string consists
        // of "%s" repeated and is expanded to "%ls" on Unix), so we can
        // allocate the buffer now and not worry about running out of space if
        // we over-allocate a bit:
        size_t fmtLen = wxStrlen(m_fmtOrig);
        // worst case is of even length, so there's no rounding error in *3/2:
        m_fmt.extend(fmtLen * 3 / 2);

        if ( m_nCopied > 0 )
            wxStrncpy(m_fmt.data(), m_fmtOrig, m_nCopied);
        m_fmtLast = m_fmt.data() + m_nCopied;

        // we won't need it any longer and resetting it also indicates that we
        // modified the format
        m_fmtOrig = NULL;
    }

    static bool IsFlagChar(CharType ch)
    {
        return ch == _T('-') || ch == _T('+') ||
               ch == _T('0') || ch == _T(' ') || ch == _T('#');
    }

    void SkipDigits(const CharType **ptpc)
    {
        while ( **ptpc >= _T('0') && **ptpc <= _T('9') )
            CopyFmtChar(*(*ptpc)++);
    }

    // the translated format
    wxCharTypeBuffer<CharType> m_fmt;
    CharType *m_fmtLast;

    // the original format
    const CharType *m_fmtOrig;

    // the number of characters already copied (i.e. already parsed, but left
    // unmodified)
    size_t m_nCopied;
};


#ifdef __WINDOWS

// on Windows, we should use %s and %c regardless of the build:
class wxPrintfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
{
    virtual void HandleString(CharType WXUNUSED(conv),
                              SizeModifier WXUNUSED(size),
                              CharType& outConv, SizeModifier& outSize)
    {
        outConv = 's';
        outSize = Size_Default;
    }

    virtual void HandleChar(CharType WXUNUSED(conv),
                            SizeModifier WXUNUSED(size),
                            CharType& outConv, SizeModifier& outSize)
    {
        outConv = 'c';
        outSize = Size_Default;
    }
};

#else // !__WINDOWS__

// on Unix, it's %s for ANSI functions and %ls for widechar:

#if !wxUSE_UTF8_LOCALE_ONLY
class wxPrintfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
{
    virtual void HandleString(CharType WXUNUSED(conv),
                              SizeModifier WXUNUSED(size),
                              CharType& outConv, SizeModifier& outSize)
    {
        outConv = 's';
        outSize = Size_Long;
    }

    virtual void HandleChar(CharType WXUNUSED(conv),
                            SizeModifier WXUNUSED(size),
                            CharType& outConv, SizeModifier& outSize)
    {
        outConv = 'c';
        outSize = Size_Long;
    }
};
#endif // !wxUSE_UTF8_LOCALE_ONLY

#if wxUSE_UNICODE_UTF8
class wxPrintfFormatConverterUtf8 : public wxFormatConverterBase<char>
{
    virtual void HandleString(CharType WXUNUSED(conv),
                              SizeModifier WXUNUSED(size),
                              CharType& outConv, SizeModifier& outSize)
    {
        outConv = 's';
        outSize = Size_Default;
    }

    virtual void HandleChar(CharType WXUNUSED(conv),
                            SizeModifier WXUNUSED(size),
                            CharType& outConv, SizeModifier& outSize)
    {
        // chars are represented using wchar_t in both builds, so this is
        // the same as above
        outConv = 'c';
        outSize = Size_Long;
    }
};
#endif // wxUSE_UNICODE_UTF8

#endif // __WINDOWS__/!__WINDOWS__

#if !wxUSE_UNICODE // FIXME-UTF8: remove
class wxPrintfFormatConverterANSI : public wxFormatConverterBase<char>
{
    virtual void HandleString(CharType WXUNUSED(conv),
                              SizeModifier WXUNUSED(size),
                              CharType& outConv, SizeModifier& outSize)
    {
        outConv = 's';
        outSize = Size_Default;
    }

    virtual void HandleChar(CharType WXUNUSED(conv),
                            SizeModifier WXUNUSED(size),
                            CharType& outConv, SizeModifier& outSize)
    {
        outConv = 'c';
        outSize = Size_Default;
    }
};
#endif // ANSI

#ifndef __WINDOWS__
/*

   wxScanf() format translation is different, we need to translate %s to %ls
   and %c to %lc on Unix (but not Windows and for widechar functions only!).

   So to use native functions in order to get our semantics we must do the
   following translations in Unicode mode:

   wxWidgets specifier      POSIX specifier
   ----------------------------------------

   %hc, %C, %hC             %c
   %c                       %lc

 */
class wxScanfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
{
    virtual void HandleString(CharType conv, SizeModifier size,
                              CharType& outConv, SizeModifier& outSize)
    {
        outConv = 's';
        outSize = GetOutSize(conv == 'S', size);
    }

    virtual void HandleChar(CharType conv, SizeModifier size,
                            CharType& outConv, SizeModifier& outSize)
    {
        outConv = 'c';
        outSize = GetOutSize(conv == 'C', size);
    }

    SizeModifier GetOutSize(bool convIsUpper, SizeModifier size)
    {
        // %S and %hS -> %s and %lS -> %ls
        if ( convIsUpper )
        {
            if ( size == Size_Long )
                return Size_Long;
            else
                return Size_Default;
        }
        else // %s or %c
        {
            if ( size == Size_Default )
                return Size_Long;
            else
                return size;
        }
    }
};

const wxWCharBuffer wxScanfConvertFormatW(const wchar_t *format)
{
    return wxScanfFormatConverterWchar().Convert(format);
}
#endif // !__WINDOWS__


// ----------------------------------------------------------------------------
// wxFormatString
// ----------------------------------------------------------------------------

#if !wxUSE_UNICODE_WCHAR
const char* wxFormatString::InputAsChar()
{
    if ( m_char )
        return m_char.data();

    // in ANSI build, wx_str() returns char*, in UTF-8 build, this function
    // is only called under UTF-8 locales, so we should return UTF-8 string,
    // which is, again, what wx_str() returns:
    if ( m_str )
        return m_str->wx_str();

    // ditto wxCStrData:
    if ( m_cstr )
        return m_cstr->AsInternal();

    // the last case is that wide string was passed in: in that case, we need
    // to convert it:
    wxASSERT( m_wchar );

    m_char = wxConvLibc.cWC2MB(m_wchar.data());

    return m_char.data();
}

const char* wxFormatString::AsChar()
{
    if ( !m_convertedChar )
#if !wxUSE_UNICODE // FIXME-UTF8: remove this
        m_convertedChar = wxPrintfFormatConverterANSI().Convert(InputAsChar());
#else
        m_convertedChar = wxPrintfFormatConverterUtf8().Convert(InputAsChar());
#endif

    return m_convertedChar.data();
}
#endif // !wxUSE_UNICODE_WCHAR

#if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
const wchar_t* wxFormatString::InputAsWChar()
{
    if ( m_wchar )
        return m_wchar.data();

#if wxUSE_UNICODE_WCHAR
    if ( m_str )
        return m_str->wc_str();
    if ( m_cstr )
        return m_cstr->AsInternal();
#else // wxUSE_UNICODE_UTF8
    if ( m_str )
    {
        m_wchar = m_str->wc_str();
        return m_wchar.data();
    }
    if ( m_cstr )
    {
        m_wchar = m_cstr->AsWCharBuf();
        return m_wchar.data();
    }
#endif // wxUSE_UNICODE_WCHAR/UTF8

    // the last case is that narrow string was passed in: in that case, we need
    // to convert it:
    wxASSERT( m_char );

    m_wchar = wxConvLibc.cMB2WC(m_char.data());

    return m_wchar.data();
}

const wchar_t* wxFormatString::AsWChar()
{
    if ( !m_convertedWChar )
        m_convertedWChar = wxPrintfFormatConverterWchar().Convert(InputAsWChar());

    return m_convertedWChar.data();
}
#endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY

wxString wxFormatString::InputAsString() const
{
    if ( m_str )
        return *m_str;
    if ( m_cstr )
        return m_cstr->AsString();
    if ( m_wchar )
        return wxString(m_wchar);
    if ( m_char )
        return wxString(m_char);

    wxFAIL_MSG( "invalid wxFormatString - not initialized?" );
    return wxString();
}

// ----------------------------------------------------------------------------
// wxFormatString::GetArgumentType()
// ----------------------------------------------------------------------------

namespace
{

template<typename CharType>
wxFormatString::ArgumentType DoGetArgumentType(const CharType *format,
                                               unsigned n)
{
    wxCHECK_MSG( format, wxFormatString::Arg_Other,
                 "empty format string not allowed here" );

    wxPrintfConvSpecParser<CharType> parser(format);

    wxCHECK_MSG( parser.pspec[n-1] != NULL, wxFormatString::Arg_Other,
                 "requested argument not found - invalid format string?" );

    switch ( parser.pspec[n-1]->m_type )
    {
        case wxPAT_CHAR:
        case wxPAT_WCHAR:
            return wxFormatString::Arg_Char;

        default:
            return wxFormatString::Arg_Other;
    }
}

} // anonymous namespace

wxFormatString::ArgumentType wxFormatString::GetArgumentType(unsigned n) const
{
    if ( m_char )
        return DoGetArgumentType(m_char.data(), n);
    else if ( m_wchar )
        return DoGetArgumentType(m_wchar.data(), n);
    else if ( m_str )
        return DoGetArgumentType(m_str->wx_str(), n);
    else if ( m_cstr )
        return DoGetArgumentType(m_cstr->AsInternal(), n);

    wxFAIL_MSG( "unreachable code" );
    return Arg_Other;
}
Commit	Line	Data
c9f78968 VS	1	///////////////////////////////////////////////////////////////////////////////
	2	// Name: src/common/strvararg.cpp
	3	// Purpose: macros for implementing type-safe vararg passing of strings
	4	// Author: Vaclav Slavik
	5	// Created: 2007-02-19
	6	// RCS-ID: $Id$
	7	// Copyright: (c) 2007 REA Elektronik GmbH
	8	// Licence: wxWindows licence
	9	///////////////////////////////////////////////////////////////////////////////
	10
	11	// ============================================================================
	12	// declarations
	13	// ============================================================================
	14
	15	// ----------------------------------------------------------------------------
	16	// headers
	17	// ----------------------------------------------------------------------------
	18
	19	// for compilers that support precompilation, includes "wx.h".
	20	#include "wx/wxprec.h"
	21
	22	#ifdef __BORLANDC__
	23	#pragma hdrstop
	24	#endif
	25
	26	#include "wx/strvararg.h"
c9f78968	27	#include "wx/string.h"
f6e38901	28	#include "wx/crt.h"
47346406	29	#include "wx/private/wxprintf.h"
c9f78968 VS	30
	31	// ============================================================================
	32	// implementation
	33	// ============================================================================
	34
1528e0b8 VS	35	// ----------------------------------------------------------------------------
	36	// wxArgNormalizer<>
	37	// ----------------------------------------------------------------------------
c9f78968	38
2523e9b7	39	const wxStringCharType *wxArgNormalizerNative<const wxString&>::get() const
c9f78968	40	{
8f93a29f	41	return m_value.wx_str();
c9f78968	42	}
c9f78968	43
2523e9b7	44	const wxStringCharType *wxArgNormalizerNative<const wxCStrData&>::get() const
c9f78968	45	{
2523e9b7	46	return m_value.AsInternal();
c9f78968 VS	47	}
c9f78968 VS	48
111d9948	49	#if wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY
47346406 VS	50	wxArgNormalizerWchar<const wxString&>::wxArgNormalizerWchar(
	51	const wxString& s,
	52	const wxFormatString *fmt, unsigned index)
	53	: wxArgNormalizerWithBuffer<wchar_t>(s.wc_str(), fmt, index)
c9f78968	54	{
c9f78968 VS	55	}
c9f78968 VS	56
47346406 VS	57	wxArgNormalizerWchar<const wxCStrData&>::wxArgNormalizerWchar(
	58	const wxCStrData& s,
	59	const wxFormatString *fmt, unsigned index)
	60	: wxArgNormalizerWithBuffer<wchar_t>(s.AsWCharBuf(), fmt, index)
c9f78968	61	{
81727065	62	}
111d9948	63	#endif // wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY
81727065	64
1528e0b8 VS	65	// ----------------------------------------------------------------------------
	66	// wxArgNormalizedString
	67	// ----------------------------------------------------------------------------
	68
2523e9b7	69	wxString wxArgNormalizedString::GetString() const
359bd4d1	70	{
2523e9b7 VS	71	if ( !IsValid() )
	72	return wxEmptyString;
	73
	74	#if wxUSE_UTF8_LOCALE_ONLY
5c33522f	75	return wxString(reinterpret_cast<const char*>(m_ptr));
2523e9b7 VS	76	#else
	77	#if wxUSE_UNICODE_UTF8
	78	if ( wxLocaleIsUtf8 )
5c33522f	79	return wxString(reinterpret_cast<const char*>(m_ptr));
2523e9b7 VS	80	else
2523e9b7 VS	81	#endif
5c33522f	82	return wxString(reinterpret_cast<const wxChar*>(m_ptr));
2523e9b7	83	#endif // !wxUSE_UTF8_LOCALE_ONLY
359bd4d1 VS	84	}
359bd4d1 VS	85
2523e9b7	86	wxArgNormalizedString::operator wxString() const
359bd4d1	87	{
2523e9b7	88	return GetString();
359bd4d1	89	}
1528e0b8	90
50e27899 VS	91	// ----------------------------------------------------------------------------
	92	// wxFormatConverter: class doing the "%s" and "%c" normalization
	93	// ----------------------------------------------------------------------------
	94
	95	/*
	96	There are four problems with wxPrintf() etc. format strings:
	97
	98	1) The printf vararg macros convert all forms of strings into
	99	wxStringCharType* representation. This may make the format string
	100	incorrect: for example, if %ls was used together with a wchar_t*
	101	variadic argument, this would no longer work, because the templates
	102	would change wchar_t* argument to wxStringCharType* and %ls would now
	103	be incorrect in e.g. UTF-8 build. We need make sure only one specifier
	104	form is used.
	105
	106	2) To complicate matters further, the meaning of %s and %c is different
	107	under Windows and on Unix. The Windows/MS convention is as follows:
	108
	109	In ANSI mode:
	110
	111	format specifier results in
	112	-----------------------------------
	113	%s, %hs, %hS char*
	114	%ls, %S, %lS wchar_t*
	115
	116	In Unicode mode:
	117
	118	format specifier results in
	119	-----------------------------------
	120	%hs, %S, %hS char*
	121	%s, %ls, %lS wchar_t*
	122
	123	(While on POSIX systems we have %C identical to %lc and %c always means
	124	char (in any mode) while %lc always means wchar_t.)
	125
	126	In other words, we should _only_ use %s on Windows and %ls on Unix for
	127	wxUSE_UNICODE_WCHAR build.
	128
	129	3) To make things even worse, we need two forms in UTF-8 build: one for
	130	passing strings to ANSI functions under UTF-8 locales (this one should
	131	use %s) and one for widechar functions used under non-UTF-8 locales
	132	(this one should use %ls).
	133
	134	And, of course, the same should be done for %c as well.
	135
50e27899 VS	136
	137	wxScanf() family of functions is simpler, because we don't normalize their
	138	variadic arguments and we only have to handle 2) above and only for widechar
	139	versions.
	140	*/
	141
	142	template<typename T>
	143	class wxFormatConverterBase
	144	{
	145	public:
	146	typedef T CharType;
	147
	148	wxFormatConverterBase()
	149	{
	150	m_fmtOrig = NULL;
	151	m_fmtLast = NULL;
	152	m_nCopied = 0;
	153	}
	154
	155	wxCharTypeBuffer<CharType> Convert(const CharType *format)
	156	{
	157	// this is reset to NULL if we modify the format string
	158	m_fmtOrig = format;
	159
	160	while ( *format )
	161	{
	162	if ( CopyFmtChar(*format++) == _T('%') )
	163	{
	164	// skip any flags
	165	while ( IsFlagChar(*format) )
	166	CopyFmtChar(*format++);
	167
	168	// and possible width
	169	if ( format == _T('') )
	170	CopyFmtChar(*format++);
	171	else
	172	SkipDigits(&format);
	173
	174	// precision?
	175	if ( *format == _T('.') )
	176	{
	177	CopyFmtChar(*format++);
	178	if ( format == _T('') )
	179	CopyFmtChar(*format++);
	180	else
	181	SkipDigits(&format);
	182	}
	183
	184	// next we can have a size modifier
	185	SizeModifier size;
	186
	187	switch ( *format )
	188	{
	189	case 'h':
	190	size = Size_Short;
	191	format++;
	192	break;
	193
	194	case 'l':
	195	// "ll" has a different meaning!
	196	if ( format[1] != 'l' )
	197	{
	198	size = Size_Long;
	199	format++;
200	break;
201	}
202	//else: fall through
203
204	default:
205	size = Size_Default;
206	}
207
208	CharType outConv = *format;
209	SizeModifier outSize = size;
210
211	// and finally we should have the type
212	switch ( *format )
213	{
214	case _T('S'):
215	case _T('s'):
216	// all strings were converted into the same form by
217	// wxArgNormalizer<T>, this form depends on the context
218	// in which the value is used (scanf/printf/wprintf):
219	HandleString(*format, size, outConv, outSize);
220	break;
221
222	case _T('C'):
223	case _T('c'):
224	HandleChar(*format, size, outConv, outSize);
225	break;
226
227	default:
228	// nothing special to do
229	break;
230	}
231
232	if ( outConv == *format && outSize == size ) // no change
233	{
234	if ( size != Size_Default )
235	CopyFmtChar(*(format - 1));
236	CopyFmtChar(*format);
237	}
238	else // something changed
239	{
240	switch ( outSize )
241	{
242	case Size_Long:
243	InsertFmtChar(_T('l'));
244	break;
245
246	case Size_Short:
247	InsertFmtChar(_T('h'));
248	break;
249
250	case Size_Default:
251	// nothing to do
252	break;
253	}
254	InsertFmtChar(outConv);
255	}
256
257	format++;
258	}
259	}
260
261	// notice that we only translated the string if m_fmtOrig == NULL (as
262	// set by CopyAllBefore()), otherwise we should simply use the original
263	// format
264	if ( m_fmtOrig )
265	{
266	return wxCharTypeBuffer<CharType>::CreateNonOwned(m_fmtOrig);
267	}
268	else
269	{
270	// NULL-terminate converted format string:
271	*m_fmtLast = 0;
272	return m_fmt;
273	}
274	}
275
276	virtual ~wxFormatConverterBase() {}
277
278	protected:
279	enum SizeModifier
280	{
281	Size_Default,
282	Size_Short,
283	Size_Long
284	};
285
286	// called to handle %S or %s; 'conv' is conversion specifier ('S' or 's'
287	// respectively), 'size' is the preceding size modifier; the new values of
288	// conversion and size specifiers must be written to outConv and outSize
289	virtual void HandleString(CharType conv, SizeModifier size,
290	CharType& outConv, SizeModifier& outSize) = 0;
291
292	// ditto for %C or %c
293	virtual void HandleChar(CharType conv, SizeModifier size,
294	CharType& outConv, SizeModifier& outSize) = 0;
295
296	private:
297	// copy another character to the translated format: this function does the
298	// copy if we are translating but doesn't do anything at all if we don't,
299	// so we don't create the translated format string at all unless we really
300	// need to (i.e. InsertFmtChar() is called)
301	CharType CopyFmtChar(CharType ch)
302	{
303	if ( !m_fmtOrig )
304	{
305	// we're translating, do copy
306	*(m_fmtLast++) = ch;
307	}
308	else
309	{
310	// simply increase the count which should be copied by
311	// CopyAllBefore() later if needed
312	m_nCopied++;
313	}
314
315	return ch;
316	}
317
318	// insert an extra character
319	void InsertFmtChar(CharType ch)
320	{
321	if ( m_fmtOrig )
322	{
323	// so far we haven't translated anything yet
324	CopyAllBefore();
325	}
326
327	*(m_fmtLast++) = ch;
328	}
329
330	void CopyAllBefore()
331	{
332	wxASSERT_MSG( m_fmtOrig && m_fmt.data() == NULL, "logic error" );
333
334	// the modified format string is guaranteed to be no longer than
335	// 3/2 of the original (worst case: the entire format string consists
336	// of "%s" repeated and is expanded to "%ls" on Unix), so we can
337	// allocate the buffer now and not worry about running out of space if
338	// we over-allocate a bit:
339	size_t fmtLen = wxStrlen(m_fmtOrig);
340	// worst case is of even length, so there's no rounding error in *3/2:
341	m_fmt.extend(fmtLen * 3 / 2);
342
343	if ( m_nCopied > 0 )
344	wxStrncpy(m_fmt.data(), m_fmtOrig, m_nCopied);
345	m_fmtLast = m_fmt.data() + m_nCopied;
346
347	// we won't need it any longer and resetting it also indicates that we
348	// modified the format
349	m_fmtOrig = NULL;
350	}
351
352	static bool IsFlagChar(CharType ch)
353	{
354	return ch == _T('-') \|\| ch == _T('+') \|\|
355	ch == _T('0') \|\| ch == _T(' ') \|\| ch == _T('#');
356	}
357
358	void SkipDigits(const CharType **ptpc)
359	{
360	while ( ptpc >= _T('0') && ptpc <= _T('9') )
361	CopyFmtChar((ptpc)++);
362	}
363
364	// the translated format
365	wxCharTypeBuffer<CharType> m_fmt;
366	CharType *m_fmtLast;
367
368	// the original format
369	const CharType *m_fmtOrig;
370
371	// the number of characters already copied (i.e. already parsed, but left
372	// unmodified)
373	size_t m_nCopied;
374	};
375
376
377
378	#ifdef __WINDOWS
379
380	// on Windows, we should use %s and %c regardless of the build:
381	class wxPrintfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
382	{
383	virtual void HandleString(CharType WXUNUSED(conv),
384	SizeModifier WXUNUSED(size),
385	CharType& outConv, SizeModifier& outSize)
386	{
387	outConv = 's';
388	outSize = Size_Default;
389	}
390
391	virtual void HandleChar(CharType WXUNUSED(conv),
392	SizeModifier WXUNUSED(size),
393	CharType& outConv, SizeModifier& outSize)
394	{
395	outConv = 'c';
396	outSize = Size_Default;
397	}
398	};
399
400	#else // !__WINDOWS__
401
402	// on Unix, it's %s for ANSI functions and %ls for widechar:
403
404	#if !wxUSE_UTF8_LOCALE_ONLY
405	class wxPrintfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
406	{
407	virtual void HandleString(CharType WXUNUSED(conv),
408	SizeModifier WXUNUSED(size),
409	CharType& outConv, SizeModifier& outSize)
410	{
411	outConv = 's';
412	outSize = Size_Long;
413	}
414
415	virtual void HandleChar(CharType WXUNUSED(conv),
416	SizeModifier WXUNUSED(size),
417	CharType& outConv, SizeModifier& outSize)
418	{
419	outConv = 'c';
420	outSize = Size_Long;
421	}
422	};
423	#endif // !wxUSE_UTF8_LOCALE_ONLY
424
425	#if wxUSE_UNICODE_UTF8
426	class wxPrintfFormatConverterUtf8 : public wxFormatConverterBase<char>
427	{
428	virtual void HandleString(CharType WXUNUSED(conv),
429	SizeModifier WXUNUSED(size),
430	CharType& outConv, SizeModifier& outSize)
431	{
432	outConv = 's';
433	outSize = Size_Default;
434	}
435
436	virtual void HandleChar(CharType WXUNUSED(conv),
437	SizeModifier WXUNUSED(size),
438	CharType& outConv, SizeModifier& outSize)
439	{
47346406 VS	440	// chars are represented using wchar_t in both builds, so this is
	441	// the same as above
	442	outConv = 'c';
	443	outSize = Size_Long;
50e27899 VS	444	}
	445	};
	446	#endif // wxUSE_UNICODE_UTF8
	447
	448	#endif // __WINDOWS__/!__WINDOWS__
	449
	450	#if !wxUSE_UNICODE // FIXME-UTF8: remove
	451	class wxPrintfFormatConverterANSI : public wxFormatConverterBase<char>
	452	{
	453	virtual void HandleString(CharType WXUNUSED(conv),
	454	SizeModifier WXUNUSED(size),
	455	CharType& outConv, SizeModifier& outSize)
	456	{
	457	outConv = 's';
	458	outSize = Size_Default;
	459	}
	460
	461	virtual void HandleChar(CharType WXUNUSED(conv),
	462	SizeModifier WXUNUSED(size),
	463	CharType& outConv, SizeModifier& outSize)
	464	{
	465	outConv = 'c';
	466	outSize = Size_Default;
	467	}
	468	};
	469	#endif // ANSI
	470
	471	#ifndef __WINDOWS__
	472	/*
	473
	474	wxScanf() format translation is different, we need to translate %s to %ls
	475	and %c to %lc on Unix (but not Windows and for widechar functions only!).
	476
	477	So to use native functions in order to get our semantics we must do the
	478	following translations in Unicode mode:
	479
	480	wxWidgets specifier POSIX specifier
	481	----------------------------------------
	482
	483	%hc, %C, %hC %c
	484	%c %lc
	485
	486	*/
	487	class wxScanfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
	488	{
	489	virtual void HandleString(CharType conv, SizeModifier size,
	490	CharType& outConv, SizeModifier& outSize)
	491	{
	492	outConv = 's';
	493	outSize = GetOutSize(conv == 'S', size);
	494	}
	495
	496	virtual void HandleChar(CharType conv, SizeModifier size,
	497	CharType& outConv, SizeModifier& outSize)
	498	{
	499	outConv = 'c';
	500	outSize = GetOutSize(conv == 'C', size);
	501	}
	502
	503	SizeModifier GetOutSize(bool convIsUpper, SizeModifier size)
	504	{
	505	// %S and %hS -> %s and %lS -> %ls
	506	if ( convIsUpper )
	507	{
508	if ( size == Size_Long )
509	return Size_Long;
510	else
511	return Size_Default;
512	}
513	else // %s or %c
514	{
515	if ( size == Size_Default )
516	return Size_Long;
517	else
518	return size;
519	}
520	}
521	};
522
523	const wxWCharBuffer wxScanfConvertFormatW(const wchar_t *format)
524	{
525	return wxScanfFormatConverterWchar().Convert(format);
526	}
527	#endif // !__WINDOWS__
528
529
1528e0b8 VS	530	// ----------------------------------------------------------------------------
	531	// wxFormatString
	532	// ----------------------------------------------------------------------------
	533
	534	#if !wxUSE_UNICODE_WCHAR
50e27899	535	const char* wxFormatString::InputAsChar()
1528e0b8 VS	536	{
	537	if ( m_char )
	538	return m_char.data();
	539
	540	// in ANSI build, wx_str() returns char*, in UTF-8 build, this function
	541	// is only called under UTF-8 locales, so we should return UTF-8 string,
	542	// which is, again, what wx_str() returns:
	543	if ( m_str )
	544	return m_str->wx_str();
	545
	546	// ditto wxCStrData:
	547	if ( m_cstr )
	548	return m_cstr->AsInternal();
	549
	550	// the last case is that wide string was passed in: in that case, we need
	551	// to convert it:
	552	wxASSERT( m_wchar );
	553
	554	m_char = wxConvLibc.cWC2MB(m_wchar.data());
	555
	556	return m_char.data();
	557	}
50e27899 VS	558
	559	const char* wxFormatString::AsChar()
	560	{
	561	if ( !m_convertedChar )
	562	#if !wxUSE_UNICODE // FIXME-UTF8: remove this
	563	m_convertedChar = wxPrintfFormatConverterANSI().Convert(InputAsChar());
	564	#else
	565	m_convertedChar = wxPrintfFormatConverterUtf8().Convert(InputAsChar());
	566	#endif
	567
	568	return m_convertedChar.data();
	569	}
1528e0b8 VS	570	#endif // !wxUSE_UNICODE_WCHAR
	571
	572	#if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
50e27899	573	const wchar_t* wxFormatString::InputAsWChar()
1528e0b8 VS	574	{
	575	if ( m_wchar )
	576	return m_wchar.data();
	577
	578	#if wxUSE_UNICODE_WCHAR
	579	if ( m_str )
	580	return m_str->wc_str();
	581	if ( m_cstr )
	582	return m_cstr->AsInternal();
	583	#else // wxUSE_UNICODE_UTF8
	584	if ( m_str )
	585	{
	586	m_wchar = m_str->wc_str();
	587	return m_wchar.data();
	588	}
	589	if ( m_cstr )
	590	{
	591	m_wchar = m_cstr->AsWCharBuf();
	592	return m_wchar.data();
	593	}
	594	#endif // wxUSE_UNICODE_WCHAR/UTF8
	595
	596	// the last case is that narrow string was passed in: in that case, we need
	597	// to convert it:
	598	wxASSERT( m_char );
	599
	600	m_wchar = wxConvLibc.cMB2WC(m_char.data());
	601
	602	return m_wchar.data();
	603	}
50e27899 VS	604
	605	const wchar_t* wxFormatString::AsWChar()
	606	{
	607	if ( !m_convertedWChar )
	608	m_convertedWChar = wxPrintfFormatConverterWchar().Convert(InputAsWChar());
	609
	610	return m_convertedWChar.data();
	611	}
1528e0b8	612	#endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
47346406	613
e68a8744 VS	614	wxString wxFormatString::InputAsString() const
	615	{
	616	if ( m_str )
	617	return *m_str;
	618	if ( m_cstr )
	619	return m_cstr->AsString();
	620	if ( m_wchar )
	621	return wxString(m_wchar);
	622	if ( m_char )
	623	return wxString(m_char);
	624
	625	wxFAIL_MSG( "invalid wxFormatString - not initialized?" );
	626	return wxString();
	627	}
	628
47346406 VS	629	// ----------------------------------------------------------------------------
	630	// wxFormatString::GetArgumentType()
	631	// ----------------------------------------------------------------------------
	632
	633	namespace
	634	{
	635
	636	template<typename CharType>
	637	wxFormatString::ArgumentType DoGetArgumentType(const CharType *format,
	638	unsigned n)
	639	{
	640	wxCHECK_MSG( format, wxFormatString::Arg_Other,
	641	"empty format string not allowed here" );
	642
	643	wxPrintfConvSpecParser<CharType> parser(format);
	644
	645	wxCHECK_MSG( parser.pspec[n-1] != NULL, wxFormatString::Arg_Other,
	646	"requested argument not found - invalid format string?" );
	647
	648	switch ( parser.pspec[n-1]->m_type )
	649	{
	650	case wxPAT_CHAR:
	651	case wxPAT_WCHAR:
	652	return wxFormatString::Arg_Char;
	653
	654	default:
	655	return wxFormatString::Arg_Other;
	656	}
	657	}
	658
	659	} // anonymous namespace
	660
	661	wxFormatString::ArgumentType wxFormatString::GetArgumentType(unsigned n) const
	662	{
	663	if ( m_char )
	664	return DoGetArgumentType(m_char.data(), n);
	665	else if ( m_wchar )
	666	return DoGetArgumentType(m_wchar.data(), n);
	667	else if ( m_str )
	668	return DoGetArgumentType(m_str->wx_str(), n);
	669	else if ( m_cstr )
	670	return DoGetArgumentType(m_cstr->AsInternal(), n);
	671
	672	wxFAIL_MSG( "unreachable code" );
	673	return Arg_Other;
	674	}