1 ///////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/strvararg.cpp
3 // Purpose: macros for implementing type-safe vararg passing of strings
4 // Author: Vaclav Slavik
7 // Copyright: (c) 2007 REA Elektronik GmbH
8 // Licence: wxWindows licence
9 ///////////////////////////////////////////////////////////////////////////////
11 // ============================================================================
13 // ============================================================================
15 // ----------------------------------------------------------------------------
17 // ----------------------------------------------------------------------------
19 // for compilers that support precompilation, includes "wx.h".
20 #include "wx/wxprec.h"
26 #include "wx/strvararg.h"
27 #include "wx/string.h"
29 #include "wx/private/wxprintf.h"
31 // ============================================================================
33 // ============================================================================
35 // ----------------------------------------------------------------------------
37 // ----------------------------------------------------------------------------
// Accessor of the `const wxString&` specialization: hands back the
// wxStringCharType pointer obtained from m_value.wx_str().
39 const wxStringCharType
*wxArgNormalizerNative
<const wxString
&>::get() const
41 return m_value
.wx_str();
// Accessor of the `const wxCStrData&` specialization: hands back the
// wxStringCharType pointer obtained from m_value.AsInternal().
44 const wxStringCharType
*wxArgNormalizerNative
<const wxCStrData
&>::get() const
46 return m_value
.AsInternal();
// Both constructors in this region are compiled only when wxUSE_UNICODE_UTF8
// is set and wxUSE_UTF8_LOCALE_ONLY is not.
49 #if wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY
// Constructor of the `const wxString&` specialization: initializes the
// wxArgNormalizerWithBuffer<wchar_t> base with s.wc_str(), fmt and index.
// NOTE(review): the declaration of `s` and the constructor body are not
// visible in this excerpt.
50 wxArgNormalizerWchar
<const wxString
&>::wxArgNormalizerWchar(
52 const wxFormatString
*fmt
, unsigned index
)
53 : wxArgNormalizerWithBuffer
<wchar_t>(s
.wc_str(), fmt
, index
)
// Constructor of the `const wxCStrData&` specialization: same as above, but
// the wide buffer passed to the base comes from s.AsWCharBuf().
// NOTE(review): the declaration of `s` and the constructor body are not
// visible in this excerpt.
57 wxArgNormalizerWchar
<const wxCStrData
&>::wxArgNormalizerWchar(
59 const wxFormatString
*fmt
, unsigned index
)
60 : wxArgNormalizerWithBuffer
<wchar_t>(s
.AsWCharBuf(), fmt
, index
)
63 #endif // wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY
65 // ----------------------------------------------------------------------------
66 // wxArgNormalizedString
67 // ----------------------------------------------------------------------------
// Builds a wxString from the stored m_ptr. The pointer is reinterpreted as
// const char* on the wxUSE_UTF8_LOCALE_ONLY and wxUSE_UNICODE_UTF8 paths and
// as const wxChar* on the remaining path.
// NOTE(review): the run-time conditions guarding the individual returns (and
// the matching #else/#endif lines) are not visible in this excerpt -- confirm
// against the full file.
69 wxString
wxArgNormalizedString::GetString() const
74 #if wxUSE_UTF8_LOCALE_ONLY
75 return wxString(reinterpret_cast<const char*>(m_ptr
));
77 #if wxUSE_UNICODE_UTF8
79 return wxString(reinterpret_cast<const char*>(m_ptr
));
82 return wxString(reinterpret_cast<const wxChar
*>(m_ptr
));
83 #endif // !wxUSE_UTF8_LOCALE_ONLY
// Conversion operator to wxString.
// NOTE(review): the operator body is not visible in this excerpt; presumably
// it forwards to GetString() -- confirm.
86 wxArgNormalizedString::operator wxString() const
91 // ----------------------------------------------------------------------------
92 // wxFormatConverter: class doing the "%s" and "%c" normalization
93 // ----------------------------------------------------------------------------
96 There are four problems with wxPrintf() etc. format strings:
98 1) The printf vararg macros convert all forms of strings into
99 wxStringCharType* representation. This may make the format string
100 incorrect: for example, if %ls was used together with a wchar_t*
101 variadic argument, this would no longer work, because the templates
102 would change wchar_t* argument to wxStringCharType* and %ls would now
103 be incorrect in e.g. UTF-8 build. We need to make sure only one specifier
106 2) To complicate matters further, the meaning of %s and %c is different
107 under Windows and on Unix. The Windows/MS convention is as follows:
111 format specifier results in
112 -----------------------------------
114 %ls, %S, %lS wchar_t*
118 format specifier results in
119 -----------------------------------
121 %s, %ls, %lS wchar_t*
123 (While on POSIX systems we have %C identical to %lc and %c always means
124 char (in any mode) while %lc always means wchar_t.)
126 In other words, we should _only_ use %s on Windows and %ls on Unix for
127 wxUSE_UNICODE_WCHAR build.
129 3) To make things even worse, we need two forms in UTF-8 build: one for
130 passing strings to ANSI functions under UTF-8 locales (this one should
131 use %s) and one for widechar functions used under non-UTF-8 locales
132 (this one should use %ls).
134 And, of course, the same should be done for %c as well.
137 wxScanf() family of functions is simpler, because we don't normalize their
138 variadic arguments and we only have to handle 2) above and only for widechar
// Base class of the format converters: Convert() walks a format string and
// calls the HandleString()/HandleChar() hooks for %s/%S and %c/%C so that a
// derived class can choose the conversion and size modifier to emit.
// NOTE(review): the template parameter list (CharType is used throughout and
// the derived classes instantiate it with wchar_t and char) and the
// constructor body are not visible in this excerpt.
143 class wxFormatConverterBase
148 wxFormatConverterBase()
// Converts `format` and returns the result as a wxCharTypeBuffer<CharType>.
//
// Every character goes through CopyFmtChar(). After a '%', the flag
// characters (see IsFlagChar()), a '*' width and a '.' optionally followed by
// a '*' precision are copied as they are. The size modifier and conversion
// character are then examined, and HandleString()/HandleChar() may replace
// them through outConv/outSize. If neither changed, the original modifier
// (when size != Size_Default) and conversion are copied; otherwise 'l' or 'h'
// and outConv are written with InsertFmtChar().
//
// When m_fmtOrig is still set, i.e. nothing was modified, the result is a
// non-owning buffer (CreateNonOwned()) wrapping the original format.
//
// NOTE(review): the loop structure, the switch on the conversion character,
// the numeric width/precision handling and several branches are not visible
// in this excerpt; the description covers only the statements shown.
155 wxCharTypeBuffer
<CharType
> Convert(const CharType
*format
)
157 // this is reset to NULL if we modify the format string
162 if ( CopyFmtChar(*format
++) == _T('%') )
165 while ( IsFlagChar(*format
) )
166 CopyFmtChar(*format
++);
168 // and possible width
169 if ( *format
== _T('*') )
170 CopyFmtChar(*format
++);
175 if ( *format
== _T('.') )
177 CopyFmtChar(*format
++);
178 if ( *format
== _T('*') )
179 CopyFmtChar(*format
++);
184 // next we can have a size modifier
195 // "ll" has a different meaning!
196 if ( format
[1] != 'l' )
208 CharType outConv
= *format
;
209 SizeModifier outSize
= size
;
211 // and finally we should have the type
216 // all strings were converted into the same form by
217 // wxArgNormalizer<T>, this form depends on the context
218 // in which the value is used (scanf/printf/wprintf):
219 HandleString(*format
, size
, outConv
, outSize
);
224 HandleChar(*format
, size
, outConv
, outSize
);
228 // nothing special to do
232 if ( outConv
== *format
&& outSize
== size
) // no change
234 if ( size
!= Size_Default
)
235 CopyFmtChar(*(format
- 1));
236 CopyFmtChar(*format
);
238 else // something changed
243 InsertFmtChar(_T('l'));
247 InsertFmtChar(_T('h'));
254 InsertFmtChar(outConv
);
261 // notice that we only translated the string if m_fmtOrig == NULL (as
262 // set by CopyAllBefore()), otherwise we should simply use the original
266 return wxCharTypeBuffer
<CharType
>::CreateNonOwned(m_fmtOrig
);
270 // NULL-terminate converted format string:
// Virtual destructor with an empty body.
276 virtual ~wxFormatConverterBase() {}
// The two hooks below are pure virtual, so every concrete converter has to
// implement both of them.
286 // called to handle %S or %s; 'conv' is conversion specifier ('S' or 's'
287 // respectively), 'size' is the preceding size modifier; the new values of
288 // conversion and size specifiers must be written to outConv and outSize
289 virtual void HandleString(CharType conv
, SizeModifier size
,
290 CharType
& outConv
, SizeModifier
& outSize
) = 0;
292 // ditto for %C or %c
293 virtual void HandleChar(CharType conv
, SizeModifier size
,
294 CharType
& outConv
, SizeModifier
& outSize
) = 0;
297 // copy another character to the translated format: this function does the
298 // copy if we are translating but doesn't do anything at all if we don't,
299 // so we don't create the translated format string at all unless we really
300 // need to (i.e. InsertFmtChar() is called)
//
// `ch` is the character taken from the original format string.
// NOTE(review): the body is not visible in this excerpt. Convert() compares
// the return value with '%', so the function presumably returns `ch` itself
// -- confirm.
301 CharType
CopyFmtChar(CharType ch
)
305 // we're translating, do copy
310 // simply increase the count which should be copied by
311 // CopyAllBefore() later if needed
318 // insert an extra character
// `ch` is a character that is not part of the original format.
// NOTE(review): the body of InsertFmtChar() is not visible in this excerpt.
319 void InsertFmtChar(CharType ch
)
323 // so far we haven't translated anything yet
// NOTE(review): the statements from here on look like the body of the
// CopyAllBefore() helper mentioned in the surrounding comments, but its
// header is not visible in this excerpt -- confirm.
//
// What the visible statements do: assert that m_fmtOrig is still set and that
// m_fmt has no data yet, size m_fmt to 3/2 of the original length, copy the
// first m_nCopied characters of the original into it and point m_fmtLast just
// past the copied part.
332 wxASSERT_MSG( m_fmtOrig
&& m_fmt
.data() == NULL
, "logic error" );
334 // the modified format string is guaranteed to be no longer than
335 // 3/2 of the original (worst case: the entire format string consists
336 // of "%s" repeated and is expanded to "%ls" on Unix), so we can
337 // allocate the buffer now and not worry about running out of space if
338 // we over-allocate a bit:
339 size_t fmtLen
= wxStrlen(m_fmtOrig
);
340 // worst case is of even length, so there's no rounding error in *3/2:
341 m_fmt
.extend(fmtLen
* 3 / 2);
344 wxStrncpy(m_fmt
.data(), m_fmtOrig
, m_nCopied
);
345 m_fmtLast
= m_fmt
.data() + m_nCopied
;
347 // we won't need it any longer and resetting it also indicates that we
348 // modified the format
// Returns true if `ch` is one of the flag characters recognized here:
// '-', '+', '0', ' ' or '#'.
352 static bool IsFlagChar(CharType ch
)
354 return ch
== _T('-') || ch
== _T('+') ||
355 ch
== _T('0') || ch
== _T(' ') || ch
== _T('#');
// Passes a run of decimal digits ('0'..'9') through CopyFmtChar() and
// advances the caller's pointer (*ptpc) past them; does nothing if *ptpc does
// not point at a digit.
358 void SkipDigits(const CharType
**ptpc
)
360 while ( **ptpc
>= _T('0') && **ptpc
<= _T('9') )
361 CopyFmtChar(*(*ptpc
)++);
364 // the translated format
// (its data() is asserted to be NULL before the first modification is made)
365 wxCharTypeBuffer
<CharType
> m_fmt
;
368 // the original format
// (reset to NULL once the format has been modified; Convert() returns it
// unchanged otherwise)
369 const CharType
*m_fmtOrig
;
371 // the number of characters already copied (i.e. already parsed, but left
378 // on Windows, we should use %s and %c regardless of the build:
// Wide-char printf converter, built on wxFormatConverterBase<wchar_t>.
// NOTE(review): the preprocessor line opening this Windows-only branch and
// the assignments to outConv are not visible in this excerpt.
379 class wxPrintfFormatConverterWchar
: public wxFormatConverterBase
<wchar_t>
// String hook: ignores the incoming conversion and size and resets the size
// modifier to Size_Default.
381 virtual void HandleString(CharType
WXUNUSED(conv
),
382 SizeModifier
WXUNUSED(size
),
383 CharType
& outConv
, SizeModifier
& outSize
)
386 outSize
= Size_Default
;
// Character hook: likewise ignores its inputs and resets the size modifier
// to Size_Default.
389 virtual void HandleChar(CharType
WXUNUSED(conv
),
390 SizeModifier
WXUNUSED(size
),
391 CharType
& outConv
, SizeModifier
& outSize
)
394 outSize
= Size_Default
;
398 #else // !__WINDOWS__
400 // on Unix, it's %s for ANSI functions and %ls for widechar:
// Non-Windows variant of the wide-char printf converter (this is the #else
// branch of the __WINDOWS__ check); it is compiled only when
// wxUSE_UTF8_LOCALE_ONLY is not set.
// NOTE(review): the bodies of both overrides are not visible in this excerpt.
// According to the comment preceding this section the widechar form is
// expected to be %ls -- confirm against the full file.
402 #if !wxUSE_UTF8_LOCALE_ONLY
403 class wxPrintfFormatConverterWchar
: public wxFormatConverterBase
<wchar_t>
// String hook; the incoming conversion and size are unused.
405 virtual void HandleString(CharType
WXUNUSED(conv
),
406 SizeModifier
WXUNUSED(size
),
407 CharType
& outConv
, SizeModifier
& outSize
)
// Character hook; the incoming conversion and size are unused.
413 virtual void HandleChar(CharType
WXUNUSED(conv
),
414 SizeModifier
WXUNUSED(size
),
415 CharType
& outConv
, SizeModifier
& outSize
)
421 #endif // !wxUSE_UTF8_LOCALE_ONLY
// Narrow (char) printf converter, compiled only when wxUSE_UNICODE_UTF8 is
// set; built on wxFormatConverterBase<char>.
423 #if wxUSE_UNICODE_UTF8
424 class wxPrintfFormatConverterUtf8
: public wxFormatConverterBase
<char>
// String hook: ignores the incoming conversion and size and resets the size
// modifier to Size_Default.
// NOTE(review): the assignment to outConv is not visible in this excerpt.
426 virtual void HandleString(CharType
WXUNUSED(conv
),
427 SizeModifier
WXUNUSED(size
),
428 CharType
& outConv
, SizeModifier
& outSize
)
431 outSize
= Size_Default
;
// Character hook; the incoming conversion and size are unused.
// NOTE(review): the body is not visible in this excerpt apart from the
// (truncated) comment below.
434 virtual void HandleChar(CharType
WXUNUSED(conv
),
435 SizeModifier
WXUNUSED(size
),
436 CharType
& outConv
, SizeModifier
& outSize
)
438 // chars are represented using wchar_t in both builds, so this is
444 #endif // wxUSE_UNICODE_UTF8
446 #endif // __WINDOWS__/!__WINDOWS__
448 #if !wxUSE_UNICODE // FIXME-UTF8: remove
// Narrow (char) printf converter used when wxUSE_UNICODE is off; built on
// wxFormatConverterBase<char>.
// NOTE(review): the assignments to outConv are not visible in this excerpt.
449 class wxPrintfFormatConverterANSI
: public wxFormatConverterBase
<char>
// String hook: ignores the incoming conversion and size and resets the size
// modifier to Size_Default.
451 virtual void HandleString(CharType
WXUNUSED(conv
),
452 SizeModifier
WXUNUSED(size
),
453 CharType
& outConv
, SizeModifier
& outSize
)
456 outSize
= Size_Default
;
// Character hook: likewise ignores its inputs and resets the size modifier
// to Size_Default.
459 virtual void HandleChar(CharType
WXUNUSED(conv
),
460 SizeModifier
WXUNUSED(size
),
461 CharType
& outConv
, SizeModifier
& outSize
)
464 outSize
= Size_Default
;
472 wxScanf() format translation is different, we need to translate %s to %ls
473 and %c to %lc on Unix (but not Windows and for widechar functions only!).
475 So to use native functions in order to get our semantics we must do the
476 following translations in Unicode mode:
478 wxWidgets specifier POSIX specifier
479 ----------------------------------------
// Wide-char scanf converter, built on wxFormatConverterBase<wchar_t>. Both
// hooks derive the output size modifier from GetOutSize().
// NOTE(review): the assignments to outConv and the return statements of
// GetOutSize() are not visible in this excerpt.
485 class wxScanfFormatConverterWchar
: public wxFormatConverterBase
<wchar_t>
// String hook: the size modifier comes from GetOutSize(), which is told
// whether the conversion was the upper-case 'S'.
487 virtual void HandleString(CharType conv
, SizeModifier size
,
488 CharType
& outConv
, SizeModifier
& outSize
)
491 outSize
= GetOutSize(conv
== 'S', size
);
// Character hook: same as above, with 'C' as the upper-case form.
494 virtual void HandleChar(CharType conv
, SizeModifier size
,
495 CharType
& outConv
, SizeModifier
& outSize
)
498 outSize
= GetOutSize(conv
== 'C', size
);
// Chooses the output size modifier from the case of the conversion character
// (convIsUpper) and the original modifier; the visible checks distinguish
// Size_Long and Size_Default.
501 SizeModifier
GetOutSize(bool convIsUpper
, SizeModifier size
)
503 // %S and %hS -> %s and %lS -> %ls
506 if ( size
== Size_Long
)
513 if ( size
== Size_Default
)
// Converts a wide-char scanf format by running it through a temporary
// wxScanfFormatConverterWchar and returns the resulting buffer.
521 const wxScopedWCharBuffer
wxScanfConvertFormatW(const wchar_t *format
)
523 return wxScanfFormatConverterWchar().Convert(format
);
525 #endif // !__WINDOWS__
528 // ----------------------------------------------------------------------------
530 // ----------------------------------------------------------------------------
532 #if !wxUSE_UNICODE_WCHAR
// Returns the input format as a narrow (char) string.
// Sources visible below: the m_char buffer, m_str->wx_str(),
// m_cstr->AsInternal() and, as the last case, m_wchar converted with
// wxConvLibc.cWC2MB() and stored in m_char before its data() is returned.
// NOTE(review): the conditions selecting each source are not visible in this
// excerpt.
533 const char* wxFormatString::InputAsChar()
536 return m_char
.data();
538 // in ANSI build, wx_str() returns char*, in UTF-8 build, this function
539 // is only called under UTF-8 locales, so we should return UTF-8 string,
540 // which is, again, what wx_str() returns:
542 return m_str
->wx_str();
546 return m_cstr
->AsInternal();
548 // the last case is that wide string was passed in: in that case, we need
552 m_char
= wxConvLibc
.cWC2MB(m_wchar
.data());
554 return m_char
.data();
// Returns the converted narrow format. The conversion of InputAsChar() runs
// only while m_convertedChar is still empty, so the result is computed once
// and then reused. wxPrintfFormatConverterANSI is used when wxUSE_UNICODE is
// off.
// NOTE(review): the wxPrintfFormatConverterUtf8 call is presumably the #else
// branch; the #else/#endif lines are not visible in this excerpt.
557 const char* wxFormatString::AsChar()
559 if ( !m_convertedChar
)
560 #if !wxUSE_UNICODE // FIXME-UTF8: remove this
561 m_convertedChar
= wxPrintfFormatConverterANSI().Convert(InputAsChar());
563 m_convertedChar
= wxPrintfFormatConverterUtf8().Convert(InputAsChar());
566 return m_convertedChar
.data();
568 #endif // !wxUSE_UNICODE_WCHAR
570 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
// Returns the input format as a wide (wchar_t) string.
// Sources visible below: the m_wchar buffer; under wxUSE_UNICODE_WCHAR
// m_str->wc_str() or m_cstr->AsInternal() directly; under wxUSE_UNICODE_UTF8
// m_str->wc_str() or m_cstr->AsWCharBuf() stored in m_wchar first; and, as the
// last case, m_char converted with wxConvLibc.cMB2WC() and stored in m_wchar.
// NOTE(review): the conditions selecting each source are not visible in this
// excerpt.
571 const wchar_t* wxFormatString::InputAsWChar()
574 return m_wchar
.data();
576 #if wxUSE_UNICODE_WCHAR
578 return m_str
->wc_str();
580 return m_cstr
->AsInternal();
581 #else // wxUSE_UNICODE_UTF8
584 m_wchar
= m_str
->wc_str();
585 return m_wchar
.data();
589 m_wchar
= m_cstr
->AsWCharBuf();
590 return m_wchar
.data();
592 #endif // wxUSE_UNICODE_WCHAR/UTF8
594 // the last case is that narrow string was passed in: in that case, we need
598 m_wchar
= wxConvLibc
.cMB2WC(m_char
.data());
600 return m_wchar
.data();
// Returns the converted wide format. InputAsWChar() is run through
// wxPrintfFormatConverterWchar only while m_convertedWChar is still empty, so
// the result is computed once and then reused.
603 const wchar_t* wxFormatString::AsWChar()
605 if ( !m_convertedWChar
)
606 m_convertedWChar
= wxPrintfFormatConverterWchar().Convert(InputAsWChar());
608 return m_convertedWChar
.data();
610 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
// Returns the input format as a wxString, built from whichever
// representation is in use: m_cstr->AsString(), wxString(m_wchar) or
// wxString(m_char). If none applies, a debug failure ("invalid wxFormatString
// - not initialized?") is raised.
// NOTE(review): the conditions selecting each source, and what is returned
// after the failure, are not visible in this excerpt.
612 wxString
wxFormatString::InputAsString() const
617 return m_cstr
->AsString();
619 return wxString(m_wchar
);
621 return wxString(m_char
);
623 wxFAIL_MSG( "invalid wxFormatString - not initialized?" );
627 // ----------------------------------------------------------------------------
628 // wxFormatString::GetArgumentType()
629 // ----------------------------------------------------------------------------
// Determines the kind of the n-th argument of `format`.
//
// Returns wxFormatString::Arg_Other (after a debug check failure) when
// `format` is null or when the parser has no entry for the requested
// argument. Otherwise the format is parsed with
// wxPrintfConvSpecParser<CharType> and the result depends on the m_type of
// entry n-1, so `n` is 1-based.
// NOTE(review): the declaration of `n` and the case labels of the switch are
// not visible in this excerpt; n == 0 would index pspec[n-1] out of range --
// confirm that callers never pass 0.
634 template<typename CharType
>
635 wxFormatString::ArgumentType
DoGetArgumentType(const CharType
*format
,
638 wxCHECK_MSG( format
, wxFormatString::Arg_Other
,
639 "empty format string not allowed here" );
641 wxPrintfConvSpecParser
<CharType
> parser(format
);
643 wxCHECK_MSG( parser
.pspec
[n
-1] != NULL
, wxFormatString::Arg_Other
,
644 "requested argument not found - invalid format string?" );
646 switch ( parser
.pspec
[n
-1]->m_type
)
650 return wxFormatString::Arg_Char
;
653 return wxFormatString::Arg_Other
;
657 } // anonymous namespace
// Returns the kind of the n-th argument by forwarding to DoGetArgumentType()
// with whichever representation of the format is in use: m_char, m_wchar,
// m_str->wx_str() or m_cstr->AsInternal(). Falling through all of them raises
// a debug failure ("unreachable code").
// NOTE(review): the conditions selecting each representation, and what is
// returned after the failure, are not visible in this excerpt.
659 wxFormatString::ArgumentType
wxFormatString::GetArgumentType(unsigned n
) const
662 return DoGetArgumentType(m_char
.data(), n
);
664 return DoGetArgumentType(m_wchar
.data(), n
);
666 return DoGetArgumentType(m_str
->wx_str(), n
);
668 return DoGetArgumentType(m_cstr
->AsInternal(), n
);
670 wxFAIL_MSG( "unreachable code" );