1 ///////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/strvararg.cpp
3 // Purpose: macros for implementing type-safe vararg passing of strings
4 // Author: Vaclav Slavik
7 // Copyright: (c) 2007 REA Elektronik GmbH
8 // Licence: wxWindows licence
9 ///////////////////////////////////////////////////////////////////////////////
11 // ============================================================================
13 // ============================================================================
15 // ----------------------------------------------------------------------------
17 // ----------------------------------------------------------------------------
19 // for compilers that support precompilation, includes "wx.h".
20 #include "wx/wxprec.h"
26 #include "wx/strvararg.h"
27 #include "wx/string.h"
29 #include "wx/private/wxprintf.h"
31 // ============================================================================
33 // ============================================================================
35 // ----------------------------------------------------------------------------
37 // ----------------------------------------------------------------------------
39 const wxStringCharType
*wxArgNormalizerNative
<const wxString
&>::get() const
41 return m_value
.wx_str();
44 const wxStringCharType
*wxArgNormalizerNative
<const wxCStrData
&>::get() const
46 return m_value
.AsInternal();
49 #if wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY
50 wxArgNormalizerWchar
<const wxString
&>::wxArgNormalizerWchar(
52 const wxFormatString
*fmt
, unsigned index
)
53 : wxArgNormalizerWithBuffer
<wchar_t>(s
.wc_str(), fmt
, index
)
57 wxArgNormalizerWchar
<const wxCStrData
&>::wxArgNormalizerWchar(
59 const wxFormatString
*fmt
, unsigned index
)
60 : wxArgNormalizerWithBuffer
<wchar_t>(s
.AsWCharBuf(), fmt
, index
)
63 #endif // wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY
65 // ----------------------------------------------------------------------------
66 // wxArgNormalizedString
67 // ----------------------------------------------------------------------------
69 wxString
wxArgNormalizedString::GetString() const
74 #if wxUSE_UTF8_LOCALE_ONLY
75 return wxString(reinterpret_cast<const char*>(m_ptr
));
77 #if wxUSE_UNICODE_UTF8
79 return wxString(reinterpret_cast<const char*>(m_ptr
));
82 return wxString(reinterpret_cast<const wxChar
*>(m_ptr
));
83 #endif // !wxUSE_UTF8_LOCALE_ONLY
86 wxArgNormalizedString::operator wxString() const
91 // ----------------------------------------------------------------------------
92 // wxFormatConverter: class doing the "%s" and "%c" normalization
93 // ----------------------------------------------------------------------------
96 There are four problems with wxPrintf() etc. format strings:
98 1) The printf vararg macros convert all forms of strings into
99 wxStringCharType* representation. This may make the format string
100 incorrect: for example, if %ls was used together with a wchar_t*
101 variadic argument, this would no longer work, because the templates
102 would change wchar_t* argument to wxStringCharType* and %ls would now
103 be incorrect in e.g. UTF-8 build. We need to make sure only one specifier
106 2) To complicate matters further, the meaning of %s and %c is different
107 under Windows and on Unix. The Windows/MS convention is as follows:
111 format specifier results in
112 -----------------------------------
114 %ls, %S, %lS wchar_t*
118 format specifier results in
119 -----------------------------------
121 %s, %ls, %lS wchar_t*
123 (While on POSIX systems we have %C identical to %lc and %c always means
124 char (in any mode) while %lc always means wchar_t.)
126 In other words, we should _only_ use %s on Windows and %ls on Unix for
127 wxUSE_UNICODE_WCHAR build.
129 3) To make things even worse, we need two forms in UTF-8 build: one for
130 passing strings to ANSI functions under UTF-8 locales (this one should
131 use %s) and one for widechar functions used under non-UTF-8 locales
132 (this one should use %ls).
134 And, of course, the same should be done for %c as well.
137 wxScanf() family of functions is simpler, because we don't normalize their
138 variadic arguments and we only have to handle 2) above and only for widechar
143 class wxFormatConverterBase
148 wxFormatConverterBase()
155 wxScopedCharTypeBuffer
<CharType
> Convert(const CharType
*format
)
157 // this is reset to NULL if we modify the format string
162 if ( CopyFmtChar(*format
++) == wxT('%') )
165 while ( IsFlagChar(*format
) )
166 CopyFmtChar(*format
++);
168 // and possible width
169 if ( *format
== wxT('*') )
170 CopyFmtChar(*format
++);
175 if ( *format
== wxT('.') )
177 CopyFmtChar(*format
++);
178 if ( *format
== wxT('*') )
179 CopyFmtChar(*format
++);
184 // next we can have a size modifier
195 // "ll" has a different meaning!
196 if ( format
[1] != 'l' )
208 CharType outConv
= *format
;
209 SizeModifier outSize
= size
;
211 // and finally we should have the type
216 // all strings were converted into the same form by
217 // wxArgNormalizer<T>, this form depends on the context
218 // in which the value is used (scanf/printf/wprintf):
219 HandleString(*format
, size
, outConv
, outSize
);
224 HandleChar(*format
, size
, outConv
, outSize
);
228 // nothing special to do
232 if ( outConv
== *format
&& outSize
== size
) // no change
234 if ( size
!= Size_Default
)
235 CopyFmtChar(*(format
- 1));
236 CopyFmtChar(*format
);
238 else // something changed
243 InsertFmtChar(wxT('l'));
247 InsertFmtChar(wxT('h'));
254 InsertFmtChar(outConv
);
261 // notice that we only translated the string if m_fmtOrig == NULL (as
262 // set by CopyAllBefore()), otherwise we should simply use the original
266 return wxScopedCharTypeBuffer
<CharType
>::CreateNonOwned(m_fmtOrig
);
270 // shrink converted format string to actual size (instead of
271 // over-sized allocation from CopyAllBefore()) and NUL-terminate
273 m_fmt
.shrink(m_fmtLast
- m_fmt
.data());
278 virtual ~wxFormatConverterBase() {}
288 // called to handle %S or %s; 'conv' is conversion specifier ('S' or 's'
289 // respectively), 'size' is the preceding size modifier; the new values of
290 // conversion and size specifiers must be written to outConv and outSize
291 virtual void HandleString(CharType conv
, SizeModifier size
,
292 CharType
& outConv
, SizeModifier
& outSize
) = 0;
294 // ditto for %C or %c
295 virtual void HandleChar(CharType conv
, SizeModifier size
,
296 CharType
& outConv
, SizeModifier
& outSize
) = 0;
299 // copy another character to the translated format: this function does the
300 // copy if we are translating but doesn't do anything at all if we don't,
301 // so we don't create the translated format string at all unless we really
302 // need to (i.e. InsertFmtChar() is called)
303 CharType
CopyFmtChar(CharType ch
)
307 // we're translating, do copy
312 // simply increase the count which should be copied by
313 // CopyAllBefore() later if needed
320 // insert an extra character
321 void InsertFmtChar(CharType ch
)
325 // so far we haven't translated anything yet
334 wxASSERT_MSG( m_fmtOrig
&& m_fmt
.data() == NULL
, "logic error" );
336 // the modified format string is guaranteed to be no longer than
337 // 3/2 of the original (worst case: the entire format string consists
338 // of "%s" repeated and is expanded to "%ls" on Unix), so we can
339 // allocate the buffer now and not worry about running out of space if
340 // we over-allocate a bit:
341 size_t fmtLen
= wxStrlen(m_fmtOrig
);
342 // worst case is of even length, so there's no rounding error in *3/2:
343 m_fmt
.extend(fmtLen
* 3 / 2);
346 wxStrncpy(m_fmt
.data(), m_fmtOrig
, m_nCopied
);
347 m_fmtLast
= m_fmt
.data() + m_nCopied
;
349 // we won't need it any longer and resetting it also indicates that we
350 // modified the format
354 static bool IsFlagChar(CharType ch
)
356 return ch
== wxT('-') || ch
== wxT('+') ||
357 ch
== wxT('0') || ch
== wxT(' ') || ch
== wxT('#');
360 void SkipDigits(const CharType
**ptpc
)
362 while ( **ptpc
>= wxT('0') && **ptpc
<= wxT('9') )
363 CopyFmtChar(*(*ptpc
)++);
366 // the translated format
367 wxCharTypeBuffer
<CharType
> m_fmt
;
370 // the original format
371 const CharType
*m_fmtOrig
;
373 // the number of characters already copied (i.e. already parsed, but left
378 #if defined(__WINDOWS__) && !defined(__CYGWIN__)
380 // on Windows, we should use %s and %c regardless of the build:
381 class wxPrintfFormatConverterWchar
: public wxFormatConverterBase
<wchar_t>
383 virtual void HandleString(CharType
WXUNUSED(conv
),
384 SizeModifier
WXUNUSED(size
),
385 CharType
& outConv
, SizeModifier
& outSize
)
388 outSize
= Size_Default
;
391 virtual void HandleChar(CharType
WXUNUSED(conv
),
392 SizeModifier
WXUNUSED(size
),
393 CharType
& outConv
, SizeModifier
& outSize
)
396 outSize
= Size_Default
;
400 #else // !__WINDOWS__
402 // on Unix, it's %s for ANSI functions and %ls for widechar:
404 #if !wxUSE_UTF8_LOCALE_ONLY
405 class wxPrintfFormatConverterWchar
: public wxFormatConverterBase
<wchar_t>
407 virtual void HandleString(CharType
WXUNUSED(conv
),
408 SizeModifier
WXUNUSED(size
),
409 CharType
& outConv
, SizeModifier
& outSize
)
415 virtual void HandleChar(CharType
WXUNUSED(conv
),
416 SizeModifier
WXUNUSED(size
),
417 CharType
& outConv
, SizeModifier
& outSize
)
423 #endif // !wxUSE_UTF8_LOCALE_ONLY
425 #if wxUSE_UNICODE_UTF8
426 class wxPrintfFormatConverterUtf8
: public wxFormatConverterBase
<char>
428 virtual void HandleString(CharType
WXUNUSED(conv
),
429 SizeModifier
WXUNUSED(size
),
430 CharType
& outConv
, SizeModifier
& outSize
)
433 outSize
= Size_Default
;
436 virtual void HandleChar(CharType
WXUNUSED(conv
),
437 SizeModifier
WXUNUSED(size
),
438 CharType
& outConv
, SizeModifier
& outSize
)
440 // chars are represented using wchar_t in both builds, so this is
446 #endif // wxUSE_UNICODE_UTF8
448 #endif // __WINDOWS__/!__WINDOWS__
450 #if !wxUSE_UNICODE // FIXME-UTF8: remove
451 class wxPrintfFormatConverterANSI
: public wxFormatConverterBase
<char>
453 virtual void HandleString(CharType
WXUNUSED(conv
),
454 SizeModifier
WXUNUSED(size
),
455 CharType
& outConv
, SizeModifier
& outSize
)
458 outSize
= Size_Default
;
461 virtual void HandleChar(CharType
WXUNUSED(conv
),
462 SizeModifier
WXUNUSED(size
),
463 CharType
& outConv
, SizeModifier
& outSize
)
466 outSize
= Size_Default
;
474 wxScanf() format translation is different, we need to translate %s to %ls
475 and %c to %lc on Unix (but not Windows and for widechar functions only!).
477 So to use native functions in order to get our semantics we must do the
478 following translations in Unicode mode:
480 wxWidgets specifier POSIX specifier
481 ----------------------------------------
487 class wxScanfFormatConverterWchar
: public wxFormatConverterBase
<wchar_t>
489 virtual void HandleString(CharType conv
, SizeModifier size
,
490 CharType
& outConv
, SizeModifier
& outSize
)
493 outSize
= GetOutSize(conv
== 'S', size
);
496 virtual void HandleChar(CharType conv
, SizeModifier size
,
497 CharType
& outConv
, SizeModifier
& outSize
)
500 outSize
= GetOutSize(conv
== 'C', size
);
503 SizeModifier
GetOutSize(bool convIsUpper
, SizeModifier size
)
505 // %S and %hS -> %s and %lS -> %ls
508 if ( size
== Size_Long
)
515 if ( size
== Size_Default
)
523 const wxScopedWCharBuffer
wxScanfConvertFormatW(const wchar_t *format
)
525 return wxScanfFormatConverterWchar().Convert(format
);
527 #endif // !__WINDOWS__
530 // ----------------------------------------------------------------------------
532 // ----------------------------------------------------------------------------
534 #if !wxUSE_UNICODE_WCHAR
535 const char* wxFormatString::InputAsChar()
538 return m_char
.data();
540 // in ANSI build, wx_str() returns char*, in UTF-8 build, this function
541 // is only called under UTF-8 locales, so we should return UTF-8 string,
542 // which is, again, what wx_str() returns:
544 return m_str
->wx_str();
548 return m_cstr
->AsInternal();
550 // the last case is that wide string was passed in: in that case, we need
554 m_char
= wxConvLibc
.cWC2MB(m_wchar
.data());
556 return m_char
.data();
559 const char* wxFormatString::AsChar()
561 if ( !m_convertedChar
)
562 #if !wxUSE_UNICODE // FIXME-UTF8: remove this
563 m_convertedChar
= wxPrintfFormatConverterANSI().Convert(InputAsChar());
565 m_convertedChar
= wxPrintfFormatConverterUtf8().Convert(InputAsChar());
568 return m_convertedChar
.data();
570 #endif // !wxUSE_UNICODE_WCHAR
572 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
573 const wchar_t* wxFormatString::InputAsWChar()
576 return m_wchar
.data();
578 #if wxUSE_UNICODE_WCHAR
580 return m_str
->wc_str();
582 return m_cstr
->AsInternal();
583 #else // wxUSE_UNICODE_UTF8
586 m_wchar
= m_str
->wc_str();
587 return m_wchar
.data();
591 m_wchar
= m_cstr
->AsWCharBuf();
592 return m_wchar
.data();
594 #endif // wxUSE_UNICODE_WCHAR/UTF8
596 // the last case is that narrow string was passed in: in that case, we need
600 m_wchar
= wxConvLibc
.cMB2WC(m_char
.data());
602 return m_wchar
.data();
605 const wchar_t* wxFormatString::AsWChar()
607 if ( !m_convertedWChar
)
608 m_convertedWChar
= wxPrintfFormatConverterWchar().Convert(InputAsWChar());
610 return m_convertedWChar
.data();
612 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
614 wxString
wxFormatString::InputAsString() const
619 return m_cstr
->AsString();
621 return wxString(m_wchar
);
623 return wxString(m_char
);
625 wxFAIL_MSG( "invalid wxFormatString - not initialized?" );
629 // ----------------------------------------------------------------------------
630 // wxFormatString::GetArgumentType()
631 // ----------------------------------------------------------------------------
636 template<typename CharType
>
637 wxFormatString::ArgumentType
DoGetArgumentType(const CharType
*format
,
640 wxCHECK_MSG( format
, wxFormatString::Arg_Unknown
,
641 "empty format string not allowed here" );
643 wxPrintfConvSpecParser
<CharType
> parser(format
);
645 wxCHECK_MSG( n
<= parser
.nargs
, wxFormatString::Arg_Unknown
,
646 "more arguments than format string specifiers?" );
648 wxCHECK_MSG( parser
.pspec
[n
-1] != NULL
, wxFormatString::Arg_Unknown
,
649 "requested argument not found - invalid format string?" );
651 switch ( parser
.pspec
[n
-1]->m_type
)
655 return wxFormatString::Arg_Char
;
659 return wxFormatString::Arg_String
;
662 return wxFormatString::Arg_Int
;
664 return wxFormatString::Arg_LongInt
;
666 case wxPAT_LONGLONGINT
:
667 return wxFormatString::Arg_LongLongInt
;
670 return wxFormatString::Arg_Size_t
;
673 return wxFormatString::Arg_Double
;
674 case wxPAT_LONGDOUBLE
:
675 return wxFormatString::Arg_LongDouble
;
678 return wxFormatString::Arg_Pointer
;
681 return wxFormatString::Arg_IntPtr
;
682 case wxPAT_NSHORTINT
:
683 return wxFormatString::Arg_ShortIntPtr
;
685 return wxFormatString::Arg_LongIntPtr
;
688 // "*" requires argument of type int
689 return wxFormatString::Arg_Int
;
692 // (handled after the switch statement)
697 wxFAIL_MSG( "unexpected argument type" );
698 return wxFormatString::Arg_Unknown
;
701 } // anonymous namespace
703 wxFormatString::ArgumentType
wxFormatString::GetArgumentType(unsigned n
) const
706 return DoGetArgumentType(m_char
.data(), n
);
708 return DoGetArgumentType(m_wchar
.data(), n
);
710 return DoGetArgumentType(m_str
->wx_str(), n
);
712 return DoGetArgumentType(m_cstr
->AsInternal(), n
);
714 wxFAIL_MSG( "unreachable code" );