1 /////////////////////////////////////////////////////////////////////////////// 
   2 // Name:        src/common/strvararg.cpp 
   3 // Purpose:     macros for implementing type-safe vararg passing of strings 
   4 // Author:      Vaclav Slavik 
   7 // Copyright:   (c) 2007 REA Elektronik GmbH 
   8 // Licence:     wxWindows licence 
   9 /////////////////////////////////////////////////////////////////////////////// 
  11 // ============================================================================ 
  13 // ============================================================================ 
  15 // ---------------------------------------------------------------------------- 
  17 // ---------------------------------------------------------------------------- 
  19 // for compilers that support precompilation, includes "wx.h". 
  20 #include "wx/wxprec.h" 
  26 #include "wx/strvararg.h" 
  27 #include "wx/string.h" 
  29 #include "wx/private/wxprintf.h" 
  31 // ============================================================================ 
  33 // ============================================================================ 
  35 // ---------------------------------------------------------------------------- 
  37 // ---------------------------------------------------------------------------- 
  // Returns the native (wxStringCharType) representation of the wrapped
  // wxString argument, i.e. the pointer given by m_value.wx_str().
  39 const wxStringCharType 
*wxArgNormalizerNative
<const wxString
&>::get() const 
  41     return m_value
.wx_str(); 
  // Returns the native (wxStringCharType) representation of the wrapped
  // wxCStrData argument, i.e. the pointer given by m_value.AsInternal().
  44 const wxStringCharType 
*wxArgNormalizerNative
<const wxCStrData
&>::get() const 
  46     return m_value
.AsInternal(); 
  49 #if wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY 
  // Constructor of the wchar_t normalizer for wxString arguments: passes
  // s.wc_str() together with fmt and index on to the
  // wxArgNormalizerWithBuffer<wchar_t> base class.
  // NOTE(review): the declaration of the parameter `s` is missing from this
  // listing -- confirm against the header.
  50 wxArgNormalizerWchar
<const wxString
&>::wxArgNormalizerWchar( 
  52                             const wxFormatString 
*fmt
, unsigned index
) 
  53     : wxArgNormalizerWithBuffer
<wchar_t>(s
.wc_str(), fmt
, index
) 
  // Constructor of the wchar_t normalizer for wxCStrData arguments: passes
  // s.AsWCharBuf() together with fmt and index on to the
  // wxArgNormalizerWithBuffer<wchar_t> base class.
  // NOTE(review): the declaration of the parameter `s` is missing from this
  // listing -- confirm against the header.
  57 wxArgNormalizerWchar
<const wxCStrData
&>::wxArgNormalizerWchar( 
  59                             const wxFormatString 
*fmt
, unsigned index
) 
  60     : wxArgNormalizerWithBuffer
<wchar_t>(s
.AsWCharBuf(), fmt
, index
) 
  63 #endif // wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY 
  65 // ---------------------------------------------------------------------------- 
  66 // wxArgNormalizedString 
  67 // ---------------------------------------------------------------------------- 
  // Builds a wxString from the stored pointer m_ptr. The pointer is
  // reinterpreted as const char* when wxUSE_UTF8_LOCALE_ONLY is set, and in
  // one branch of wxUSE_UNICODE_UTF8 builds; otherwise it is reinterpreted
  // as const wxChar*.
  // NOTE(review): the run-time condition selecting the char* branch in
  // wxUSE_UNICODE_UTF8 builds is not visible in this listing.
  69 wxString 
wxArgNormalizedString::GetString() const 
  74 #if wxUSE_UTF8_LOCALE_ONLY 
  75     return wxString(reinterpret_cast<const char*>(m_ptr
)); 
  77     #if wxUSE_UNICODE_UTF8 
  79             return wxString(reinterpret_cast<const char*>(m_ptr
)); 
  82         return wxString(reinterpret_cast<const wxChar
*>(m_ptr
)); 
  83 #endif // !wxUSE_UTF8_LOCALE_ONLY 
  86 wxArgNormalizedString::operator wxString() const 
  91 // ---------------------------------------------------------------------------- 
  92 // wxFormatConverter: class doing the "%s" and "%c" normalization 
  93 // ---------------------------------------------------------------------------- 
  96    There are four problems with wxPrintf() etc. format strings: 
  98    1) The printf vararg macros convert all forms of strings into 
  99       wxStringCharType* representation. This may make the format string 
 100       incorrect: for example, if %ls was used together with a wchar_t* 
 101       variadic argument, this would no longer work, because the templates 
 102       would change wchar_t* argument to wxStringCharType* and %ls would now 
  103       be incorrect in e.g. UTF-8 build. We need to make sure only one specifier 
 106    2) To complicate matters further, the meaning of %s and %c is different 
 107       under Windows and on Unix. The Windows/MS convention is as follows: 
 111        format specifier         results in 
 112        ----------------------------------- 
 114        %ls, %S, %lS             wchar_t* 
 118        format specifier         results in 
 119        ----------------------------------- 
 121        %s, %ls, %lS             wchar_t* 
 123        (While on POSIX systems we have %C identical to %lc and %c always means 
 124        char (in any mode) while %lc always means wchar_t.) 
 126       In other words, we should _only_ use %s on Windows and %ls on Unix for 
 127       wxUSE_UNICODE_WCHAR build. 
 129    3) To make things even worse, we need two forms in UTF-8 build: one for 
 130       passing strings to ANSI functions under UTF-8 locales (this one should 
 131       use %s) and one for widechar functions used under non-UTF-8 locales 
 132       (this one should use %ls). 
 134    And, of course, the same should be done for %c as well. 
 137    wxScanf() family of functions is simpler, because we don't normalize their 
 138    variadic arguments and we only have to handle 2) above and only for widechar 
 143 class wxFormatConverterBase
 
 148     wxFormatConverterBase() 
 // Scans the printf-style format string and returns it, translated if
 // necessary. After each '%' the visible code copies flag characters, a '*'
 // width, a '.' precision (optionally '*'), then looks at the size modifier
 // and finally the conversion type. String and char conversions are handed
 // to HandleString()/HandleChar(), which may request a different conversion
 // character (outConv) and/or size modifier (outSize).
 // If nothing was changed, a non-owning buffer over the original format
 // (m_fmtOrig) is returned; otherwise the converted format is
 // NUL-terminated (the final return is not visible in this listing).
 155     wxCharTypeBuffer
<CharType
> Convert(const CharType 
*format
) 
 157         // this is reset to NULL if we modify the format string 
 162             if ( CopyFmtChar(*format
++) == _T('%') ) 
                 // copy any flag characters following the '%'
 165                 while ( IsFlagChar(*format
) ) 
 166                     CopyFmtChar(*format
++); 
 168                 // and possible width 
 169                 if ( *format 
== _T('*') ) 
 170                     CopyFmtChar(*format
++); 
                 // optional precision: '.' possibly followed by '*'
 175                 if ( *format 
== _T('.') ) 
 177                     CopyFmtChar(*format
++); 
 178                     if ( *format 
== _T('*') ) 
 179                         CopyFmtChar(*format
++); 
 184                 // next we can have a size modifier 
 195                         // "ll" has a different meaning! 
 196                         if ( format
[1] != 'l' ) 
                 // start from "no change": same conversion, same size
 208                 CharType outConv 
= *format
; 
 209                 SizeModifier outSize 
= size
; 
 211                 // and finally we should have the type 
 216                         // all strings were converted into the same form by 
 217                         // wxArgNormalizer<T>, this form depends on the context 
 218                         // in which the value is used (scanf/printf/wprintf): 
 219                         HandleString(*format
, size
, outConv
, outSize
); 
 224                         HandleChar(*format
, size
, outConv
, outSize
); 
 228                         // nothing special to do 
 232                 if ( outConv 
== *format 
&& outSize 
== size 
) // no change 
                     // re-emit the original size modifier character (the one
                     // just before the conversion), if any, then the
                     // conversion character itself
 234                     if ( size 
!= Size_Default 
) 
 235                         CopyFmtChar(*(format 
- 1)); 
 236                     CopyFmtChar(*format
); 
 238                 else // something changed 
                     // emit the new size modifier ('l' or 'h') followed by
                     // the new conversion character
 243                             InsertFmtChar(_T('l')); 
 247                             InsertFmtChar(_T('h')); 
 254                     InsertFmtChar(outConv
); 
 261         // notice that we only translated the string if m_fmtOrig == NULL (as 
 262         // set by CopyAllBefore()), otherwise we should simply use the original 
 266             return wxCharTypeBuffer
<CharType
>::CreateNonOwned(m_fmtOrig
); 
 270             // NULL-terminate converted format string: 
 276     virtual ~wxFormatConverterBase() {} 
 286     // called to handle %S or %s; 'conv' is conversion specifier ('S' or 's' 
 287     // respectively), 'size' is the preceding size modifier; the new values of 
 288     // conversion and size specifiers must be written to outConv and outSize 
 // Pure virtual hook for string conversions, implemented by the concrete
 // converters.
 //   conv    - the conversion character found in the format
 //   size    - the size modifier that preceded it
 //   outConv - receives the conversion character to emit
 //   outSize - receives the size modifier to emit
 289     virtual void HandleString(CharType conv
, SizeModifier size
, 
 290                               CharType
& outConv
, SizeModifier
& outSize
) = 0; 
 292     // ditto for %C or %c 
 // Pure virtual hook for character conversions, implemented by the concrete
 // converters.
 //   conv    - the conversion character found in the format
 //   size    - the size modifier that preceded it
 //   outConv - receives the conversion character to emit
 //   outSize - receives the size modifier to emit
 293     virtual void HandleChar(CharType conv
, SizeModifier size
, 
 294                             CharType
& outConv
, SizeModifier
& outSize
) = 0; 
 297     // copy another character to the translated format: this function does the 
 298     // copy if we are translating but doesn't do anything at all if we don't, 
 299     // so we don't create the translated format string at all unless we really 
 300     // need to (i.e. InsertFmtChar() is called) 
 301     CharType 
CopyFmtChar(CharType ch
) 
 305             // we're translating, do copy 
 310             // simply increase the count which should be copied by 
 311             // CopyAllBefore() later if needed 
 318     // insert an extra character 
 319     void InsertFmtChar(CharType ch
) 
 323             // so far we haven't translated anything yet 
 332         wxASSERT_MSG( m_fmtOrig 
&& m_fmt
.data() == NULL
, "logic error" ); 
 334         // the modified format string is guaranteed to be no longer than 
 335         // 3/2 of the original (worst case: the entire format string consists 
 336         // of "%s" repeated and is expanded to "%ls" on Unix), so we can 
 337         // allocate the buffer now and not worry about running out of space if 
 338         // we over-allocate a bit: 
 339         size_t fmtLen 
= wxStrlen(m_fmtOrig
); 
 340         // worst case is of even length, so there's no rounding error in *3/2: 
 341         m_fmt
.extend(fmtLen 
* 3 / 2); 
 344             wxStrncpy(m_fmt
.data(), m_fmtOrig
, m_nCopied
); 
 345         m_fmtLast 
= m_fmt
.data() + m_nCopied
; 
 347         // we won't need it any longer and resetting it also indicates that we 
 348         // modified the format 
 // Returns true if ch is one of the printf flag characters
 // '-', '+', '0', ' ' or '#'.
 352     static bool IsFlagChar(CharType ch
) 
 354         return ch 
== _T('-') || ch 
== _T('+') || 
 355                ch 
== _T('0') || ch 
== _T(' ') || ch 
== _T('#'); 
 // Passes a run of decimal digits ('0'..'9') to CopyFmtChar() one by one,
 // advancing the caller's pointer *ptpc past them.
 358     void SkipDigits(const CharType 
**ptpc
) 
 360         while ( **ptpc 
>= _T('0') && **ptpc 
<= _T('9') ) 
 361             CopyFmtChar(*(*ptpc
)++); 
 364     // the translated format 
 365     wxCharTypeBuffer
<CharType
> m_fmt
; 
 368     // the original format 
 369     const CharType 
*m_fmtOrig
; 
 371     // the number of characters already copied (i.e. already parsed, but left 
 378 // on Windows, we should use %s and %c regardless of the build: 
 // printf format converter for wchar_t format strings. Both handlers ignore
 // the incoming conversion and size and reset the size modifier to
 // Size_Default.
 // NOTE(review): the assignments to outConv are not visible in this listing.
 379 class wxPrintfFormatConverterWchar 
: public wxFormatConverterBase
<wchar_t> 
 381     virtual void HandleString(CharType 
WXUNUSED(conv
), 
 382                               SizeModifier 
WXUNUSED(size
), 
 383                               CharType
& outConv
, SizeModifier
& outSize
) 
 386         outSize 
= Size_Default
; 
 389     virtual void HandleChar(CharType 
WXUNUSED(conv
), 
 390                             SizeModifier 
WXUNUSED(size
), 
 391                             CharType
& outConv
, SizeModifier
& outSize
) 
 394         outSize 
= Size_Default
; 
 398 #else // !__WINDOWS__ 
 400 // on Unix, it's %s for ANSI functions and %ls for widechar: 
 402 #if !wxUSE_UTF8_LOCALE_ONLY 
 // printf format converter for wchar_t format strings. Both handlers ignore
 // the incoming conversion and size and report their result through
 // outConv/outSize.
 // NOTE(review): the handler bodies are not visible in this listing.
 403 class wxPrintfFormatConverterWchar 
: public wxFormatConverterBase
<wchar_t> 
 405     virtual void HandleString(CharType 
WXUNUSED(conv
), 
 406                               SizeModifier 
WXUNUSED(size
), 
 407                               CharType
& outConv
, SizeModifier
& outSize
) 
 413     virtual void HandleChar(CharType 
WXUNUSED(conv
), 
 414                             SizeModifier 
WXUNUSED(size
), 
 415                             CharType
& outConv
, SizeModifier
& outSize
) 
 421 #endif // !wxUSE_UTF8_LOCALE_ONLY 
 423 #if wxUSE_UNICODE_UTF8 
 // printf format converter for narrow (char) format strings. HandleString()
 // resets the size modifier to Size_Default.
 // NOTE(review): the outConv assignments and the body of HandleChar() are
 // not visible in this listing.
 424 class wxPrintfFormatConverterUtf8 
: public wxFormatConverterBase
<char> 
 426     virtual void HandleString(CharType 
WXUNUSED(conv
), 
 427                               SizeModifier 
WXUNUSED(size
), 
 428                               CharType
& outConv
, SizeModifier
& outSize
) 
 431         outSize 
= Size_Default
; 
 434     virtual void HandleChar(CharType 
WXUNUSED(conv
), 
 435                             SizeModifier 
WXUNUSED(size
), 
 436                             CharType
& outConv
, SizeModifier
& outSize
) 
 438         // chars are represented using wchar_t in both builds, so this is 
 444 #endif // wxUSE_UNICODE_UTF8 
 446 #endif // __WINDOWS__/!__WINDOWS__ 
 448 #if !wxUSE_UNICODE // FIXME-UTF8: remove 
 // printf format converter for narrow (char) format strings. Both handlers
 // ignore the incoming conversion and size and reset the size modifier to
 // Size_Default.
 // NOTE(review): the assignments to outConv are not visible in this listing.
 449 class wxPrintfFormatConverterANSI 
: public wxFormatConverterBase
<char> 
 451     virtual void HandleString(CharType 
WXUNUSED(conv
), 
 452                               SizeModifier 
WXUNUSED(size
), 
 453                               CharType
& outConv
, SizeModifier
& outSize
) 
 456         outSize 
= Size_Default
; 
 459     virtual void HandleChar(CharType 
WXUNUSED(conv
), 
 460                             SizeModifier 
WXUNUSED(size
), 
 461                             CharType
& outConv
, SizeModifier
& outSize
) 
 464         outSize 
= Size_Default
; 
 472    wxScanf() format translation is different, we need to translate %s to %ls 
 473    and %c to %lc on Unix (but not Windows and for widechar functions only!). 
 475    So to use native functions in order to get our semantics we must do the 
 476    following translations in Unicode mode: 
 478    wxWidgets specifier      POSIX specifier 
 479    ---------------------------------------- 
 // scanf format converter for wchar_t format strings. The output size
 // modifier is computed by GetOutSize(), which is told whether the
 // conversion was the upper-case form ('S' for strings, 'C' for chars) and
 // which size modifier preceded it.
 // NOTE(review): the outConv assignments and the values returned by
 // GetOutSize() are not visible in this listing.
 485 class wxScanfFormatConverterWchar 
: public wxFormatConverterBase
<wchar_t> 
 487     virtual void HandleString(CharType conv
, SizeModifier size
, 
 488                               CharType
& outConv
, SizeModifier
& outSize
) 
 491         outSize 
= GetOutSize(conv 
== 'S', size
); 
 494     virtual void HandleChar(CharType conv
, SizeModifier size
, 
 495                             CharType
& outConv
, SizeModifier
& outSize
) 
 498         outSize 
= GetOutSize(conv 
== 'C', size
); 
     // Maps (upper-case conversion?, original size modifier) to the size
     // modifier to emit; the visible tests compare size against Size_Long
     // and Size_Default.
 501     SizeModifier 
GetOutSize(bool convIsUpper
, SizeModifier size
) 
 503         // %S and %hS -> %s and %lS -> %ls 
 506             if ( size 
== Size_Long 
) 
 513             if ( size 
== Size_Default 
) 
 // Translates a wide-character scanf format string by running it through a
 // temporary wxScanfFormatConverterWchar and returns the resulting buffer.
 521 const wxScopedWCharBuffer 
wxScanfConvertFormatW(const wchar_t *format
) 
 523     return wxScanfFormatConverterWchar().Convert(format
); 
 525 #endif // !__WINDOWS__ 
 528 // ---------------------------------------------------------------------------- 
 530 // ---------------------------------------------------------------------------- 
 532 #if !wxUSE_UNICODE_WCHAR 
 // Returns the unconverted format string as a narrow (char) string. The
 // result comes from whichever representation is available: m_char,
 // m_str->wx_str() or m_cstr->AsInternal(). As a last resort the wide
 // string m_wchar is converted with wxConvLibc.cWC2MB() and stored in
 // m_char.
 // NOTE(review): the conditions selecting each branch are not visible in
 // this listing.
 533 const char* wxFormatString::InputAsChar() 
 536         return m_char
.data(); 
 538     // in ANSI build, wx_str() returns char*, in UTF-8 build, this function 
 539     // is only called under UTF-8 locales, so we should return UTF-8 string, 
 540     // which is, again, what wx_str() returns: 
 542         return m_str
->wx_str(); 
 546         return m_cstr
->AsInternal(); 
 548     // the last case is that wide string was passed in: in that case, we need 
 552     m_char 
= wxConvLibc
.cWC2MB(m_wchar
.data()); 
 554     return m_char
.data(); 
 // Returns the format string converted for narrow printf-like functions.
 // The conversion is done only when m_convertedChar is not yet set:
 // InputAsChar() is run through wxPrintfFormatConverterANSI when
 // !wxUSE_UNICODE, or through wxPrintfFormatConverterUtf8 in the other
 // preprocessor branch, and the result is kept in m_convertedChar.
 557 const char* wxFormatString::AsChar() 
 559     if ( !m_convertedChar 
) 
 560 #if !wxUSE_UNICODE // FIXME-UTF8: remove this 
 561         m_convertedChar 
= wxPrintfFormatConverterANSI().Convert(InputAsChar()); 
 563         m_convertedChar 
= wxPrintfFormatConverterUtf8().Convert(InputAsChar()); 
 566     return m_convertedChar
.data(); 
 568 #endif // !wxUSE_UNICODE_WCHAR 
 570 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY 
 // Returns the unconverted format string as a wide (wchar_t) string.
 // In wxUSE_UNICODE_WCHAR builds the pointer comes from m_wchar,
 // m_str->wc_str() or m_cstr->AsInternal(). In the UTF-8 branch the
 // wxString/wxCStrData input is first converted into m_wchar. As a last
 // resort the narrow string m_char is converted with wxConvLibc.cMB2WC()
 // and stored in m_wchar.
 // NOTE(review): the conditions selecting each branch are not visible in
 // this listing.
 571 const wchar_t* wxFormatString::InputAsWChar() 
 574         return m_wchar
.data(); 
 576 #if wxUSE_UNICODE_WCHAR 
 578         return m_str
->wc_str(); 
 580         return m_cstr
->AsInternal(); 
 581 #else // wxUSE_UNICODE_UTF8 
 584         m_wchar 
= m_str
->wc_str(); 
 585         return m_wchar
.data(); 
 589         m_wchar 
= m_cstr
->AsWCharBuf(); 
 590         return m_wchar
.data(); 
 592 #endif // wxUSE_UNICODE_WCHAR/UTF8 
 594     // the last case is that narrow string was passed in: in that case, we need 
 598     m_wchar 
= wxConvLibc
.cMB2WC(m_char
.data()); 
 600     return m_wchar
.data(); 
 // Returns the format string converted for wide printf-like functions.
 // The conversion (InputAsWChar() run through wxPrintfFormatConverterWchar)
 // is done only when m_convertedWChar is not yet set, and its result is
 // kept in m_convertedWChar.
 603 const wchar_t* wxFormatString::AsWChar() 
 605     if ( !m_convertedWChar 
) 
 606         m_convertedWChar 
= wxPrintfFormatConverterWchar().Convert(InputAsWChar()); 
 608     return m_convertedWChar
.data(); 
 610 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY 
 // Returns the unconverted format string as a wxString, built from
 // whichever representation is available (m_cstr, m_wchar or m_char are
 // visible here). If none applies, wxFAIL_MSG reports an uninitialized
 // wxFormatString.
 // NOTE(review): the conditions selecting each branch and the final return
 // are not visible in this listing.
 612 wxString 
wxFormatString::InputAsString() const 
 617         return m_cstr
->AsString(); 
 619         return wxString(m_wchar
); 
 621         return wxString(m_char
); 
 623     wxFAIL_MSG( "invalid wxFormatString - not initialized?" ); 
 627 // ---------------------------------------------------------------------------- 
 628 // wxFormatString::GetArgumentType() 
 629 // ---------------------------------------------------------------------------- 
 // Determines the kind of the n-th argument of a printf-style format.
 // The format is parsed with wxPrintfConvSpecParser<CharType> and the
 // specification stored at parser.pspec[n-1] is classified by its m_type
 // as Arg_Char or Arg_Other.
 // wxCHECK_MSG yields Arg_Other when format is null or when no
 // specification exists for the requested argument.
 // NOTE(review): the declaration of `n` and the case labels of the switch
 // are not visible in this listing.
 634 template<typename CharType
> 
 635 wxFormatString::ArgumentType 
DoGetArgumentType(const CharType 
*format
, 
 638     wxCHECK_MSG( format
, wxFormatString::Arg_Other
, 
 639                  "empty format string not allowed here" ); 
 641     wxPrintfConvSpecParser
<CharType
> parser(format
); 
 643     wxCHECK_MSG( parser
.pspec
[n
-1] != NULL
, wxFormatString::Arg_Other
, 
 644                  "requested argument not found - invalid format string?" ); 
 646     switch ( parser
.pspec
[n
-1]->m_type 
) 
 650             return wxFormatString::Arg_Char
; 
 653             return wxFormatString::Arg_Other
; 
 657 } // anonymous namespace 
 659 wxFormatString::ArgumentType 
wxFormatString::GetArgumentType(unsigned n
) const 
 662         return DoGetArgumentType(m_char
.data(), n
); 
 664         return DoGetArgumentType(m_wchar
.data(), n
); 
 666         return DoGetArgumentType(m_str
->wx_str(), n
); 
 668         return DoGetArgumentType(m_cstr
->AsInternal(), n
); 
 670     wxFAIL_MSG( "unreachable code" );