1 /////////////////////////////////////////////////////////////////////////////// 
   2 // Name:        src/common/strvararg.cpp 
   3 // Purpose:     macros for implementing type-safe vararg passing of strings 
   4 // Author:      Vaclav Slavik 
   7 // Copyright:   (c) 2007 REA Elektronik GmbH 
   8 // Licence:     wxWindows licence 
   9 /////////////////////////////////////////////////////////////////////////////// 
  11 // ============================================================================ 
  13 // ============================================================================ 
  15 // ---------------------------------------------------------------------------- 
  17 // ---------------------------------------------------------------------------- 
  19 // for compilers that support precompilation, includes "wx.h". 
  20 #include "wx/wxprec.h" 
  26 #include "wx/strvararg.h" 
  27 #include "wx/string.h" 
  29 #include "wx/private/wxprintf.h" 
  31 // ============================================================================ 
  33 // ============================================================================ 
  35 // ---------------------------------------------------------------------------- 
  37 // ---------------------------------------------------------------------------- 
  // Returns the string data as a const wxStringCharType pointer by calling
  // wx_str() on the stored m_value.
  39 const wxStringCharType 
*wxArgNormalizerNative
<const wxString
&>::get() const 
  41     return m_value
.wx_str(); 
  // Returns the string data as a const wxStringCharType pointer by calling
  // AsInternal() on the stored m_value.
  44 const wxStringCharType 
*wxArgNormalizerNative
<const wxCStrData
&>::get() const 
  46     return m_value
.AsInternal(); 
  49 #if wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY 
  // Constructor of the const wxString& specialization: forwards s.wc_str()
  // together with fmt and index to the wxArgNormalizerWithBuffer<wchar_t>
  // base class.
  // NOTE(review): the declaration of `s` is not visible in this listing.
  50 wxArgNormalizerWchar
<const wxString
&>::wxArgNormalizerWchar( 
  52                             const wxFormatString 
*fmt
, unsigned index
) 
  53     : wxArgNormalizerWithBuffer
<wchar_t>(s
.wc_str(), fmt
, index
) 
  // Constructor of the const wxCStrData& specialization: forwards
  // s.AsWCharBuf() together with fmt and index to the
  // wxArgNormalizerWithBuffer<wchar_t> base class.
  // NOTE(review): the declaration of `s` is not visible in this listing.
  57 wxArgNormalizerWchar
<const wxCStrData
&>::wxArgNormalizerWchar( 
  59                             const wxFormatString 
*fmt
, unsigned index
) 
  60     : wxArgNormalizerWithBuffer
<wchar_t>(s
.AsWCharBuf(), fmt
, index
) 
  63 #endif // wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY 
  65 // ---------------------------------------------------------------------------- 
  66 // wxArgNormalizedString 
  67 // ---------------------------------------------------------------------------- 
  // Builds a wxString from m_ptr. m_ptr is reinterpreted as const char* in
  // the wxUSE_UTF8_LOCALE_ONLY branch and in one branch under
  // wxUSE_UNICODE_UTF8, and as const wxChar* in the remaining return.
  // NOTE(review): any condition guarding the char* return under
  // wxUSE_UNICODE_UTF8, and anything preceding the first #if, is not visible
  // in this listing.
  69 wxString 
wxArgNormalizedString::GetString() const 
  74 #if wxUSE_UTF8_LOCALE_ONLY 
  75     return wxString(reinterpret_cast<const char*>(m_ptr
)); 
  77     #if wxUSE_UNICODE_UTF8 
  79             return wxString(reinterpret_cast<const char*>(m_ptr
)); 
  82         return wxString(reinterpret_cast<const wxChar
*>(m_ptr
)); 
  83 #endif // !wxUSE_UTF8_LOCALE_ONLY 
  86 wxArgNormalizedString::operator wxString() const 
  91 // ---------------------------------------------------------------------------- 
  92 // wxFormatConverter: class doing the "%s" and "%c" normalization 
  93 // ---------------------------------------------------------------------------- 
  96    There are four problems with wxPrintf() etc. format strings: 
  98    1) The printf vararg macros convert all forms of strings into 
  99       wxStringCharType* representation. This may make the format string 
 100       incorrect: for example, if %ls was used together with a wchar_t* 
 101       variadic argument, this would no longer work, because the templates 
 102       would change wchar_t* argument to wxStringCharType* and %ls would now 
 103       be incorrect in e.g. UTF-8 build. We need to make sure only one specifier 
 106    2) To complicate matters further, the meaning of %s and %c is different 
 107       under Windows and on Unix. The Windows/MS convention is as follows: 
 111        format specifier         results in 
 112        ----------------------------------- 
 114        %ls, %S, %lS             wchar_t* 
 118        format specifier         results in 
 119        ----------------------------------- 
 121        %s, %ls, %lS             wchar_t* 
 123        (While on POSIX systems we have %C identical to %lc and %c always means 
 124        char (in any mode) while %lc always means wchar_t.) 
 126       In other words, we should _only_ use %s on Windows and %ls on Unix for 
 127       wxUSE_UNICODE_WCHAR build. 
 129    3) To make things even worse, we need two forms in UTF-8 build: one for 
 130       passing strings to ANSI functions under UTF-8 locales (this one should 
 131       use %s) and one for widechar functions used under non-UTF-8 locales 
 132       (this one should use %ls). 
 134    And, of course, the same should be done for %c as well. 
 137    The wxScanf() family of functions is simpler, because we don't normalize their 
 138    variadic arguments and we only have to handle 2) above and only for widechar 
 // Base class for the printf/scanf format string converters. Convert() walks
 // a format string of CharType characters and, for string and character
 // conversions, asks the derived class (through the pure virtual
 // HandleString() and HandleChar()) which conversion character and size
 // modifier should be used in the output.
 //
 // The translated copy (m_fmt) is only built once a change is actually
 // required; until then CopyFmtChar() merely counts the characters seen, and
 // Convert() can return the original string wrapped in a non-owning buffer.
 //
 // NOTE(review): this listing is missing lines (template header, braces,
 // case labels, some member declarations); the comments added here describe
 // only the statements that are visible.
 143 class wxFormatConverterBase
 
 148     wxFormatConverterBase() 
     // Parses `format` and returns the (possibly translated) format string.
     // Flags, '*' width, '.' precision and '*' precision characters are
     // passed through CopyFmtChar(). The conversion character is handed to
     // HandleString() or HandleChar(), which fill in outConv/outSize. If both
     // come back equal to the input, the original characters are copied;
     // otherwise the new characters are added with InsertFmtChar().
 155     wxCharTypeBuffer
<CharType
> Convert(const CharType 
*format
) 
 157         // this is reset to NULL if we modify the format string 
 162             if ( CopyFmtChar(*format
++) == _T('%') ) 
 165                 while ( IsFlagChar(*format
) ) 
 166                     CopyFmtChar(*format
++); 
 168                 // and possible width 
 169                 if ( *format 
== _T('*') ) 
 170                     CopyFmtChar(*format
++); 
 175                 if ( *format 
== _T('.') ) 
 177                     CopyFmtChar(*format
++); 
 178                     if ( *format 
== _T('*') ) 
 179                         CopyFmtChar(*format
++); 
 184                 // next we can have a size modifier 
 195                         // "ll" has a different meaning! 
 196                         if ( format
[1] != 'l' ) 
                 // start from "no change": the handlers below overwrite these
                 // only when a different specifier is needed
 208                 CharType outConv 
= *format
; 
 209                 SizeModifier outSize 
= size
; 
 211                 // and finally we should have the type 
 216                         // all strings were converted into the same form by 
 217                         // wxArgNormalizer<T>, this form depends on the context 
 218                         // in which the value is used (scanf/printf/wprintf): 
 219                         HandleString(*format
, size
, outConv
, outSize
); 
 224                         HandleChar(*format
, size
, outConv
, outSize
); 
 228                         // nothing special to do 
 232                 if ( outConv 
== *format 
&& outSize 
== size 
) // no change 
                     // a non-default size modifier is the character just
                     // before the conversion character, so copy it as well
 234                     if ( size 
!= Size_Default 
) 
 235                         CopyFmtChar(*(format 
- 1)); 
 236                     CopyFmtChar(*format
); 
 238                 else // something changed 
 243                             InsertFmtChar(_T('l')); 
 247                             InsertFmtChar(_T('h')); 
 254                     InsertFmtChar(outConv
); 
 261         // notice that we only translated the string if m_fmtOrig == NULL (as 
 262         // set by CopyAllBefore()), otherwise we should simply use the original 
 266             return wxCharTypeBuffer
<CharType
>::CreateNonOwned(m_fmtOrig
); 
 270             // NULL-terminate converted format string: 
 276     virtual ~wxFormatConverterBase() {} 
 286     // called to handle %S or %s; 'conv' is conversion specifier ('S' or 's' 
 287     // respectively), 'size' is the preceding size modifier; the new values of 
 288     // conversion and size specifiers must be written to outConv and outSize 
 289     virtual void HandleString(CharType conv
, SizeModifier size
, 
 290                               CharType
& outConv
, SizeModifier
& outSize
) = 0; 
 292     // ditto for %C or %c 
 293     virtual void HandleChar(CharType conv
, SizeModifier size
, 
 294                             CharType
& outConv
, SizeModifier
& outSize
) = 0; 
 297     // copy another character to the translated format: this function does the 
 298     // copy if we are translating but doesn't do anything at all if we don't, 
 299     // so we don't create the translated format string at all unless we really 
 300     // need to (i.e. InsertFmtChar() is called) 
 301     CharType 
CopyFmtChar(CharType ch
) 
 305             // we're translating, do copy 
 310             // simply increase the count which should be copied by 
 311             // CopyAllBefore() later if needed 
 318     // insert an extra character 
 319     void InsertFmtChar(CharType ch
) 
 323             // so far we haven't translated anything yet 
         // NOTE(review): the statements from here to IsFlagChar() appear to be
         // the body of a separate helper whose signature is not visible in
         // this listing (other comments refer to CopyAllBefore()). They
         // allocate m_fmt, copy the first m_nCopied characters of m_fmtOrig
         // into it and point m_fmtLast just past the copied part.
 332         wxASSERT_MSG( m_fmtOrig 
&& m_fmt
.data() == NULL
, "logic error" ); 
 334         // the modified format string is guaranteed to be no longer than 
 335         // 3/2 of the original (worst case: the entire format string consists 
 336         // of "%s" repeated and is expanded to "%ls" on Unix), so we can 
 337         // allocate the buffer now and not worry about running out of space if 
 338         // we over-allocate a bit: 
 339         size_t fmtLen 
= wxStrlen(m_fmtOrig
); 
 340         // worst case is of even length, so there's no rounding error in *3/2: 
 341         m_fmt
.extend(fmtLen 
* 3 / 2); 
 344             wxStrncpy(m_fmt
.data(), m_fmtOrig
, m_nCopied
); 
 345         m_fmtLast 
= m_fmt
.data() + m_nCopied
; 
 347         // we won't need it any longer and resetting it also indicates that we 
 348         // modified the format 
     // returns true if ch is one of the flag characters '-', '+', '0', ' '
     // or '#'
 352     static bool IsFlagChar(CharType ch
) 
 354         return ch 
== _T('-') || ch 
== _T('+') || 
 355                ch 
== _T('0') || ch 
== _T(' ') || ch 
== _T('#'); 
     // passes consecutive decimal digits starting at *ptpc through
     // CopyFmtChar(), advancing *ptpc past them
 358     void SkipDigits(const CharType 
**ptpc
) 
 360         while ( **ptpc 
>= _T('0') && **ptpc 
<= _T('9') ) 
 361             CopyFmtChar(*(*ptpc
)++); 
 364     // the translated format 
 365     wxCharTypeBuffer
<CharType
> m_fmt
; 
 368     // the original format 
 369     const CharType 
*m_fmtOrig
; 
 371     // the number of characters already copied (i.e. already parsed, but left 
 380 // on Windows, we should use %s and %c regardless of the build: 
 // printf format converter for wchar_t format strings. Both handlers ignore
 // the incoming conversion character and size modifier (WXUNUSED) and reset
 // the output size modifier to Size_Default.
 // NOTE(review): the statements assigning outConv are not visible in this
 // listing.
 381 class wxPrintfFormatConverterWchar 
: public wxFormatConverterBase
<wchar_t> 
 383     virtual void HandleString(CharType 
WXUNUSED(conv
), 
 384                               SizeModifier 
WXUNUSED(size
), 
 385                               CharType
& outConv
, SizeModifier
& outSize
) 
 388         outSize 
= Size_Default
; 
 391     virtual void HandleChar(CharType 
WXUNUSED(conv
), 
 392                             SizeModifier 
WXUNUSED(size
), 
 393                             CharType
& outConv
, SizeModifier
& outSize
) 
 396         outSize 
= Size_Default
; 
 400 #else // !__WINDOWS__ 
 402 // on Unix, it's %s for ANSI functions and %ls for widechar: 
 404 #if !wxUSE_UTF8_LOCALE_ONLY 
 // printf format converter for wchar_t format strings, compiled in the
 // !__WINDOWS__ branch when wxUSE_UTF8_LOCALE_ONLY is off. Both handlers
 // ignore the incoming conversion character and size modifier (WXUNUSED).
 // NOTE(review): the handler bodies (the values written to outConv and
 // outSize) are not visible in this listing.
 405 class wxPrintfFormatConverterWchar 
: public wxFormatConverterBase
<wchar_t> 
 407     virtual void HandleString(CharType 
WXUNUSED(conv
), 
 408                               SizeModifier 
WXUNUSED(size
), 
 409                               CharType
& outConv
, SizeModifier
& outSize
) 
 415     virtual void HandleChar(CharType 
WXUNUSED(conv
), 
 416                             SizeModifier 
WXUNUSED(size
), 
 417                             CharType
& outConv
, SizeModifier
& outSize
) 
 423 #endif // !wxUSE_UTF8_LOCALE_ONLY 
 425 #if wxUSE_UNICODE_UTF8 
 // printf format converter for char format strings, compiled when
 // wxUSE_UNICODE_UTF8 is on. HandleString() ignores the incoming conversion
 // character and size modifier and resets the output size modifier to
 // Size_Default.
 // NOTE(review): the assignment to outConv in HandleString() and the whole
 // body of HandleChar() are not visible in this listing.
 426 class wxPrintfFormatConverterUtf8 
: public wxFormatConverterBase
<char> 
 428     virtual void HandleString(CharType 
WXUNUSED(conv
), 
 429                               SizeModifier 
WXUNUSED(size
), 
 430                               CharType
& outConv
, SizeModifier
& outSize
) 
 433         outSize 
= Size_Default
; 
 436     virtual void HandleChar(CharType 
WXUNUSED(conv
), 
 437                             SizeModifier 
WXUNUSED(size
), 
 438                             CharType
& outConv
, SizeModifier
& outSize
) 
 440         // chars are represented using wchar_t in both builds, so this is 
 446 #endif // wxUSE_UNICODE_UTF8 
 448 #endif // __WINDOWS__/!__WINDOWS__ 
 450 #if !wxUSE_UNICODE // FIXME-UTF8: remove 
 // printf format converter for char format strings, compiled only when
 // wxUSE_UNICODE is off. Both handlers ignore the incoming conversion
 // character and size modifier (WXUNUSED) and reset the output size modifier
 // to Size_Default.
 // NOTE(review): the statements assigning outConv are not visible in this
 // listing.
 451 class wxPrintfFormatConverterANSI 
: public wxFormatConverterBase
<char> 
 453     virtual void HandleString(CharType 
WXUNUSED(conv
), 
 454                               SizeModifier 
WXUNUSED(size
), 
 455                               CharType
& outConv
, SizeModifier
& outSize
) 
 458         outSize 
= Size_Default
; 
 461     virtual void HandleChar(CharType 
WXUNUSED(conv
), 
 462                             SizeModifier 
WXUNUSED(size
), 
 463                             CharType
& outConv
, SizeModifier
& outSize
) 
 466         outSize 
= Size_Default
; 
 474    wxScanf() format translation is different: we need to translate %s to %ls 
 475    and %c to %lc on Unix (but not Windows and for widechar functions only!). 
 477    So to use native functions in order to get our semantics we must do the 
 478    following translations in Unicode mode: 
 480    wxWidgets specifier      POSIX specifier 
 481    ---------------------------------------- 
 // scanf format converter for wchar_t format strings. Unlike the printf
 // converters, the handlers do look at the incoming specifier: the output
 // size modifier is computed by GetOutSize() from whether the conversion
 // character is the upper-case form ('S' or 'C') and from the original size
 // modifier.
 // NOTE(review): the statements assigning outConv and the values returned by
 // GetOutSize() are not visible in this listing.
 487 class wxScanfFormatConverterWchar 
: public wxFormatConverterBase
<wchar_t> 
 489     virtual void HandleString(CharType conv
, SizeModifier size
, 
 490                               CharType
& outConv
, SizeModifier
& outSize
) 
 493         outSize 
= GetOutSize(conv 
== 'S', size
); 
 496     virtual void HandleChar(CharType conv
, SizeModifier size
, 
 497                             CharType
& outConv
, SizeModifier
& outSize
) 
 500         outSize 
= GetOutSize(conv 
== 'C', size
); 
     // maps the input size modifier to the output one; convIsUpper tells
     // whether the conversion character was the upper-case variant
 503     SizeModifier 
GetOutSize(bool convIsUpper
, SizeModifier size
) 
 505         // %S and %hS -> %s and %lS -> %ls 
 508             if ( size 
== Size_Long 
) 
 515             if ( size 
== Size_Default 
) 
 // Translates a wide-char scanf format string by running it through a
 // temporary wxScanfFormatConverterWchar and returns the resulting buffer.
 523 const wxWCharBuffer 
wxScanfConvertFormatW(const wchar_t *format
) 
 525     return wxScanfFormatConverterWchar().Convert(format
); 
 527 #endif // !__WINDOWS__ 
 530 // ---------------------------------------------------------------------------- 
 532 // ---------------------------------------------------------------------------- 
 534 #if !wxUSE_UNICODE_WCHAR 
 // Returns the unconverted format string as const char*. Depending on which
 // representation is held, this is m_char.data(), m_str->wx_str() or
 // m_cstr->AsInternal(); as a last resort the wide string in m_wchar is
 // converted with wxConvLibc.cWC2MB(), stored in m_char and returned from
 // there.
 // NOTE(review): the conditions selecting each return are not visible in
 // this listing.
 535 const char* wxFormatString::InputAsChar() 
 538         return m_char
.data(); 
 540     // in ANSI build, wx_str() returns char*, in UTF-8 build, this function 
 541     // is only called under UTF-8 locales, so we should return UTF-8 string, 
 542     // which is, again, what wx_str() returns: 
 544         return m_str
->wx_str(); 
 548         return m_cstr
->AsInternal(); 
 550     // the last case is that wide string was passed in: in that case, we need 
 554     m_char 
= wxConvLibc
.cWC2MB(m_wchar
.data()); 
 556     return m_char
.data(); 
 // Returns the translated char format string. The translation of
 // InputAsChar() is computed only while m_convertedChar is still empty and
 // is stored in m_convertedChar, so later calls reuse it. The converter is
 // wxPrintfFormatConverterANSI when wxUSE_UNICODE is off.
 // NOTE(review): the preprocessor line separating the ANSI and UTF-8
 // assignments is not visible in this listing.
 559 const char* wxFormatString::AsChar() 
 561     if ( !m_convertedChar 
) 
 562 #if !wxUSE_UNICODE // FIXME-UTF8: remove this 
 563         m_convertedChar 
= wxPrintfFormatConverterANSI().Convert(InputAsChar()); 
 565         m_convertedChar 
= wxPrintfFormatConverterUtf8().Convert(InputAsChar()); 
 568     return m_convertedChar
.data(); 
 570 #endif // !wxUSE_UNICODE_WCHAR 
 572 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY 
 // Returns the unconverted format string as const wchar_t*. If a wide string
 // is already held, m_wchar.data() is returned. Under wxUSE_UNICODE_WCHAR the
 // data of m_str / m_cstr is returned directly; in the UTF-8 build it is
 // first converted (wc_str() / AsWCharBuf()) and stored in m_wchar. As a
 // last resort the narrow string in m_char is converted with
 // wxConvLibc.cMB2WC() into m_wchar.
 // NOTE(review): the conditions selecting each return are not visible in
 // this listing.
 573 const wchar_t* wxFormatString::InputAsWChar() 
 576         return m_wchar
.data(); 
 578 #if wxUSE_UNICODE_WCHAR 
 580         return m_str
->wc_str(); 
 582         return m_cstr
->AsInternal(); 
 583 #else // wxUSE_UNICODE_UTF8 
 586         m_wchar 
= m_str
->wc_str(); 
 587         return m_wchar
.data(); 
 591         m_wchar 
= m_cstr
->AsWCharBuf(); 
 592         return m_wchar
.data(); 
 594 #endif // wxUSE_UNICODE_WCHAR/UTF8 
 596     // the last case is that narrow string was passed in: in that case, we need 
 600     m_wchar 
= wxConvLibc
.cMB2WC(m_char
.data()); 
 602     return m_wchar
.data(); 
 // Returns the translated wide-char format string. The translation of
 // InputAsWChar() by wxPrintfFormatConverterWchar is computed only while
 // m_convertedWChar is still empty and is stored there for later calls.
 605 const wchar_t* wxFormatString::AsWChar() 
 607     if ( !m_convertedWChar 
) 
 608         m_convertedWChar 
= wxPrintfFormatConverterWchar().Convert(InputAsWChar()); 
 610     return m_convertedWChar
.data(); 
 612 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY 
 614 // ---------------------------------------------------------------------------- 
 615 // wxFormatString::GetArgumentType() 
 616 // ---------------------------------------------------------------------------- 
 // Determines the wxFormatString::ArgumentType of one conversion in
 // `format`. The string is parsed with wxPrintfConvSpecParser<CharType> and
 // the specification at index n-1 is examined, so n is used as a 1-based
 // position. Both wxCHECK_MSG checks (format is non-null, the requested
 // specification exists) use Arg_Other as their result value. The visible
 // branches of the switch on m_type return Arg_Char or Arg_Other.
 // NOTE(review): the declaration of `n` and the case labels of the switch
 // are not visible in this listing.
 621 template<typename CharType
> 
 622 wxFormatString::ArgumentType 
DoGetArgumentType(const CharType 
*format
, 
 625     wxCHECK_MSG( format
, wxFormatString::Arg_Other
, 
 626                  "empty format string not allowed here" ); 
 628     wxPrintfConvSpecParser
<CharType
> parser(format
); 
 630     wxCHECK_MSG( parser
.pspec
[n
-1] != NULL
, wxFormatString::Arg_Other
, 
 631                  "requested argument not found - invalid format string?" ); 
 633     switch ( parser
.pspec
[n
-1]->m_type 
) 
 637             return wxFormatString::Arg_Char
; 
 640             return wxFormatString::Arg_Other
; 
 644 } // anonymous namespace 
 646 wxFormatString::ArgumentType 
wxFormatString::GetArgumentType(unsigned n
) const 
 649         return DoGetArgumentType(m_char
.data(), n
); 
 651         return DoGetArgumentType(m_wchar
.data(), n
); 
 653         return DoGetArgumentType(m_str
->wx_str(), n
); 
 655         return DoGetArgumentType(m_cstr
->AsInternal(), n
); 
 657     wxFAIL_MSG( "unreachable code" );