src/common/strvararg.cpp

   1 ///////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/strvararg.cpp
   3 // Purpose:     macros for implementing type-safe vararg passing of strings
   4 // Author:      Vaclav Slavik
   5 // Created:     2007-02-19
   6 // RCS-ID:      $Id$
   7 // Copyright:   (c) 2007 REA Elektronik GmbH
   8 // Licence:     wxWindows licence
   9 ///////////////////////////////////////////////////////////////////////////////
  10
  11 // ============================================================================
  12 // declarations
  13 // ============================================================================
  14
  15 // ----------------------------------------------------------------------------
  16 // headers
  17 // ----------------------------------------------------------------------------
  18
  19 // for compilers that support precompilation, includes "wx.h".
  20 #include "wx/wxprec.h"
  21
  22 #ifdef __BORLANDC__
  23     #pragma hdrstop
  24 #endif
  25
  26 #include "wx/strvararg.h"
  27 #include "wx/string.h"
  28 #include "wx/crt.h"
  29 #include "wx/private/wxprintf.h"
  30
  31 // ============================================================================
  32 // implementation
  33 // ============================================================================
  34
  35 // ----------------------------------------------------------------------------
  36 // wxArgNormalizer<>
  37 // ----------------------------------------------------------------------------
  38
  39 const wxStringCharType *wxArgNormalizerNative<const wxString&>::get() const
  40 {
  41     return m_value.wx_str();
  42 }
  43
  44 const wxStringCharType *wxArgNormalizerNative<const wxCStrData&>::get() const
  45 {
  46     return m_value.AsInternal();
  47 }
  48
  49 #if wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY
  50 wxArgNormalizerWchar<const wxString&>::wxArgNormalizerWchar(
  51                             const wxString& s,
  52                             const wxFormatString *fmt, unsigned index)
  53     : wxArgNormalizerWithBuffer<wchar_t>(s.wc_str(), fmt, index)
  54 {
  55 }
  56
  57 wxArgNormalizerWchar<const wxCStrData&>::wxArgNormalizerWchar(
  58                             const wxCStrData& s,
  59                             const wxFormatString *fmt, unsigned index)
  60     : wxArgNormalizerWithBuffer<wchar_t>(s.AsWCharBuf(), fmt, index)
  61 {
  62 }
  63 #endif // wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY
  64
  65 // ----------------------------------------------------------------------------
  66 // wxArgNormalizedString
  67 // ----------------------------------------------------------------------------
  68
  69 wxString wxArgNormalizedString::GetString() const
  70 {
  71     if ( !IsValid() )
  72         return wxEmptyString;
  73
  74 #if wxUSE_UTF8_LOCALE_ONLY
  75     return wxString(reinterpret_cast<const char*>(m_ptr));
  76 #else
  77     #if wxUSE_UNICODE_UTF8
  78         if ( wxLocaleIsUtf8 )
  79             return wxString(reinterpret_cast<const char*>(m_ptr));
  80         else
  81     #endif
  82         return wxString(reinterpret_cast<const wxChar*>(m_ptr));
  83 #endif // !wxUSE_UTF8_LOCALE_ONLY
  84 }
  85
  86 wxArgNormalizedString::operator wxString() const
  87 {
  88     return GetString();
  89 }
  90
  91 // ----------------------------------------------------------------------------
  92 // wxFormatConverter: class doing the "%s" and "%c" normalization
  93 // ----------------------------------------------------------------------------
  94
  95 /*
   There are three problems with wxPrintf() etc. format strings:
  97
  98    1) The printf vararg macros convert all forms of strings into
  99       wxStringCharType* representation. This may make the format string
 100       incorrect: for example, if %ls was used together with a wchar_t*
 101       variadic argument, this would no longer work, because the templates
 102       would change wchar_t* argument to wxStringCharType* and %ls would now
      be incorrect in e.g. UTF-8 build. We need to make sure only one specifier
 104       form is used.
 105
 106    2) To complicate matters further, the meaning of %s and %c is different
 107       under Windows and on Unix. The Windows/MS convention is as follows:
 108
 109        In ANSI mode:
 110
 111        format specifier         results in
 112        -----------------------------------
 113        %s, %hs, %hS             char*
 114        %ls, %S, %lS             wchar_t*
 115
 116        In Unicode mode:
 117
 118        format specifier         results in
 119        -----------------------------------
 120        %hs, %S, %hS             char*
 121        %s, %ls, %lS             wchar_t*
 122
 123        (While on POSIX systems we have %C identical to %lc and %c always means
 124        char (in any mode) while %lc always means wchar_t.)
 125
 126       In other words, we should _only_ use %s on Windows and %ls on Unix for
 127       wxUSE_UNICODE_WCHAR build.
 128
 129    3) To make things even worse, we need two forms in UTF-8 build: one for
 130       passing strings to ANSI functions under UTF-8 locales (this one should
 131       use %s) and one for widechar functions used under non-UTF-8 locales
 132       (this one should use %ls).
 133
 134    And, of course, the same should be done for %c as well.
 135
 136
 137    wxScanf() family of functions is simpler, because we don't normalize their
 138    variadic arguments and we only have to handle 2) above and only for widechar
 139    versions.
 140 */
 141
 142 template<typename T>
 143 class wxFormatConverterBase
 144 {
 145 public:
 146     typedef T CharType;
 147
 148     wxFormatConverterBase()
 149     {
 150         m_fmtOrig = NULL;
 151         m_fmtLast = NULL;
 152         m_nCopied = 0;
 153     }
 154
 155     wxScopedCharTypeBuffer<CharType> Convert(const CharType *format)
 156     {
 157         // this is reset to NULL if we modify the format string
 158         m_fmtOrig = format;
 159
 160         while ( *format )
 161         {
 162             if ( CopyFmtChar(*format++) == wxT('%') )
 163             {
 164                 // skip any flags
 165                 while ( IsFlagChar(*format) )
 166                     CopyFmtChar(*format++);
 167
 168                 // and possible width
 169                 if ( *format == wxT('*') )
 170                     CopyFmtChar(*format++);
 171                 else
 172                     SkipDigits(&format);
 173
 174                 // precision?
 175                 if ( *format == wxT('.') )
 176                 {
 177                     CopyFmtChar(*format++);
 178                     if ( *format == wxT('*') )
 179                         CopyFmtChar(*format++);
 180                     else
 181                         SkipDigits(&format);
 182                 }
 183
 184                 // next we can have a size modifier
 185                 SizeModifier size;
 186
 187                 switch ( *format )
 188                 {
 189                     case 'h':
 190                         size = Size_Short;
 191                         format++;
 192                         break;
 193
 194                     case 'l':
 195                         // "ll" has a different meaning!
 196                         if ( format[1] != 'l' )
 197                         {
 198                             size = Size_Long;
 199                             format++;
 200                             break;
 201                         }
 202                         //else: fall through
 203
 204                     default:
 205                         size = Size_Default;
 206                 }
 207
 208                 CharType outConv = *format;
 209                 SizeModifier outSize = size;
 210
 211                 // and finally we should have the type
 212                 switch ( *format )
 213                 {
 214                     case wxT('S'):
 215                     case wxT('s'):
 216                         // all strings were converted into the same form by
 217                         // wxArgNormalizer<T>, this form depends on the context
 218                         // in which the value is used (scanf/printf/wprintf):
 219                         HandleString(*format, size, outConv, outSize);
 220                         break;
 221
 222                     case wxT('C'):
 223                     case wxT('c'):
 224                         HandleChar(*format, size, outConv, outSize);
 225                         break;
 226
 227                     default:
 228                         // nothing special to do
 229                         break;
 230                 }
 231
 232                 if ( outConv == *format && outSize == size ) // no change
 233                 {
 234                     if ( size != Size_Default )
 235                         CopyFmtChar(*(format - 1));
 236                     CopyFmtChar(*format);
 237                 }
 238                 else // something changed
 239                 {
 240                     switch ( outSize )
 241                     {
 242                         case Size_Long:
 243                             InsertFmtChar(wxT('l'));
 244                             break;
 245
 246                         case Size_Short:
 247                             InsertFmtChar(wxT('h'));
 248                             break;
 249
 250                         case Size_Default:
 251                             // nothing to do
 252                             break;
 253                     }
 254                     InsertFmtChar(outConv);
 255                 }
 256
 257                 format++;
 258             }
 259         }
 260
 261         // notice that we only translated the string if m_fmtOrig == NULL (as
 262         // set by CopyAllBefore()), otherwise we should simply use the original
 263         // format
 264         if ( m_fmtOrig )
 265         {
 266             return wxScopedCharTypeBuffer<CharType>::CreateNonOwned(m_fmtOrig);
 267         }
 268         else
 269         {
 270             // shrink converted format string to actual size (instead of
 271             // over-sized allocation from CopyAllBefore()) and NUL-terminate
 272             // it:
 273             m_fmt.shrink(m_fmtLast - m_fmt.data());
 274             return m_fmt;
 275         }
 276     }
 277
 278     virtual ~wxFormatConverterBase() {}
 279
 280 protected:
 281     enum SizeModifier
 282     {
 283         Size_Default,
 284         Size_Short,
 285         Size_Long
 286     };
 287
 288     // called to handle %S or %s; 'conv' is conversion specifier ('S' or 's'
 289     // respectively), 'size' is the preceding size modifier; the new values of
 290     // conversion and size specifiers must be written to outConv and outSize
 291     virtual void HandleString(CharType conv, SizeModifier size,
 292                               CharType& outConv, SizeModifier& outSize) = 0;
 293
 294     // ditto for %C or %c
 295     virtual void HandleChar(CharType conv, SizeModifier size,
 296                             CharType& outConv, SizeModifier& outSize) = 0;
 297
 298 private:
 299     // copy another character to the translated format: this function does the
 300     // copy if we are translating but doesn't do anything at all if we don't,
 301     // so we don't create the translated format string at all unless we really
 302     // need to (i.e. InsertFmtChar() is called)
 303     CharType CopyFmtChar(CharType ch)
 304     {
 305         if ( !m_fmtOrig )
 306         {
 307             // we're translating, do copy
 308             *(m_fmtLast++) = ch;
 309         }
 310         else
 311         {
 312             // simply increase the count which should be copied by
 313             // CopyAllBefore() later if needed
 314             m_nCopied++;
 315         }
 316
 317         return ch;
 318     }
 319
 320     // insert an extra character
 321     void InsertFmtChar(CharType ch)
 322     {
 323         if ( m_fmtOrig )
 324         {
 325             // so far we haven't translated anything yet
 326             CopyAllBefore();
 327         }
 328
 329         *(m_fmtLast++) = ch;
 330     }
 331
 332     void CopyAllBefore()
 333     {
 334         wxASSERT_MSG( m_fmtOrig && m_fmt.data() == NULL, "logic error" );
 335
 336         // the modified format string is guaranteed to be no longer than
 337         // 3/2 of the original (worst case: the entire format string consists
 338         // of "%s" repeated and is expanded to "%ls" on Unix), so we can
 339         // allocate the buffer now and not worry about running out of space if
 340         // we over-allocate a bit:
 341         size_t fmtLen = wxStrlen(m_fmtOrig);
 342         // worst case is of even length, so there's no rounding error in *3/2:
 343         m_fmt.extend(fmtLen * 3 / 2);
 344
 345         if ( m_nCopied > 0 )
 346             wxStrncpy(m_fmt.data(), m_fmtOrig, m_nCopied);
 347         m_fmtLast = m_fmt.data() + m_nCopied;
 348
 349         // we won't need it any longer and resetting it also indicates that we
 350         // modified the format
 351         m_fmtOrig = NULL;
 352     }
 353
 354     static bool IsFlagChar(CharType ch)
 355     {
 356         return ch == wxT('-') || ch == wxT('+') ||
 357                ch == wxT('0') || ch == wxT(' ') || ch == wxT('#');
 358     }
 359
 360     void SkipDigits(const CharType **ptpc)
 361     {
 362         while ( **ptpc >= wxT('0') && **ptpc <= wxT('9') )
 363             CopyFmtChar(*(*ptpc)++);
 364     }
 365
 366     // the translated format
 367     wxCharTypeBuffer<CharType> m_fmt;
 368     CharType *m_fmtLast;
 369
 370     // the original format
 371     const CharType *m_fmtOrig;
 372
 373     // the number of characters already copied (i.e. already parsed, but left
 374     // unmodified)
 375     size_t m_nCopied;
 376 };
 377
 378 #ifdef __WINDOWS__
 379
 380 // on Windows, we should use %s and %c regardless of the build:
 381 class wxPrintfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
 382 {
 383     virtual void HandleString(CharType WXUNUSED(conv),
 384                               SizeModifier WXUNUSED(size),
 385                               CharType& outConv, SizeModifier& outSize)
 386     {
 387         outConv = 's';
 388         outSize = Size_Default;
 389     }
 390
 391     virtual void HandleChar(CharType WXUNUSED(conv),
 392                             SizeModifier WXUNUSED(size),
 393                             CharType& outConv, SizeModifier& outSize)
 394     {
 395         outConv = 'c';
 396         outSize = Size_Default;
 397     }
 398 };
 399
 400 #else // !__WINDOWS__
 401
 402 // on Unix, it's %s for ANSI functions and %ls for widechar:
 403
 404 #if !wxUSE_UTF8_LOCALE_ONLY
 405 class wxPrintfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
 406 {
 407     virtual void HandleString(CharType WXUNUSED(conv),
 408                               SizeModifier WXUNUSED(size),
 409                               CharType& outConv, SizeModifier& outSize)
 410     {
 411         outConv = 's';
 412         outSize = Size_Long;
 413     }
 414
 415     virtual void HandleChar(CharType WXUNUSED(conv),
 416                             SizeModifier WXUNUSED(size),
 417                             CharType& outConv, SizeModifier& outSize)
 418     {
 419         outConv = 'c';
 420         outSize = Size_Long;
 421     }
 422 };
 423 #endif // !wxUSE_UTF8_LOCALE_ONLY
 424
 425 #if wxUSE_UNICODE_UTF8
 426 class wxPrintfFormatConverterUtf8 : public wxFormatConverterBase<char>
 427 {
 428     virtual void HandleString(CharType WXUNUSED(conv),
 429                               SizeModifier WXUNUSED(size),
 430                               CharType& outConv, SizeModifier& outSize)
 431     {
 432         outConv = 's';
 433         outSize = Size_Default;
 434     }
 435
 436     virtual void HandleChar(CharType WXUNUSED(conv),
 437                             SizeModifier WXUNUSED(size),
 438                             CharType& outConv, SizeModifier& outSize)
 439     {
 440         // chars are represented using wchar_t in both builds, so this is
 441         // the same as above
 442         outConv = 'c';
 443         outSize = Size_Long;
 444     }
 445 };
 446 #endif // wxUSE_UNICODE_UTF8
 447
 448 #endif // __WINDOWS__/!__WINDOWS__
 449
 450 #if !wxUSE_UNICODE // FIXME-UTF8: remove
 451 class wxPrintfFormatConverterANSI : public wxFormatConverterBase<char>
 452 {
 453     virtual void HandleString(CharType WXUNUSED(conv),
 454                               SizeModifier WXUNUSED(size),
 455                               CharType& outConv, SizeModifier& outSize)
 456     {
 457         outConv = 's';
 458         outSize = Size_Default;
 459     }
 460
 461     virtual void HandleChar(CharType WXUNUSED(conv),
 462                             SizeModifier WXUNUSED(size),
 463                             CharType& outConv, SizeModifier& outSize)
 464     {
 465         outConv = 'c';
 466         outSize = Size_Default;
 467     }
 468 };
 469 #endif // ANSI
 470
 471 #ifndef __WINDOWS__
 472 /*
 473
 474    wxScanf() format translation is different, we need to translate %s to %ls
 475    and %c to %lc on Unix (but not Windows and for widechar functions only!).
 476
 477    So to use native functions in order to get our semantics we must do the
 478    following translations in Unicode mode:
 479
 480    wxWidgets specifier      POSIX specifier
 481    ----------------------------------------
 482
 483    %hc, %C, %hC             %c
 484    %c                       %lc
 485
 486  */
 487 class wxScanfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
 488 {
 489     virtual void HandleString(CharType conv, SizeModifier size,
 490                               CharType& outConv, SizeModifier& outSize)
 491     {
 492         outConv = 's';
 493         outSize = GetOutSize(conv == 'S', size);
 494     }
 495
 496     virtual void HandleChar(CharType conv, SizeModifier size,
 497                             CharType& outConv, SizeModifier& outSize)
 498     {
 499         outConv = 'c';
 500         outSize = GetOutSize(conv == 'C', size);
 501     }
 502
 503     SizeModifier GetOutSize(bool convIsUpper, SizeModifier size)
 504     {
 505         // %S and %hS -> %s and %lS -> %ls
 506         if ( convIsUpper )
 507         {
 508             if ( size == Size_Long )
 509                 return Size_Long;
 510             else
 511                 return Size_Default;
 512         }
 513         else // %s or %c
 514         {
 515             if ( size == Size_Default )
 516                 return Size_Long;
 517             else
 518                 return size;
 519         }
 520     }
 521 };
 522
 523 const wxScopedWCharBuffer wxScanfConvertFormatW(const wchar_t *format)
 524 {
 525     return wxScanfFormatConverterWchar().Convert(format);
 526 }
 527 #endif // !__WINDOWS__
 528
 529
 530 // ----------------------------------------------------------------------------
 531 // wxFormatString
 532 // ----------------------------------------------------------------------------
 533
 534 #if !wxUSE_UNICODE_WCHAR
 535 const char* wxFormatString::InputAsChar()
 536 {
 537     if ( m_char )
 538         return m_char.data();
 539
 540     // in ANSI build, wx_str() returns char*, in UTF-8 build, this function
 541     // is only called under UTF-8 locales, so we should return UTF-8 string,
 542     // which is, again, what wx_str() returns:
 543     if ( m_str )
 544         return m_str->wx_str();
 545
 546     // ditto wxCStrData:
 547     if ( m_cstr )
 548         return m_cstr->AsInternal();
 549
 550     // the last case is that wide string was passed in: in that case, we need
 551     // to convert it:
 552     wxASSERT( m_wchar );
 553
 554     m_char = wxConvLibc.cWC2MB(m_wchar.data());
 555
 556     return m_char.data();
 557 }
 558
 559 const char* wxFormatString::AsChar()
 560 {
 561     if ( !m_convertedChar )
 562 #if !wxUSE_UNICODE // FIXME-UTF8: remove this
 563         m_convertedChar = wxPrintfFormatConverterANSI().Convert(InputAsChar());
 564 #else
 565         m_convertedChar = wxPrintfFormatConverterUtf8().Convert(InputAsChar());
 566 #endif
 567
 568     return m_convertedChar.data();
 569 }
 570 #endif // !wxUSE_UNICODE_WCHAR
 571
 572 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 573 const wchar_t* wxFormatString::InputAsWChar()
 574 {
 575     if ( m_wchar )
 576         return m_wchar.data();
 577
 578 #if wxUSE_UNICODE_WCHAR
 579     if ( m_str )
 580         return m_str->wc_str();
 581     if ( m_cstr )
 582         return m_cstr->AsInternal();
 583 #else // wxUSE_UNICODE_UTF8
 584     if ( m_str )
 585     {
 586         m_wchar = m_str->wc_str();
 587         return m_wchar.data();
 588     }
 589     if ( m_cstr )
 590     {
 591         m_wchar = m_cstr->AsWCharBuf();
 592         return m_wchar.data();
 593     }
 594 #endif // wxUSE_UNICODE_WCHAR/UTF8
 595
 596     // the last case is that narrow string was passed in: in that case, we need
 597     // to convert it:
 598     wxASSERT( m_char );
 599
 600     m_wchar = wxConvLibc.cMB2WC(m_char.data());
 601
 602     return m_wchar.data();
 603 }
 604
 605 const wchar_t* wxFormatString::AsWChar()
 606 {
 607     if ( !m_convertedWChar )
 608         m_convertedWChar = wxPrintfFormatConverterWchar().Convert(InputAsWChar());
 609
 610     return m_convertedWChar.data();
 611 }
 612 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 613
 614 wxString wxFormatString::InputAsString() const
 615 {
 616     if ( m_str )
 617         return *m_str;
 618     if ( m_cstr )
 619         return m_cstr->AsString();
 620     if ( m_wchar )
 621         return wxString(m_wchar);
 622     if ( m_char )
 623         return wxString(m_char);
 624
 625     wxFAIL_MSG( "invalid wxFormatString - not initialized?" );
 626     return wxString();
 627 }
 628
 629 // ----------------------------------------------------------------------------
 630 // wxFormatString::GetArgumentType()
 631 // ----------------------------------------------------------------------------
 632
 633 namespace
 634 {
 635
 636 template<typename CharType>
 637 wxFormatString::ArgumentType DoGetArgumentType(const CharType *format,
 638                                                unsigned n)
 639 {
 640     wxCHECK_MSG( format, wxFormatString::Arg_Other,
 641                  "empty format string not allowed here" );
 642
 643     wxPrintfConvSpecParser<CharType> parser(format);
 644
 645     wxCHECK_MSG( parser.pspec[n-1] != NULL, wxFormatString::Arg_Other,
 646                  "requested argument not found - invalid format string?" );
 647
 648     switch ( parser.pspec[n-1]->m_type )
 649     {
 650         case wxPAT_CHAR:
 651         case wxPAT_WCHAR:
 652             return wxFormatString::Arg_Char;
 653
 654         default:
 655             return wxFormatString::Arg_Other;
 656     }
 657 }
 658
 659 } // anonymous namespace
 660
 661 wxFormatString::ArgumentType wxFormatString::GetArgumentType(unsigned n) const
 662 {
 663     if ( m_char )
 664         return DoGetArgumentType(m_char.data(), n);
 665     else if ( m_wchar )
 666         return DoGetArgumentType(m_wchar.data(), n);
 667     else if ( m_str )
 668         return DoGetArgumentType(m_str->wx_str(), n);
 669     else if ( m_cstr )
 670         return DoGetArgumentType(m_cstr->AsInternal(), n);
 671
 672     wxFAIL_MSG( "unreachable code" );
 673     return Arg_Other;
 674 }