src/common/strvararg.cpp

   1 ///////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/strvararg.cpp
   3 // Purpose:     macros for implementing type-safe vararg passing of strings
   4 // Author:      Vaclav Slavik
   5 // Created:     2007-02-19
   6 // Copyright:   (c) 2007 REA Elektronik GmbH
   7 // Licence:     wxWindows licence
   8 ///////////////////////////////////////////////////////////////////////////////
   9
  10 // ============================================================================
  11 // declarations
  12 // ============================================================================
  13
  14 // ----------------------------------------------------------------------------
  15 // headers
  16 // ----------------------------------------------------------------------------
  17
  18 // for compilers that support precompilation, includes "wx.h".
  19 #include "wx/wxprec.h"
  20
  21 #ifdef __BORLANDC__
  22     #pragma hdrstop
  23 #endif
  24
  25 #include "wx/strvararg.h"
  26 #include "wx/string.h"
  27 #include "wx/crt.h"
  28 #include "wx/private/wxprintf.h"
  29
  30 // ============================================================================
  31 // implementation
  32 // ============================================================================
  33
  34 // ----------------------------------------------------------------------------
  35 // wxArgNormalizer<>
  36 // ----------------------------------------------------------------------------
  37
  38 const wxStringCharType *wxArgNormalizerNative<const wxString&>::get() const
  39 {
  40     return m_value.wx_str();
  41 }
  42
  43 const wxStringCharType *wxArgNormalizerNative<const wxCStrData&>::get() const
  44 {
  45     return m_value.AsInternal();
  46 }
  47
  48 #if wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY
// Normalize a wxString argument for a wide-char vararg function: the result
// of s.wc_str() is handed to the wxArgNormalizerWithBuffer<wchar_t> base
// class together with the format string and the argument index, which are
// forwarded unchanged.
wxArgNormalizerWchar<const wxString&>::wxArgNormalizerWchar(
                            const wxString& s,
                            const wxFormatString *fmt, unsigned index)
    : wxArgNormalizerWithBuffer<wchar_t>(s.wc_str(), fmt, index)
{
}
  55
// Same as the wxString overload, but for wxCStrData arguments: the result of
// s.AsWCharBuf() is handed to the wxArgNormalizerWithBuffer<wchar_t> base
// class together with the format string and the argument index, which are
// forwarded unchanged.
wxArgNormalizerWchar<const wxCStrData&>::wxArgNormalizerWchar(
                            const wxCStrData& s,
                            const wxFormatString *fmt, unsigned index)
    : wxArgNormalizerWithBuffer<wchar_t>(s.AsWCharBuf(), fmt, index)
{
}
  62 #endif // wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY
  63
  64 // ----------------------------------------------------------------------------
  65 // wxArgNormalizedString
  66 // ----------------------------------------------------------------------------
  67
  68 wxString wxArgNormalizedString::GetString() const
  69 {
  70     if ( !IsValid() )
  71         return wxEmptyString;
  72
  73 #if wxUSE_UTF8_LOCALE_ONLY
  74     return wxString(reinterpret_cast<const char*>(m_ptr));
  75 #else
  76     #if wxUSE_UNICODE_UTF8
  77         if ( wxLocaleIsUtf8 )
  78             return wxString(reinterpret_cast<const char*>(m_ptr));
  79         else
  80     #endif
  81         return wxString(reinterpret_cast<const wxChar*>(m_ptr));
  82 #endif // !wxUSE_UTF8_LOCALE_ONLY
  83 }
  84
  85 wxArgNormalizedString::operator wxString() const
  86 {
  87     return GetString();
  88 }
  89
  90 // ----------------------------------------------------------------------------
  91 // wxFormatConverter: class doing the "%s" and "%c" normalization
  92 // ----------------------------------------------------------------------------
  93
  94 /*
   There are three problems with wxPrintf() etc. format strings:
  96
  97    1) The printf vararg macros convert all forms of strings into
  98       wxStringCharType* representation. This may make the format string
  99       incorrect: for example, if %ls was used together with a wchar_t*
 100       variadic argument, this would no longer work, because the templates
 101       would change wchar_t* argument to wxStringCharType* and %ls would now
      be incorrect in e.g. UTF-8 build. We need to make sure only one specifier
      form is used.
 104
 105    2) To complicate matters further, the meaning of %s and %c is different
 106       under Windows and on Unix. The Windows/MS convention is as follows:
 107
 108        In ANSI mode:
 109
 110        format specifier         results in
 111        -----------------------------------
 112        %s, %hs, %hS             char*
 113        %ls, %S, %lS             wchar_t*
 114
 115        In Unicode mode:
 116
 117        format specifier         results in
 118        -----------------------------------
 119        %hs, %S, %hS             char*
 120        %s, %ls, %lS             wchar_t*
 121
 122        (While on POSIX systems we have %C identical to %lc and %c always means
 123        char (in any mode) while %lc always means wchar_t.)
 124
 125       In other words, we should _only_ use %s on Windows and %ls on Unix for
 126       wxUSE_UNICODE_WCHAR build.
 127
 128    3) To make things even worse, we need two forms in UTF-8 build: one for
 129       passing strings to ANSI functions under UTF-8 locales (this one should
 130       use %s) and one for widechar functions used under non-UTF-8 locales
 131       (this one should use %ls).
 132
 133    And, of course, the same should be done for %c as well.
 134
 135
 136    wxScanf() family of functions is simpler, because we don't normalize their
 137    variadic arguments and we only have to handle 2) above and only for widechar
 138    versions.
 139 */
 140
 141 template<typename T>
 142 class wxFormatConverterBase
 143 {
 144 public:
 145     typedef T CharType;
 146
 147     wxFormatConverterBase()
 148     {
 149         m_fmtOrig = NULL;
 150         m_fmtLast = NULL;
 151         m_nCopied = 0;
 152     }
 153
 154     wxScopedCharTypeBuffer<CharType> Convert(const CharType *format)
 155     {
 156         // this is reset to NULL if we modify the format string
 157         m_fmtOrig = format;
 158
 159         while ( *format )
 160         {
 161             if ( CopyFmtChar(*format++) == wxT('%') )
 162             {
 163 #if wxUSE_PRINTF_POS_PARAMS
 164                 if ( *format >= '0' && *format <= '9' )
 165                 {
 166                     SkipDigits(&format);
 167                     if ( *format == '$' )
 168                     {
 169                         // It was a positional argument specification.
 170                         CopyFmtChar(*format++);
 171                     }
 172                     //else: it was a width specification, nothing else to do.
 173                 }
 174 #endif // wxUSE_PRINTF_POS_PARAMS
 175
 176                 // skip any flags
 177                 while ( IsFlagChar(*format) )
 178                     CopyFmtChar(*format++);
 179
 180                 // and possible width
 181                 if ( *format == wxT('*') )
 182                     CopyFmtChar(*format++);
 183                 else
 184                     SkipDigits(&format);
 185
 186                 // precision?
 187                 if ( *format == wxT('.') )
 188                 {
 189                     CopyFmtChar(*format++);
 190                     if ( *format == wxT('*') )
 191                         CopyFmtChar(*format++);
 192                     else
 193                         SkipDigits(&format);
 194                 }
 195
 196                 // next we can have a size modifier
 197                 SizeModifier size;
 198
 199                 switch ( *format )
 200                 {
 201                     case 'h':
 202                         size = Size_Short;
 203                         format++;
 204                         break;
 205
 206                     case 'l':
 207                         // "ll" has a different meaning!
 208                         if ( format[1] != 'l' )
 209                         {
 210                             size = Size_Long;
 211                             format++;
 212                             break;
 213                         }
 214                         //else: fall through
 215
 216                     default:
 217                         size = Size_Default;
 218                 }
 219
 220                 CharType outConv = *format;
 221                 SizeModifier outSize = size;
 222
 223                 // and finally we should have the type
 224                 switch ( *format )
 225                 {
 226                     case wxT('S'):
 227                     case wxT('s'):
 228                         // all strings were converted into the same form by
 229                         // wxArgNormalizer<T>, this form depends on the context
 230                         // in which the value is used (scanf/printf/wprintf):
 231                         HandleString(*format, size, outConv, outSize);
 232                         break;
 233
 234                     case wxT('C'):
 235                     case wxT('c'):
 236                         HandleChar(*format, size, outConv, outSize);
 237                         break;
 238
 239                     default:
 240                         // nothing special to do
 241                         break;
 242                 }
 243
 244                 if ( outConv == *format && outSize == size ) // no change
 245                 {
 246                     if ( size != Size_Default )
 247                         CopyFmtChar(*(format - 1));
 248                     CopyFmtChar(*format);
 249                 }
 250                 else // something changed
 251                 {
 252                     switch ( outSize )
 253                     {
 254                         case Size_Long:
 255                             InsertFmtChar(wxT('l'));
 256                             break;
 257
 258                         case Size_Short:
 259                             InsertFmtChar(wxT('h'));
 260                             break;
 261
 262                         case Size_Default:
 263                             // nothing to do
 264                             break;
 265                     }
 266                     InsertFmtChar(outConv);
 267                 }
 268
 269                 format++;
 270             }
 271         }
 272
 273         // notice that we only translated the string if m_fmtOrig == NULL (as
 274         // set by CopyAllBefore()), otherwise we should simply use the original
 275         // format
 276         if ( m_fmtOrig )
 277         {
 278             return wxScopedCharTypeBuffer<CharType>::CreateNonOwned(m_fmtOrig);
 279         }
 280         else
 281         {
 282             // shrink converted format string to actual size (instead of
 283             // over-sized allocation from CopyAllBefore()) and NUL-terminate
 284             // it:
 285             m_fmt.shrink(m_fmtLast - m_fmt.data());
 286             return m_fmt;
 287         }
 288     }
 289
 290     virtual ~wxFormatConverterBase() {}
 291
 292 protected:
 293     enum SizeModifier
 294     {
 295         Size_Default,
 296         Size_Short,
 297         Size_Long
 298     };
 299
 300     // called to handle %S or %s; 'conv' is conversion specifier ('S' or 's'
 301     // respectively), 'size' is the preceding size modifier; the new values of
 302     // conversion and size specifiers must be written to outConv and outSize
 303     virtual void HandleString(CharType conv, SizeModifier size,
 304                               CharType& outConv, SizeModifier& outSize) = 0;
 305
 306     // ditto for %C or %c
 307     virtual void HandleChar(CharType conv, SizeModifier size,
 308                             CharType& outConv, SizeModifier& outSize) = 0;
 309
 310 private:
 311     // copy another character to the translated format: this function does the
 312     // copy if we are translating but doesn't do anything at all if we don't,
 313     // so we don't create the translated format string at all unless we really
 314     // need to (i.e. InsertFmtChar() is called)
 315     CharType CopyFmtChar(CharType ch)
 316     {
 317         if ( !m_fmtOrig )
 318         {
 319             // we're translating, do copy
 320             *(m_fmtLast++) = ch;
 321         }
 322         else
 323         {
 324             // simply increase the count which should be copied by
 325             // CopyAllBefore() later if needed
 326             m_nCopied++;
 327         }
 328
 329         return ch;
 330     }
 331
 332     // insert an extra character
 333     void InsertFmtChar(CharType ch)
 334     {
 335         if ( m_fmtOrig )
 336         {
 337             // so far we haven't translated anything yet
 338             CopyAllBefore();
 339         }
 340
 341         *(m_fmtLast++) = ch;
 342     }
 343
 344     void CopyAllBefore()
 345     {
 346         wxASSERT_MSG( m_fmtOrig && m_fmt.data() == NULL, "logic error" );
 347
 348         // the modified format string is guaranteed to be no longer than
 349         // 3/2 of the original (worst case: the entire format string consists
 350         // of "%s" repeated and is expanded to "%ls" on Unix), so we can
 351         // allocate the buffer now and not worry about running out of space if
 352         // we over-allocate a bit:
 353         size_t fmtLen = wxStrlen(m_fmtOrig);
 354         // worst case is of even length, so there's no rounding error in *3/2:
 355         m_fmt.extend(fmtLen * 3 / 2);
 356
 357         if ( m_nCopied > 0 )
 358             wxStrncpy(m_fmt.data(), m_fmtOrig, m_nCopied);
 359         m_fmtLast = m_fmt.data() + m_nCopied;
 360
 361         // we won't need it any longer and resetting it also indicates that we
 362         // modified the format
 363         m_fmtOrig = NULL;
 364     }
 365
 366     static bool IsFlagChar(CharType ch)
 367     {
 368         return ch == wxT('-') || ch == wxT('+') ||
 369                ch == wxT('0') || ch == wxT(' ') || ch == wxT('#');
 370     }
 371
 372     void SkipDigits(const CharType **ptpc)
 373     {
 374         while ( **ptpc >= wxT('0') && **ptpc <= wxT('9') )
 375             CopyFmtChar(*(*ptpc)++);
 376     }
 377
 378     // the translated format
 379     wxCharTypeBuffer<CharType> m_fmt;
 380     CharType *m_fmtLast;
 381
 382     // the original format
 383     const CharType *m_fmtOrig;
 384
 385     // the number of characters already copied (i.e. already parsed, but left
 386     // unmodified)
 387     size_t m_nCopied;
 388 };
 389
 390 #if defined(__WINDOWS__) && !defined(__CYGWIN__)
 391
 392 // on Windows, we should use %s and %c regardless of the build:
 393 class wxPrintfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
 394 {
 395     virtual void HandleString(CharType WXUNUSED(conv),
 396                               SizeModifier WXUNUSED(size),
 397                               CharType& outConv, SizeModifier& outSize)
 398     {
 399         outConv = 's';
 400         outSize = Size_Default;
 401     }
 402
 403     virtual void HandleChar(CharType WXUNUSED(conv),
 404                             SizeModifier WXUNUSED(size),
 405                             CharType& outConv, SizeModifier& outSize)
 406     {
 407         outConv = 'c';
 408         outSize = Size_Default;
 409     }
 410 };
 411
 412 #else // !__WINDOWS__
 413
 414 // on Unix, it's %s for ANSI functions and %ls for widechar:
 415
 416 #if !wxUSE_UTF8_LOCALE_ONLY
 417 class wxPrintfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
 418 {
 419     virtual void HandleString(CharType WXUNUSED(conv),
 420                               SizeModifier WXUNUSED(size),
 421                               CharType& outConv, SizeModifier& outSize)
 422     {
 423         outConv = 's';
 424         outSize = Size_Long;
 425     }
 426
 427     virtual void HandleChar(CharType WXUNUSED(conv),
 428                             SizeModifier WXUNUSED(size),
 429                             CharType& outConv, SizeModifier& outSize)
 430     {
 431         outConv = 'c';
 432         outSize = Size_Long;
 433     }
 434 };
 435 #endif // !wxUSE_UTF8_LOCALE_ONLY
 436
 437 #endif // __WINDOWS__/!__WINDOWS__
 438
 439 #if wxUSE_UNICODE_UTF8
 440 class wxPrintfFormatConverterUtf8 : public wxFormatConverterBase<char>
 441 {
 442     virtual void HandleString(CharType WXUNUSED(conv),
 443                               SizeModifier WXUNUSED(size),
 444                               CharType& outConv, SizeModifier& outSize)
 445     {
 446         outConv = 's';
 447         outSize = Size_Default;
 448     }
 449
 450     virtual void HandleChar(CharType WXUNUSED(conv),
 451                             SizeModifier WXUNUSED(size),
 452                             CharType& outConv, SizeModifier& outSize)
 453     {
 454         // chars are represented using wchar_t in both builds, so this is
 455         // the same as above
 456         outConv = 'c';
 457         outSize = Size_Long;
 458     }
 459 };
 460 #endif // wxUSE_UNICODE_UTF8
 461
 462 #if !wxUSE_UNICODE // FIXME-UTF8: remove
 463 class wxPrintfFormatConverterANSI : public wxFormatConverterBase<char>
 464 {
 465     virtual void HandleString(CharType WXUNUSED(conv),
 466                               SizeModifier WXUNUSED(size),
 467                               CharType& outConv, SizeModifier& outSize)
 468     {
 469         outConv = 's';
 470         outSize = Size_Default;
 471     }
 472
 473     virtual void HandleChar(CharType WXUNUSED(conv),
 474                             SizeModifier WXUNUSED(size),
 475                             CharType& outConv, SizeModifier& outSize)
 476     {
 477         outConv = 'c';
 478         outSize = Size_Default;
 479     }
 480 };
 481 #endif // ANSI
 482
 483 #ifndef __WINDOWS__
 484 /*
 485
 486    wxScanf() format translation is different, we need to translate %s to %ls
 487    and %c to %lc on Unix (but not Windows and for widechar functions only!).
 488
 489    So to use native functions in order to get our semantics we must do the
 490    following translations in Unicode mode:
 491
 492    wxWidgets specifier      POSIX specifier
 493    ----------------------------------------
 494
 495    %hc, %C, %hC             %c
 496    %c                       %lc
 497
 498  */
 499 class wxScanfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
 500 {
 501     virtual void HandleString(CharType conv, SizeModifier size,
 502                               CharType& outConv, SizeModifier& outSize)
 503     {
 504         outConv = 's';
 505         outSize = GetOutSize(conv == 'S', size);
 506     }
 507
 508     virtual void HandleChar(CharType conv, SizeModifier size,
 509                             CharType& outConv, SizeModifier& outSize)
 510     {
 511         outConv = 'c';
 512         outSize = GetOutSize(conv == 'C', size);
 513     }
 514
 515     SizeModifier GetOutSize(bool convIsUpper, SizeModifier size)
 516     {
 517         // %S and %hS -> %s and %lS -> %ls
 518         if ( convIsUpper )
 519         {
 520             if ( size == Size_Long )
 521                 return Size_Long;
 522             else
 523                 return Size_Default;
 524         }
 525         else // %s or %c
 526         {
 527             if ( size == Size_Default )
 528                 return Size_Long;
 529             else
 530                 return size;
 531         }
 532     }
 533 };
 534
 535 const wxScopedWCharBuffer wxScanfConvertFormatW(const wchar_t *format)
 536 {
 537     return wxScanfFormatConverterWchar().Convert(format);
 538 }
 539 #endif // !__WINDOWS__
 540
 541
 542 // ----------------------------------------------------------------------------
 543 // wxFormatString
 544 // ----------------------------------------------------------------------------
 545
 546 #if !wxUSE_UNICODE_WCHAR
 547 const char* wxFormatString::InputAsChar()
 548 {
 549     if ( m_char )
 550         return m_char.data();
 551
 552     // in ANSI build, wx_str() returns char*, in UTF-8 build, this function
 553     // is only called under UTF-8 locales, so we should return UTF-8 string,
 554     // which is, again, what wx_str() returns:
 555     if ( m_str )
 556         return m_str->wx_str();
 557
 558     // ditto wxCStrData:
 559     if ( m_cstr )
 560         return m_cstr->AsInternal();
 561
 562     // the last case is that wide string was passed in: in that case, we need
 563     // to convert it:
 564     wxASSERT( m_wchar );
 565
 566     m_char = wxConvLibc.cWC2MB(m_wchar.data());
 567
 568     return m_char.data();
 569 }
 570
 571 const char* wxFormatString::AsChar()
 572 {
 573     if ( !m_convertedChar )
 574 #if !wxUSE_UNICODE // FIXME-UTF8: remove this
 575         m_convertedChar = wxPrintfFormatConverterANSI().Convert(InputAsChar());
 576 #else
 577         m_convertedChar = wxPrintfFormatConverterUtf8().Convert(InputAsChar());
 578 #endif
 579
 580     return m_convertedChar.data();
 581 }
 582 #endif // !wxUSE_UNICODE_WCHAR
 583
 584 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 585 const wchar_t* wxFormatString::InputAsWChar()
 586 {
 587     if ( m_wchar )
 588         return m_wchar.data();
 589
 590 #if wxUSE_UNICODE_WCHAR
 591     if ( m_str )
 592         return m_str->wc_str();
 593     if ( m_cstr )
 594         return m_cstr->AsInternal();
 595 #else // wxUSE_UNICODE_UTF8
 596     if ( m_str )
 597     {
 598         m_wchar = m_str->wc_str();
 599         return m_wchar.data();
 600     }
 601     if ( m_cstr )
 602     {
 603         m_wchar = m_cstr->AsWCharBuf();
 604         return m_wchar.data();
 605     }
 606 #endif // wxUSE_UNICODE_WCHAR/UTF8
 607
 608     // the last case is that narrow string was passed in: in that case, we need
 609     // to convert it:
 610     wxASSERT( m_char );
 611
 612     m_wchar = wxConvLibc.cMB2WC(m_char.data());
 613
 614     return m_wchar.data();
 615 }
 616
 617 const wchar_t* wxFormatString::AsWChar()
 618 {
 619     if ( !m_convertedWChar )
 620         m_convertedWChar = wxPrintfFormatConverterWchar().Convert(InputAsWChar());
 621
 622     return m_convertedWChar.data();
 623 }
 624 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 625
 626 wxString wxFormatString::InputAsString() const
 627 {
 628     if ( m_str )
 629         return *m_str;
 630     if ( m_cstr )
 631         return m_cstr->AsString();
 632     if ( m_wchar )
 633         return wxString(m_wchar);
 634     if ( m_char )
 635         return wxString(m_char);
 636
 637     wxFAIL_MSG( "invalid wxFormatString - not initialized?" );
 638     return wxString();
 639 }
 640
 641 // ----------------------------------------------------------------------------
 642 // wxFormatString::GetArgumentType()
 643 // ----------------------------------------------------------------------------
 644
 645 namespace
 646 {
 647
 648 template<typename CharType>
 649 wxFormatString::ArgumentType DoGetArgumentType(const CharType *format,
 650                                                unsigned n)
 651 {
 652     wxCHECK_MSG( format, wxFormatString::Arg_Unknown,
 653                  "empty format string not allowed here" );
 654
 655     wxPrintfConvSpecParser<CharType> parser(format);
 656
 657     wxCHECK_MSG( n <= parser.nargs, wxFormatString::Arg_Unknown,
 658                  "more arguments than format string specifiers?" );
 659
 660     wxCHECK_MSG( parser.pspec[n-1] != NULL, wxFormatString::Arg_Unknown,
 661                  "requested argument not found - invalid format string?" );
 662
 663     switch ( parser.pspec[n-1]->m_type )
 664     {
 665         case wxPAT_CHAR:
 666         case wxPAT_WCHAR:
 667             return wxFormatString::Arg_Char;
 668
 669         case wxPAT_PCHAR:
 670         case wxPAT_PWCHAR:
 671             return wxFormatString::Arg_String;
 672
 673         case wxPAT_INT:
 674             return wxFormatString::Arg_Int;
 675         case wxPAT_LONGINT:
 676             return wxFormatString::Arg_LongInt;
 677 #ifdef wxLongLong_t
 678         case wxPAT_LONGLONGINT:
 679             return wxFormatString::Arg_LongLongInt;
 680 #endif
 681         case wxPAT_SIZET:
 682             return wxFormatString::Arg_Size_t;
 683
 684         case wxPAT_DOUBLE:
 685             return wxFormatString::Arg_Double;
 686         case wxPAT_LONGDOUBLE:
 687             return wxFormatString::Arg_LongDouble;
 688
 689         case wxPAT_POINTER:
 690             return wxFormatString::Arg_Pointer;
 691
 692         case wxPAT_NINT:
 693             return wxFormatString::Arg_IntPtr;
 694         case wxPAT_NSHORTINT:
 695             return wxFormatString::Arg_ShortIntPtr;
 696         case wxPAT_NLONGINT:
 697             return wxFormatString::Arg_LongIntPtr;
 698
 699         case wxPAT_STAR:
 700             // "*" requires argument of type int
 701             return wxFormatString::Arg_Int;
 702
 703         case wxPAT_INVALID:
 704             // (handled after the switch statement)
 705             break;
 706     }
 707
 708     // silence warning
 709     wxFAIL_MSG( "unexpected argument type" );
 710     return wxFormatString::Arg_Unknown;
 711 }
 712
 713 } // anonymous namespace
 714
 715 wxFormatString::ArgumentType wxFormatString::GetArgumentType(unsigned n) const
 716 {
 717     if ( m_char )
 718         return DoGetArgumentType(m_char.data(), n);
 719     else if ( m_wchar )
 720         return DoGetArgumentType(m_wchar.data(), n);
 721     else if ( m_str )
 722         return DoGetArgumentType(m_str->wx_str(), n);
 723     else if ( m_cstr )
 724         return DoGetArgumentType(m_cstr->AsInternal(), n);
 725
 726     wxFAIL_MSG( "unreachable code" );
 727     return Arg_Unknown;
 728 }