src/common/strvararg.cpp

   1 ///////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/strvararg.cpp
   3 // Purpose:     macros for implementing type-safe vararg passing of strings
   4 // Author:      Vaclav Slavik
   5 // Created:     2007-02-19
   6 // RCS-ID:      $Id$
   7 // Copyright:   (c) 2007 REA Elektronik GmbH
   8 // Licence:     wxWindows licence
   9 ///////////////////////////////////////////////////////////////////////////////
  10
  11 // ============================================================================
  12 // declarations
  13 // ============================================================================
  14
  15 // ----------------------------------------------------------------------------
  16 // headers
  17 // ----------------------------------------------------------------------------
  18
  19 // for compilers that support precompilation, includes "wx.h".
  20 #include "wx/wxprec.h"
  21
  22 #ifdef __BORLANDC__
  23     #pragma hdrstop
  24 #endif
  25
  26 #include "wx/strvararg.h"
  27 #include "wx/string.h"
  28 #include "wx/crt.h"
  29 #include "wx/private/wxprintf.h"
  30
  31 // ============================================================================
  32 // implementation
  33 // ============================================================================
  34
  35 // ----------------------------------------------------------------------------
  36 // wxArgNormalizer<>
  37 // ----------------------------------------------------------------------------
  38
  39 const wxStringCharType *wxArgNormalizerNative<const wxString&>::get() const
  40 {
  41     return m_value.wx_str();
  42 }
  43
  44 const wxStringCharType *wxArgNormalizerNative<const wxCStrData&>::get() const
  45 {
  46     return m_value.AsInternal();
  47 }
  48
  49 #if wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY
  50 wxArgNormalizerWchar<const wxString&>::wxArgNormalizerWchar(
  51                             const wxString& s,
  52                             const wxFormatString *fmt, unsigned index)
  53     : wxArgNormalizerWithBuffer<wchar_t>(s.wc_str(), fmt, index)
  54 {
  55 }
  56
  57 wxArgNormalizerWchar<const wxCStrData&>::wxArgNormalizerWchar(
  58                             const wxCStrData& s,
  59                             const wxFormatString *fmt, unsigned index)
  60     : wxArgNormalizerWithBuffer<wchar_t>(s.AsWCharBuf(), fmt, index)
  61 {
  62 }
  63 #endif // wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY
  64
  65 // ----------------------------------------------------------------------------
  66 // wxArgNormalizedString
  67 // ----------------------------------------------------------------------------
  68
  69 wxString wxArgNormalizedString::GetString() const
  70 {
  71     if ( !IsValid() )
  72         return wxEmptyString;
  73
  74 #if wxUSE_UTF8_LOCALE_ONLY
  75     return wxString(reinterpret_cast<const char*>(m_ptr));
  76 #else
  77     #if wxUSE_UNICODE_UTF8
  78         if ( wxLocaleIsUtf8 )
  79             return wxString(reinterpret_cast<const char*>(m_ptr));
  80         else
  81     #endif
  82         return wxString(reinterpret_cast<const wxChar*>(m_ptr));
  83 #endif // !wxUSE_UTF8_LOCALE_ONLY
  84 }
  85
  86 wxArgNormalizedString::operator wxString() const
  87 {
  88     return GetString();
  89 }
  90
  91 // ----------------------------------------------------------------------------
  92 // wxFormatConverter: class doing the "%s" and "%c" normalization
  93 // ----------------------------------------------------------------------------
  94
  95 /*
   There are three problems with wxPrintf() etc. format strings:
  97
  98    1) The printf vararg macros convert all forms of strings into
  99       wxStringCharType* representation. This may make the format string
 100       incorrect: for example, if %ls was used together with a wchar_t*
 101       variadic argument, this would no longer work, because the templates
 102       would change wchar_t* argument to wxStringCharType* and %ls would now
      be incorrect in e.g. UTF-8 build. We need to make sure only one specifier
 104       form is used.
 105
 106    2) To complicate matters further, the meaning of %s and %c is different
 107       under Windows and on Unix. The Windows/MS convention is as follows:
 108
 109        In ANSI mode:
 110
 111        format specifier         results in
 112        -----------------------------------
 113        %s, %hs, %hS             char*
 114        %ls, %S, %lS             wchar_t*
 115
 116        In Unicode mode:
 117
 118        format specifier         results in
 119        -----------------------------------
 120        %hs, %S, %hS             char*
 121        %s, %ls, %lS             wchar_t*
 122
 123        (While on POSIX systems we have %C identical to %lc and %c always means
 124        char (in any mode) while %lc always means wchar_t.)
 125
 126       In other words, we should _only_ use %s on Windows and %ls on Unix for
 127       wxUSE_UNICODE_WCHAR build.
 128
 129    3) To make things even worse, we need two forms in UTF-8 build: one for
 130       passing strings to ANSI functions under UTF-8 locales (this one should
 131       use %s) and one for widechar functions used under non-UTF-8 locales
 132       (this one should use %ls).
 133
 134    And, of course, the same should be done for %c as well.
 135
 136
 137    wxScanf() family of functions is simpler, because we don't normalize their
 138    variadic arguments and we only have to handle 2) above and only for widechar
 139    versions.
 140 */
 141
 142 template<typename T>
 143 class wxFormatConverterBase
 144 {
 145 public:
 146     typedef T CharType;
 147
 148     wxFormatConverterBase()
 149     {
 150         m_fmtOrig = NULL;
 151         m_fmtLast = NULL;
 152         m_nCopied = 0;
 153     }
 154
 155     wxScopedCharTypeBuffer<CharType> Convert(const CharType *format)
 156     {
 157         // this is reset to NULL if we modify the format string
 158         m_fmtOrig = format;
 159
 160         while ( *format )
 161         {
 162             if ( CopyFmtChar(*format++) == wxT('%') )
 163             {
 164 #if wxUSE_PRINTF_POS_PARAMS
 165                 if ( *format >= '0' && *format <= '9' )
 166                 {
 167                     SkipDigits(&format);
 168                     if ( *format == '$' )
 169                     {
 170                         // It was a positional argument specification.
 171                         CopyFmtChar(*format++);
 172                     }
 173                     //else: it was a width specification, nothing else to do.
 174                 }
 175 #endif // wxUSE_PRINTF_POS_PARAMS
 176
 177                 // skip any flags
 178                 while ( IsFlagChar(*format) )
 179                     CopyFmtChar(*format++);
 180
 181                 // and possible width
 182                 if ( *format == wxT('*') )
 183                     CopyFmtChar(*format++);
 184                 else
 185                     SkipDigits(&format);
 186
 187                 // precision?
 188                 if ( *format == wxT('.') )
 189                 {
 190                     CopyFmtChar(*format++);
 191                     if ( *format == wxT('*') )
 192                         CopyFmtChar(*format++);
 193                     else
 194                         SkipDigits(&format);
 195                 }
 196
 197                 // next we can have a size modifier
 198                 SizeModifier size;
 199
 200                 switch ( *format )
 201                 {
 202                     case 'h':
 203                         size = Size_Short;
 204                         format++;
 205                         break;
 206
 207                     case 'l':
 208                         // "ll" has a different meaning!
 209                         if ( format[1] != 'l' )
 210                         {
 211                             size = Size_Long;
 212                             format++;
 213                             break;
 214                         }
 215                         //else: fall through
 216
 217                     default:
 218                         size = Size_Default;
 219                 }
 220
 221                 CharType outConv = *format;
 222                 SizeModifier outSize = size;
 223
 224                 // and finally we should have the type
 225                 switch ( *format )
 226                 {
 227                     case wxT('S'):
 228                     case wxT('s'):
 229                         // all strings were converted into the same form by
 230                         // wxArgNormalizer<T>, this form depends on the context
 231                         // in which the value is used (scanf/printf/wprintf):
 232                         HandleString(*format, size, outConv, outSize);
 233                         break;
 234
 235                     case wxT('C'):
 236                     case wxT('c'):
 237                         HandleChar(*format, size, outConv, outSize);
 238                         break;
 239
 240                     default:
 241                         // nothing special to do
 242                         break;
 243                 }
 244
 245                 if ( outConv == *format && outSize == size ) // no change
 246                 {
 247                     if ( size != Size_Default )
 248                         CopyFmtChar(*(format - 1));
 249                     CopyFmtChar(*format);
 250                 }
 251                 else // something changed
 252                 {
 253                     switch ( outSize )
 254                     {
 255                         case Size_Long:
 256                             InsertFmtChar(wxT('l'));
 257                             break;
 258
 259                         case Size_Short:
 260                             InsertFmtChar(wxT('h'));
 261                             break;
 262
 263                         case Size_Default:
 264                             // nothing to do
 265                             break;
 266                     }
 267                     InsertFmtChar(outConv);
 268                 }
 269
 270                 format++;
 271             }
 272         }
 273
 274         // notice that we only translated the string if m_fmtOrig == NULL (as
 275         // set by CopyAllBefore()), otherwise we should simply use the original
 276         // format
 277         if ( m_fmtOrig )
 278         {
 279             return wxScopedCharTypeBuffer<CharType>::CreateNonOwned(m_fmtOrig);
 280         }
 281         else
 282         {
 283             // shrink converted format string to actual size (instead of
 284             // over-sized allocation from CopyAllBefore()) and NUL-terminate
 285             // it:
 286             m_fmt.shrink(m_fmtLast - m_fmt.data());
 287             return m_fmt;
 288         }
 289     }
 290
 291     virtual ~wxFormatConverterBase() {}
 292
 293 protected:
 294     enum SizeModifier
 295     {
 296         Size_Default,
 297         Size_Short,
 298         Size_Long
 299     };
 300
 301     // called to handle %S or %s; 'conv' is conversion specifier ('S' or 's'
 302     // respectively), 'size' is the preceding size modifier; the new values of
 303     // conversion and size specifiers must be written to outConv and outSize
 304     virtual void HandleString(CharType conv, SizeModifier size,
 305                               CharType& outConv, SizeModifier& outSize) = 0;
 306
 307     // ditto for %C or %c
 308     virtual void HandleChar(CharType conv, SizeModifier size,
 309                             CharType& outConv, SizeModifier& outSize) = 0;
 310
 311 private:
 312     // copy another character to the translated format: this function does the
 313     // copy if we are translating but doesn't do anything at all if we don't,
 314     // so we don't create the translated format string at all unless we really
 315     // need to (i.e. InsertFmtChar() is called)
 316     CharType CopyFmtChar(CharType ch)
 317     {
 318         if ( !m_fmtOrig )
 319         {
 320             // we're translating, do copy
 321             *(m_fmtLast++) = ch;
 322         }
 323         else
 324         {
 325             // simply increase the count which should be copied by
 326             // CopyAllBefore() later if needed
 327             m_nCopied++;
 328         }
 329
 330         return ch;
 331     }
 332
 333     // insert an extra character
 334     void InsertFmtChar(CharType ch)
 335     {
 336         if ( m_fmtOrig )
 337         {
 338             // so far we haven't translated anything yet
 339             CopyAllBefore();
 340         }
 341
 342         *(m_fmtLast++) = ch;
 343     }
 344
 345     void CopyAllBefore()
 346     {
 347         wxASSERT_MSG( m_fmtOrig && m_fmt.data() == NULL, "logic error" );
 348
 349         // the modified format string is guaranteed to be no longer than
 350         // 3/2 of the original (worst case: the entire format string consists
 351         // of "%s" repeated and is expanded to "%ls" on Unix), so we can
 352         // allocate the buffer now and not worry about running out of space if
 353         // we over-allocate a bit:
 354         size_t fmtLen = wxStrlen(m_fmtOrig);
 355         // worst case is of even length, so there's no rounding error in *3/2:
 356         m_fmt.extend(fmtLen * 3 / 2);
 357
 358         if ( m_nCopied > 0 )
 359             wxStrncpy(m_fmt.data(), m_fmtOrig, m_nCopied);
 360         m_fmtLast = m_fmt.data() + m_nCopied;
 361
 362         // we won't need it any longer and resetting it also indicates that we
 363         // modified the format
 364         m_fmtOrig = NULL;
 365     }
 366
 367     static bool IsFlagChar(CharType ch)
 368     {
 369         return ch == wxT('-') || ch == wxT('+') ||
 370                ch == wxT('0') || ch == wxT(' ') || ch == wxT('#');
 371     }
 372
 373     void SkipDigits(const CharType **ptpc)
 374     {
 375         while ( **ptpc >= wxT('0') && **ptpc <= wxT('9') )
 376             CopyFmtChar(*(*ptpc)++);
 377     }
 378
 379     // the translated format
 380     wxCharTypeBuffer<CharType> m_fmt;
 381     CharType *m_fmtLast;
 382
 383     // the original format
 384     const CharType *m_fmtOrig;
 385
 386     // the number of characters already copied (i.e. already parsed, but left
 387     // unmodified)
 388     size_t m_nCopied;
 389 };
 390
 391 #if defined(__WINDOWS__) && !defined(__CYGWIN__)
 392
 393 // on Windows, we should use %s and %c regardless of the build:
 394 class wxPrintfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
 395 {
 396     virtual void HandleString(CharType WXUNUSED(conv),
 397                               SizeModifier WXUNUSED(size),
 398                               CharType& outConv, SizeModifier& outSize)
 399     {
 400         outConv = 's';
 401         outSize = Size_Default;
 402     }
 403
 404     virtual void HandleChar(CharType WXUNUSED(conv),
 405                             SizeModifier WXUNUSED(size),
 406                             CharType& outConv, SizeModifier& outSize)
 407     {
 408         outConv = 'c';
 409         outSize = Size_Default;
 410     }
 411 };
 412
 413 #else // !__WINDOWS__
 414
 415 // on Unix, it's %s for ANSI functions and %ls for widechar:
 416
 417 #if !wxUSE_UTF8_LOCALE_ONLY
 418 class wxPrintfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
 419 {
 420     virtual void HandleString(CharType WXUNUSED(conv),
 421                               SizeModifier WXUNUSED(size),
 422                               CharType& outConv, SizeModifier& outSize)
 423     {
 424         outConv = 's';
 425         outSize = Size_Long;
 426     }
 427
 428     virtual void HandleChar(CharType WXUNUSED(conv),
 429                             SizeModifier WXUNUSED(size),
 430                             CharType& outConv, SizeModifier& outSize)
 431     {
 432         outConv = 'c';
 433         outSize = Size_Long;
 434     }
 435 };
 436 #endif // !wxUSE_UTF8_LOCALE_ONLY
 437
 438 #if wxUSE_UNICODE_UTF8
 439 class wxPrintfFormatConverterUtf8 : public wxFormatConverterBase<char>
 440 {
 441     virtual void HandleString(CharType WXUNUSED(conv),
 442                               SizeModifier WXUNUSED(size),
 443                               CharType& outConv, SizeModifier& outSize)
 444     {
 445         outConv = 's';
 446         outSize = Size_Default;
 447     }
 448
 449     virtual void HandleChar(CharType WXUNUSED(conv),
 450                             SizeModifier WXUNUSED(size),
 451                             CharType& outConv, SizeModifier& outSize)
 452     {
 453         // chars are represented using wchar_t in both builds, so this is
 454         // the same as above
 455         outConv = 'c';
 456         outSize = Size_Long;
 457     }
 458 };
 459 #endif // wxUSE_UNICODE_UTF8
 460
 461 #endif // __WINDOWS__/!__WINDOWS__
 462
 463 #if !wxUSE_UNICODE // FIXME-UTF8: remove
 464 class wxPrintfFormatConverterANSI : public wxFormatConverterBase<char>
 465 {
 466     virtual void HandleString(CharType WXUNUSED(conv),
 467                               SizeModifier WXUNUSED(size),
 468                               CharType& outConv, SizeModifier& outSize)
 469     {
 470         outConv = 's';
 471         outSize = Size_Default;
 472     }
 473
 474     virtual void HandleChar(CharType WXUNUSED(conv),
 475                             SizeModifier WXUNUSED(size),
 476                             CharType& outConv, SizeModifier& outSize)
 477     {
 478         outConv = 'c';
 479         outSize = Size_Default;
 480     }
 481 };
 482 #endif // ANSI
 483
 484 #ifndef __WINDOWS__
 485 /*
 486
 487    wxScanf() format translation is different, we need to translate %s to %ls
 488    and %c to %lc on Unix (but not Windows and for widechar functions only!).
 489
 490    So to use native functions in order to get our semantics we must do the
 491    following translations in Unicode mode:
 492
 493    wxWidgets specifier      POSIX specifier
 494    ----------------------------------------
 495
 496    %hc, %C, %hC             %c
 497    %c                       %lc
 498
 499  */
 500 class wxScanfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
 501 {
 502     virtual void HandleString(CharType conv, SizeModifier size,
 503                               CharType& outConv, SizeModifier& outSize)
 504     {
 505         outConv = 's';
 506         outSize = GetOutSize(conv == 'S', size);
 507     }
 508
 509     virtual void HandleChar(CharType conv, SizeModifier size,
 510                             CharType& outConv, SizeModifier& outSize)
 511     {
 512         outConv = 'c';
 513         outSize = GetOutSize(conv == 'C', size);
 514     }
 515
 516     SizeModifier GetOutSize(bool convIsUpper, SizeModifier size)
 517     {
 518         // %S and %hS -> %s and %lS -> %ls
 519         if ( convIsUpper )
 520         {
 521             if ( size == Size_Long )
 522                 return Size_Long;
 523             else
 524                 return Size_Default;
 525         }
 526         else // %s or %c
 527         {
 528             if ( size == Size_Default )
 529                 return Size_Long;
 530             else
 531                 return size;
 532         }
 533     }
 534 };
 535
 536 const wxScopedWCharBuffer wxScanfConvertFormatW(const wchar_t *format)
 537 {
 538     return wxScanfFormatConverterWchar().Convert(format);
 539 }
 540 #endif // !__WINDOWS__
 541
 542
 543 // ----------------------------------------------------------------------------
 544 // wxFormatString
 545 // ----------------------------------------------------------------------------
 546
 547 #if !wxUSE_UNICODE_WCHAR
 548 const char* wxFormatString::InputAsChar()
 549 {
 550     if ( m_char )
 551         return m_char.data();
 552
 553     // in ANSI build, wx_str() returns char*, in UTF-8 build, this function
 554     // is only called under UTF-8 locales, so we should return UTF-8 string,
 555     // which is, again, what wx_str() returns:
 556     if ( m_str )
 557         return m_str->wx_str();
 558
 559     // ditto wxCStrData:
 560     if ( m_cstr )
 561         return m_cstr->AsInternal();
 562
 563     // the last case is that wide string was passed in: in that case, we need
 564     // to convert it:
 565     wxASSERT( m_wchar );
 566
 567     m_char = wxConvLibc.cWC2MB(m_wchar.data());
 568
 569     return m_char.data();
 570 }
 571
 572 const char* wxFormatString::AsChar()
 573 {
 574     if ( !m_convertedChar )
 575 #if !wxUSE_UNICODE // FIXME-UTF8: remove this
 576         m_convertedChar = wxPrintfFormatConverterANSI().Convert(InputAsChar());
 577 #else
 578         m_convertedChar = wxPrintfFormatConverterUtf8().Convert(InputAsChar());
 579 #endif
 580
 581     return m_convertedChar.data();
 582 }
 583 #endif // !wxUSE_UNICODE_WCHAR
 584
 585 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 586 const wchar_t* wxFormatString::InputAsWChar()
 587 {
 588     if ( m_wchar )
 589         return m_wchar.data();
 590
 591 #if wxUSE_UNICODE_WCHAR
 592     if ( m_str )
 593         return m_str->wc_str();
 594     if ( m_cstr )
 595         return m_cstr->AsInternal();
 596 #else // wxUSE_UNICODE_UTF8
 597     if ( m_str )
 598     {
 599         m_wchar = m_str->wc_str();
 600         return m_wchar.data();
 601     }
 602     if ( m_cstr )
 603     {
 604         m_wchar = m_cstr->AsWCharBuf();
 605         return m_wchar.data();
 606     }
 607 #endif // wxUSE_UNICODE_WCHAR/UTF8
 608
 609     // the last case is that narrow string was passed in: in that case, we need
 610     // to convert it:
 611     wxASSERT( m_char );
 612
 613     m_wchar = wxConvLibc.cMB2WC(m_char.data());
 614
 615     return m_wchar.data();
 616 }
 617
 618 const wchar_t* wxFormatString::AsWChar()
 619 {
 620     if ( !m_convertedWChar )
 621         m_convertedWChar = wxPrintfFormatConverterWchar().Convert(InputAsWChar());
 622
 623     return m_convertedWChar.data();
 624 }
 625 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 626
 627 wxString wxFormatString::InputAsString() const
 628 {
 629     if ( m_str )
 630         return *m_str;
 631     if ( m_cstr )
 632         return m_cstr->AsString();
 633     if ( m_wchar )
 634         return wxString(m_wchar);
 635     if ( m_char )
 636         return wxString(m_char);
 637
 638     wxFAIL_MSG( "invalid wxFormatString - not initialized?" );
 639     return wxString();
 640 }
 641
 642 // ----------------------------------------------------------------------------
 643 // wxFormatString::GetArgumentType()
 644 // ----------------------------------------------------------------------------
 645
 646 namespace
 647 {
 648
 649 template<typename CharType>
 650 wxFormatString::ArgumentType DoGetArgumentType(const CharType *format,
 651                                                unsigned n)
 652 {
 653     wxCHECK_MSG( format, wxFormatString::Arg_Unknown,
 654                  "empty format string not allowed here" );
 655
 656     wxPrintfConvSpecParser<CharType> parser(format);
 657
 658     wxCHECK_MSG( n <= parser.nargs, wxFormatString::Arg_Unknown,
 659                  "more arguments than format string specifiers?" );
 660
 661     wxCHECK_MSG( parser.pspec[n-1] != NULL, wxFormatString::Arg_Unknown,
 662                  "requested argument not found - invalid format string?" );
 663
 664     switch ( parser.pspec[n-1]->m_type )
 665     {
 666         case wxPAT_CHAR:
 667         case wxPAT_WCHAR:
 668             return wxFormatString::Arg_Char;
 669
 670         case wxPAT_PCHAR:
 671         case wxPAT_PWCHAR:
 672             return wxFormatString::Arg_String;
 673
 674         case wxPAT_INT:
 675             return wxFormatString::Arg_Int;
 676         case wxPAT_LONGINT:
 677             return wxFormatString::Arg_LongInt;
 678 #ifdef wxLongLong_t
 679         case wxPAT_LONGLONGINT:
 680             return wxFormatString::Arg_LongLongInt;
 681 #endif
 682         case wxPAT_SIZET:
 683             return wxFormatString::Arg_Size_t;
 684
 685         case wxPAT_DOUBLE:
 686             return wxFormatString::Arg_Double;
 687         case wxPAT_LONGDOUBLE:
 688             return wxFormatString::Arg_LongDouble;
 689
 690         case wxPAT_POINTER:
 691             return wxFormatString::Arg_Pointer;
 692
 693         case wxPAT_NINT:
 694             return wxFormatString::Arg_IntPtr;
 695         case wxPAT_NSHORTINT:
 696             return wxFormatString::Arg_ShortIntPtr;
 697         case wxPAT_NLONGINT:
 698             return wxFormatString::Arg_LongIntPtr;
 699
 700         case wxPAT_STAR:
 701             // "*" requires argument of type int
 702             return wxFormatString::Arg_Int;
 703
 704         case wxPAT_INVALID:
 705             // (handled after the switch statement)
 706             break;
 707     }
 708
 709     // silence warning
 710     wxFAIL_MSG( "unexpected argument type" );
 711     return wxFormatString::Arg_Unknown;
 712 }
 713
 714 } // anonymous namespace
 715
 716 wxFormatString::ArgumentType wxFormatString::GetArgumentType(unsigned n) const
 717 {
 718     if ( m_char )
 719         return DoGetArgumentType(m_char.data(), n);
 720     else if ( m_wchar )
 721         return DoGetArgumentType(m_wchar.data(), n);
 722     else if ( m_str )
 723         return DoGetArgumentType(m_str->wx_str(), n);
 724     else if ( m_cstr )
 725         return DoGetArgumentType(m_cstr->AsInternal(), n);
 726
 727     wxFAIL_MSG( "unreachable code" );
 728     return Arg_Unknown;
 729 }