src/common/strvararg.cpp

   1 ///////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/strvararg.cpp
   3 // Purpose:     macros for implementing type-safe vararg passing of strings
   4 // Author:      Vaclav Slavik
   5 // Created:     2007-02-19
   6 // RCS-ID:      $Id$
   7 // Copyright:   (c) 2007 REA Elektronik GmbH
   8 // Licence:     wxWindows licence
   9 ///////////////////////////////////////////////////////////////////////////////
  10
  11 // ============================================================================
  12 // declarations
  13 // ============================================================================
  14
  15 // ----------------------------------------------------------------------------
  16 // headers
  17 // ----------------------------------------------------------------------------
  18
  19 // for compilers that support precompilation, includes "wx.h".
  20 #include "wx/wxprec.h"
  21
  22 #ifdef __BORLANDC__
  23     #pragma hdrstop
  24 #endif
  25
  26 #include "wx/strvararg.h"
  27 #include "wx/string.h"
  28 #include "wx/crt.h"
  29 #include "wx/private/wxprintf.h"
  30
  31 // ============================================================================
  32 // implementation
  33 // ============================================================================
  34
  35 // ----------------------------------------------------------------------------
  36 // wxArgNormalizer<>
  37 // ----------------------------------------------------------------------------
  38
  39 const wxStringCharType *wxArgNormalizerNative<const wxString&>::get() const
  40 {
  41     return m_value.wx_str();
  42 }
  43
  44 const wxStringCharType *wxArgNormalizerNative<const wxCStrData&>::get() const
  45 {
  46     return m_value.AsInternal();
  47 }
  48
  49 #if wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY
  50 wxArgNormalizerWchar<const wxString&>::wxArgNormalizerWchar(
  51                             const wxString& s,
  52                             const wxFormatString *fmt, unsigned index)
  53     : wxArgNormalizerWithBuffer<wchar_t>(s.wc_str(), fmt, index)
  54 {
  55 }
  56
  57 wxArgNormalizerWchar<const wxCStrData&>::wxArgNormalizerWchar(
  58                             const wxCStrData& s,
  59                             const wxFormatString *fmt, unsigned index)
  60     : wxArgNormalizerWithBuffer<wchar_t>(s.AsWCharBuf(), fmt, index)
  61 {
  62 }
  63 #endif // wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY
  64
  65 // ----------------------------------------------------------------------------
  66 // wxArgNormalizedString
  67 // ----------------------------------------------------------------------------
  68
  69 wxString wxArgNormalizedString::GetString() const
  70 {
  71     if ( !IsValid() )
  72         return wxEmptyString;
  73
  74 #if wxUSE_UTF8_LOCALE_ONLY
  75     return wxString(reinterpret_cast<const char*>(m_ptr));
  76 #else
  77     #if wxUSE_UNICODE_UTF8
  78         if ( wxLocaleIsUtf8 )
  79             return wxString(reinterpret_cast<const char*>(m_ptr));
  80         else
  81     #endif
  82         return wxString(reinterpret_cast<const wxChar*>(m_ptr));
  83 #endif // !wxUSE_UTF8_LOCALE_ONLY
  84 }
  85
  86 wxArgNormalizedString::operator wxString() const
  87 {
  88     return GetString();
  89 }
  90
  91 // ----------------------------------------------------------------------------
  92 // wxFormatConverter: class doing the "%s" and "%c" normalization
  93 // ----------------------------------------------------------------------------
  94
  95 /*
   There are three problems with wxPrintf() etc. format strings:
  97
  98    1) The printf vararg macros convert all forms of strings into
  99       wxStringCharType* representation. This may make the format string
 100       incorrect: for example, if %ls was used together with a wchar_t*
 101       variadic argument, this would no longer work, because the templates
 102       would change wchar_t* argument to wxStringCharType* and %ls would now
      be incorrect in e.g. UTF-8 build. We need to make sure only one
      specifier form is used.
 105
 106    2) To complicate matters further, the meaning of %s and %c is different
 107       under Windows and on Unix. The Windows/MS convention is as follows:
 108
 109        In ANSI mode:
 110
 111        format specifier         results in
 112        -----------------------------------
 113        %s, %hs, %hS             char*
 114        %ls, %S, %lS             wchar_t*
 115
 116        In Unicode mode:
 117
 118        format specifier         results in
 119        -----------------------------------
 120        %hs, %S, %hS             char*
 121        %s, %ls, %lS             wchar_t*
 122
 123        (While on POSIX systems we have %C identical to %lc and %c always means
 124        char (in any mode) while %lc always means wchar_t.)
 125
 126       In other words, we should _only_ use %s on Windows and %ls on Unix for
 127       wxUSE_UNICODE_WCHAR build.
 128
 129    3) To make things even worse, we need two forms in UTF-8 build: one for
 130       passing strings to ANSI functions under UTF-8 locales (this one should
 131       use %s) and one for widechar functions used under non-UTF-8 locales
 132       (this one should use %ls).
 133
 134    And, of course, the same should be done for %c as well.
 135
 136
 137    wxScanf() family of functions is simpler, because we don't normalize their
 138    variadic arguments and we only have to handle 2) above and only for widechar
 139    versions.
 140 */
 141
 142 template<typename T>
 143 class wxFormatConverterBase
 144 {
 145 public:
 146     typedef T CharType;
 147
 148     wxFormatConverterBase()
 149     {
 150         m_fmtOrig = NULL;
 151         m_fmtLast = NULL;
 152         m_nCopied = 0;
 153     }
 154
 155     wxCharTypeBuffer<CharType> Convert(const CharType *format)
 156     {
 157         // this is reset to NULL if we modify the format string
 158         m_fmtOrig = format;
 159
 160         while ( *format )
 161         {
 162             if ( CopyFmtChar(*format++) == _T('%') )
 163             {
 164                 // skip any flags
 165                 while ( IsFlagChar(*format) )
 166                     CopyFmtChar(*format++);
 167
 168                 // and possible width
 169                 if ( *format == _T('*') )
 170                     CopyFmtChar(*format++);
 171                 else
 172                     SkipDigits(&format);
 173
 174                 // precision?
 175                 if ( *format == _T('.') )
 176                 {
 177                     CopyFmtChar(*format++);
 178                     if ( *format == _T('*') )
 179                         CopyFmtChar(*format++);
 180                     else
 181                         SkipDigits(&format);
 182                 }
 183
 184                 // next we can have a size modifier
 185                 SizeModifier size;
 186
 187                 switch ( *format )
 188                 {
 189                     case 'h':
 190                         size = Size_Short;
 191                         format++;
 192                         break;
 193
 194                     case 'l':
 195                         // "ll" has a different meaning!
 196                         if ( format[1] != 'l' )
 197                         {
 198                             size = Size_Long;
 199                             format++;
 200                             break;
 201                         }
 202                         //else: fall through
 203
 204                     default:
 205                         size = Size_Default;
 206                 }
 207
 208                 CharType outConv = *format;
 209                 SizeModifier outSize = size;
 210
 211                 // and finally we should have the type
 212                 switch ( *format )
 213                 {
 214                     case _T('S'):
 215                     case _T('s'):
 216                         // all strings were converted into the same form by
 217                         // wxArgNormalizer<T>, this form depends on the context
 218                         // in which the value is used (scanf/printf/wprintf):
 219                         HandleString(*format, size, outConv, outSize);
 220                         break;
 221
 222                     case _T('C'):
 223                     case _T('c'):
 224                         HandleChar(*format, size, outConv, outSize);
 225                         break;
 226
 227                     default:
 228                         // nothing special to do
 229                         break;
 230                 }
 231
 232                 if ( outConv == *format && outSize == size ) // no change
 233                 {
 234                     if ( size != Size_Default )
 235                         CopyFmtChar(*(format - 1));
 236                     CopyFmtChar(*format);
 237                 }
 238                 else // something changed
 239                 {
 240                     switch ( outSize )
 241                     {
 242                         case Size_Long:
 243                             InsertFmtChar(_T('l'));
 244                             break;
 245
 246                         case Size_Short:
 247                             InsertFmtChar(_T('h'));
 248                             break;
 249
 250                         case Size_Default:
 251                             // nothing to do
 252                             break;
 253                     }
 254                     InsertFmtChar(outConv);
 255                 }
 256
 257                 format++;
 258             }
 259         }
 260
 261         // notice that we only translated the string if m_fmtOrig == NULL (as
 262         // set by CopyAllBefore()), otherwise we should simply use the original
 263         // format
 264         if ( m_fmtOrig )
 265         {
 266             return wxCharTypeBuffer<CharType>::CreateNonOwned(m_fmtOrig);
 267         }
 268         else
 269         {
 270             // NULL-terminate converted format string:
 271             *m_fmtLast = 0;
 272             return m_fmt;
 273         }
 274     }
 275
 276     virtual ~wxFormatConverterBase() {}
 277
 278 protected:
 279     enum SizeModifier
 280     {
 281         Size_Default,
 282         Size_Short,
 283         Size_Long
 284     };
 285
 286     // called to handle %S or %s; 'conv' is conversion specifier ('S' or 's'
 287     // respectively), 'size' is the preceding size modifier; the new values of
 288     // conversion and size specifiers must be written to outConv and outSize
 289     virtual void HandleString(CharType conv, SizeModifier size,
 290                               CharType& outConv, SizeModifier& outSize) = 0;
 291
 292     // ditto for %C or %c
 293     virtual void HandleChar(CharType conv, SizeModifier size,
 294                             CharType& outConv, SizeModifier& outSize) = 0;
 295
 296 private:
 297     // copy another character to the translated format: this function does the
 298     // copy if we are translating but doesn't do anything at all if we don't,
 299     // so we don't create the translated format string at all unless we really
 300     // need to (i.e. InsertFmtChar() is called)
 301     CharType CopyFmtChar(CharType ch)
 302     {
 303         if ( !m_fmtOrig )
 304         {
 305             // we're translating, do copy
 306             *(m_fmtLast++) = ch;
 307         }
 308         else
 309         {
 310             // simply increase the count which should be copied by
 311             // CopyAllBefore() later if needed
 312             m_nCopied++;
 313         }
 314
 315         return ch;
 316     }
 317
 318     // insert an extra character
 319     void InsertFmtChar(CharType ch)
 320     {
 321         if ( m_fmtOrig )
 322         {
 323             // so far we haven't translated anything yet
 324             CopyAllBefore();
 325         }
 326
 327         *(m_fmtLast++) = ch;
 328     }
 329
 330     void CopyAllBefore()
 331     {
 332         wxASSERT_MSG( m_fmtOrig && m_fmt.data() == NULL, "logic error" );
 333
 334         // the modified format string is guaranteed to be no longer than
 335         // 3/2 of the original (worst case: the entire format string consists
 336         // of "%s" repeated and is expanded to "%ls" on Unix), so we can
 337         // allocate the buffer now and not worry about running out of space if
 338         // we over-allocate a bit:
 339         size_t fmtLen = wxStrlen(m_fmtOrig);
 340         // worst case is of even length, so there's no rounding error in *3/2:
 341         m_fmt.extend(fmtLen * 3 / 2);
 342
 343         if ( m_nCopied > 0 )
 344             wxStrncpy(m_fmt.data(), m_fmtOrig, m_nCopied);
 345         m_fmtLast = m_fmt.data() + m_nCopied;
 346
 347         // we won't need it any longer and resetting it also indicates that we
 348         // modified the format
 349         m_fmtOrig = NULL;
 350     }
 351
 352     static bool IsFlagChar(CharType ch)
 353     {
 354         return ch == _T('-') || ch == _T('+') ||
 355                ch == _T('0') || ch == _T(' ') || ch == _T('#');
 356     }
 357
 358     void SkipDigits(const CharType **ptpc)
 359     {
 360         while ( **ptpc >= _T('0') && **ptpc <= _T('9') )
 361             CopyFmtChar(*(*ptpc)++);
 362     }
 363
 364     // the translated format
 365     wxCharTypeBuffer<CharType> m_fmt;
 366     CharType *m_fmtLast;
 367
 368     // the original format
 369     const CharType *m_fmtOrig;
 370
 371     // the number of characters already copied (i.e. already parsed, but left
 372     // unmodified)
 373     size_t m_nCopied;
 374 };
 375
 376
 377
 378 #ifdef __WINDOWS
 379
 380 // on Windows, we should use %s and %c regardless of the build:
 381 class wxPrintfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
 382 {
 383     virtual void HandleString(CharType WXUNUSED(conv),
 384                               SizeModifier WXUNUSED(size),
 385                               CharType& outConv, SizeModifier& outSize)
 386     {
 387         outConv = 's';
 388         outSize = Size_Default;
 389     }
 390
 391     virtual void HandleChar(CharType WXUNUSED(conv),
 392                             SizeModifier WXUNUSED(size),
 393                             CharType& outConv, SizeModifier& outSize)
 394     {
 395         outConv = 'c';
 396         outSize = Size_Default;
 397     }
 398 };
 399
 400 #else // !__WINDOWS__
 401
 402 // on Unix, it's %s for ANSI functions and %ls for widechar:
 403
 404 #if !wxUSE_UTF8_LOCALE_ONLY
 405 class wxPrintfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
 406 {
 407     virtual void HandleString(CharType WXUNUSED(conv),
 408                               SizeModifier WXUNUSED(size),
 409                               CharType& outConv, SizeModifier& outSize)
 410     {
 411         outConv = 's';
 412         outSize = Size_Long;
 413     }
 414
 415     virtual void HandleChar(CharType WXUNUSED(conv),
 416                             SizeModifier WXUNUSED(size),
 417                             CharType& outConv, SizeModifier& outSize)
 418     {
 419         outConv = 'c';
 420         outSize = Size_Long;
 421     }
 422 };
 423 #endif // !wxUSE_UTF8_LOCALE_ONLY
 424
 425 #if wxUSE_UNICODE_UTF8
 426 class wxPrintfFormatConverterUtf8 : public wxFormatConverterBase<char>
 427 {
 428     virtual void HandleString(CharType WXUNUSED(conv),
 429                               SizeModifier WXUNUSED(size),
 430                               CharType& outConv, SizeModifier& outSize)
 431     {
 432         outConv = 's';
 433         outSize = Size_Default;
 434     }
 435
 436     virtual void HandleChar(CharType WXUNUSED(conv),
 437                             SizeModifier WXUNUSED(size),
 438                             CharType& outConv, SizeModifier& outSize)
 439     {
 440         // chars are represented using wchar_t in both builds, so this is
 441         // the same as above
 442         outConv = 'c';
 443         outSize = Size_Long;
 444     }
 445 };
 446 #endif // wxUSE_UNICODE_UTF8
 447
 448 #endif // __WINDOWS__/!__WINDOWS__
 449
 450 #if !wxUSE_UNICODE // FIXME-UTF8: remove
 451 class wxPrintfFormatConverterANSI : public wxFormatConverterBase<char>
 452 {
 453     virtual void HandleString(CharType WXUNUSED(conv),
 454                               SizeModifier WXUNUSED(size),
 455                               CharType& outConv, SizeModifier& outSize)
 456     {
 457         outConv = 's';
 458         outSize = Size_Default;
 459     }
 460
 461     virtual void HandleChar(CharType WXUNUSED(conv),
 462                             SizeModifier WXUNUSED(size),
 463                             CharType& outConv, SizeModifier& outSize)
 464     {
 465         outConv = 'c';
 466         outSize = Size_Default;
 467     }
 468 };
 469 #endif // ANSI
 470
 471 #ifndef __WINDOWS__
 472 /*
 473
 474    wxScanf() format translation is different, we need to translate %s to %ls
 475    and %c to %lc on Unix (but not Windows and for widechar functions only!).
 476
 477    So to use native functions in order to get our semantics we must do the
 478    following translations in Unicode mode:
 479
 480    wxWidgets specifier      POSIX specifier
 481    ----------------------------------------
 482
 483    %hc, %C, %hC             %c
 484    %c                       %lc
 485
 486  */
 487 class wxScanfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
 488 {
 489     virtual void HandleString(CharType conv, SizeModifier size,
 490                               CharType& outConv, SizeModifier& outSize)
 491     {
 492         outConv = 's';
 493         outSize = GetOutSize(conv == 'S', size);
 494     }
 495
 496     virtual void HandleChar(CharType conv, SizeModifier size,
 497                             CharType& outConv, SizeModifier& outSize)
 498     {
 499         outConv = 'c';
 500         outSize = GetOutSize(conv == 'C', size);
 501     }
 502
 503     SizeModifier GetOutSize(bool convIsUpper, SizeModifier size)
 504     {
 505         // %S and %hS -> %s and %lS -> %ls
 506         if ( convIsUpper )
 507         {
 508             if ( size == Size_Long )
 509                 return Size_Long;
 510             else
 511                 return Size_Default;
 512         }
 513         else // %s or %c
 514         {
 515             if ( size == Size_Default )
 516                 return Size_Long;
 517             else
 518                 return size;
 519         }
 520     }
 521 };
 522
 523 const wxWCharBuffer wxScanfConvertFormatW(const wchar_t *format)
 524 {
 525     return wxScanfFormatConverterWchar().Convert(format);
 526 }
 527 #endif // !__WINDOWS__
 528
 529
 530 // ----------------------------------------------------------------------------
 531 // wxFormatString
 532 // ----------------------------------------------------------------------------
 533
 534 #if !wxUSE_UNICODE_WCHAR
 535 const char* wxFormatString::InputAsChar()
 536 {
 537     if ( m_char )
 538         return m_char.data();
 539
 540     // in ANSI build, wx_str() returns char*, in UTF-8 build, this function
 541     // is only called under UTF-8 locales, so we should return UTF-8 string,
 542     // which is, again, what wx_str() returns:
 543     if ( m_str )
 544         return m_str->wx_str();
 545
 546     // ditto wxCStrData:
 547     if ( m_cstr )
 548         return m_cstr->AsInternal();
 549
 550     // the last case is that wide string was passed in: in that case, we need
 551     // to convert it:
 552     wxASSERT( m_wchar );
 553
 554     m_char = wxConvLibc.cWC2MB(m_wchar.data());
 555
 556     return m_char.data();
 557 }
 558
 559 const char* wxFormatString::AsChar()
 560 {
 561     if ( !m_convertedChar )
 562 #if !wxUSE_UNICODE // FIXME-UTF8: remove this
 563         m_convertedChar = wxPrintfFormatConverterANSI().Convert(InputAsChar());
 564 #else
 565         m_convertedChar = wxPrintfFormatConverterUtf8().Convert(InputAsChar());
 566 #endif
 567
 568     return m_convertedChar.data();
 569 }
 570 #endif // !wxUSE_UNICODE_WCHAR
 571
 572 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 573 const wchar_t* wxFormatString::InputAsWChar()
 574 {
 575     if ( m_wchar )
 576         return m_wchar.data();
 577
 578 #if wxUSE_UNICODE_WCHAR
 579     if ( m_str )
 580         return m_str->wc_str();
 581     if ( m_cstr )
 582         return m_cstr->AsInternal();
 583 #else // wxUSE_UNICODE_UTF8
 584     if ( m_str )
 585     {
 586         m_wchar = m_str->wc_str();
 587         return m_wchar.data();
 588     }
 589     if ( m_cstr )
 590     {
 591         m_wchar = m_cstr->AsWCharBuf();
 592         return m_wchar.data();
 593     }
 594 #endif // wxUSE_UNICODE_WCHAR/UTF8
 595
 596     // the last case is that narrow string was passed in: in that case, we need
 597     // to convert it:
 598     wxASSERT( m_char );
 599
 600     m_wchar = wxConvLibc.cMB2WC(m_char.data());
 601
 602     return m_wchar.data();
 603 }
 604
 605 const wchar_t* wxFormatString::AsWChar()
 606 {
 607     if ( !m_convertedWChar )
 608         m_convertedWChar = wxPrintfFormatConverterWchar().Convert(InputAsWChar());
 609
 610     return m_convertedWChar.data();
 611 }
 612 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 613
 614 wxString wxFormatString::InputAsString() const
 615 {
 616     if ( m_str )
 617         return *m_str;
 618     if ( m_cstr )
 619         return m_cstr->AsString();
 620     if ( m_wchar )
 621         return wxString(m_wchar);
 622     if ( m_char )
 623         return wxString(m_char);
 624
 625     wxFAIL_MSG( "invalid wxFormatString - not initialized?" );
 626     return wxString();
 627 }
 628
 629 // ----------------------------------------------------------------------------
 630 // wxFormatString::GetArgumentType()
 631 // ----------------------------------------------------------------------------
 632
 633 namespace
 634 {
 635
 636 template<typename CharType>
 637 wxFormatString::ArgumentType DoGetArgumentType(const CharType *format,
 638                                                unsigned n)
 639 {
 640     wxCHECK_MSG( format, wxFormatString::Arg_Other,
 641                  "empty format string not allowed here" );
 642
 643     wxPrintfConvSpecParser<CharType> parser(format);
 644
 645     wxCHECK_MSG( parser.pspec[n-1] != NULL, wxFormatString::Arg_Other,
 646                  "requested argument not found - invalid format string?" );
 647
 648     switch ( parser.pspec[n-1]->m_type )
 649     {
 650         case wxPAT_CHAR:
 651         case wxPAT_WCHAR:
 652             return wxFormatString::Arg_Char;
 653
 654         default:
 655             return wxFormatString::Arg_Other;
 656     }
 657 }
 658
 659 } // anonymous namespace
 660
 661 wxFormatString::ArgumentType wxFormatString::GetArgumentType(unsigned n) const
 662 {
 663     if ( m_char )
 664         return DoGetArgumentType(m_char.data(), n);
 665     else if ( m_wchar )
 666         return DoGetArgumentType(m_wchar.data(), n);
 667     else if ( m_str )
 668         return DoGetArgumentType(m_str->wx_str(), n);
 669     else if ( m_cstr )
 670         return DoGetArgumentType(m_cstr->AsInternal(), n);
 671
 672     wxFAIL_MSG( "unreachable code" );
 673     return Arg_Other;
 674 }