src/common/strvararg.cpp

   1 ///////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/strvararg.cpp
   3 // Purpose:     macros for implementing type-safe vararg passing of strings
   4 // Author:      Vaclav Slavik
   5 // Created:     2007-02-19
   6 // RCS-ID:      $Id$
   7 // Copyright:   (c) 2007 REA Elektronik GmbH
   8 // Licence:     wxWindows licence
   9 ///////////////////////////////////////////////////////////////////////////////
  10
  11 // ============================================================================
  12 // declarations
  13 // ============================================================================
  14
  15 // ----------------------------------------------------------------------------
  16 // headers
  17 // ----------------------------------------------------------------------------
  18
  19 // for compilers that support precompilation, includes "wx.h".
  20 #include "wx/wxprec.h"
  21
  22 #ifdef __BORLANDC__
  23     #pragma hdrstop
  24 #endif
  25
  26 #include "wx/strvararg.h"
  27 #include "wx/string.h"
  28 #include "wx/crt.h"
  29 #include "wx/private/wxprintf.h"
  30
  31 // ============================================================================
  32 // implementation
  33 // ============================================================================
  34
  35 // ----------------------------------------------------------------------------
  36 // wxArgNormalizer<>
  37 // ----------------------------------------------------------------------------
  38
  39 const wxStringCharType *wxArgNormalizerNative<const wxString&>::get() const
  40 {
  41     return m_value.wx_str();
  42 }
  43
  44 const wxStringCharType *wxArgNormalizerNative<const wxCStrData&>::get() const
  45 {
  46     return m_value.AsInternal();
  47 }
  48
  49 #if wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY
  50 wxArgNormalizerWchar<const wxString&>::wxArgNormalizerWchar(
  51                             const wxString& s,
  52                             const wxFormatString *fmt, unsigned index)
  53     : wxArgNormalizerWithBuffer<wchar_t>(s.wc_str(), fmt, index)
  54 {
  55 }
  56
  57 wxArgNormalizerWchar<const wxCStrData&>::wxArgNormalizerWchar(
  58                             const wxCStrData& s,
  59                             const wxFormatString *fmt, unsigned index)
  60     : wxArgNormalizerWithBuffer<wchar_t>(s.AsWCharBuf(), fmt, index)
  61 {
  62 }
  63 #endif // wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY
  64
  65 // ----------------------------------------------------------------------------
  66 // wxArgNormalizedString
  67 // ----------------------------------------------------------------------------
  68
  69 wxString wxArgNormalizedString::GetString() const
  70 {
  71     if ( !IsValid() )
  72         return wxEmptyString;
  73
  74 #if wxUSE_UTF8_LOCALE_ONLY
  75     return wxString(reinterpret_cast<const char*>(m_ptr));
  76 #else
  77     #if wxUSE_UNICODE_UTF8
  78         if ( wxLocaleIsUtf8 )
  79             return wxString(reinterpret_cast<const char*>(m_ptr));
  80         else
  81     #endif
  82         return wxString(reinterpret_cast<const wxChar*>(m_ptr));
  83 #endif // !wxUSE_UTF8_LOCALE_ONLY
  84 }
  85
  86 wxArgNormalizedString::operator wxString() const
  87 {
  88     return GetString();
  89 }
  90
  91 // ----------------------------------------------------------------------------
  92 // wxFormatConverter: class doing the "%s" and "%c" normalization
  93 // ----------------------------------------------------------------------------
  94
  95 /*
   There are three problems with wxPrintf() etc. format strings:
  97
  98    1) The printf vararg macros convert all forms of strings into
  99       wxStringCharType* representation. This may make the format string
 100       incorrect: for example, if %ls was used together with a wchar_t*
 101       variadic argument, this would no longer work, because the templates
 102       would change wchar_t* argument to wxStringCharType* and %ls would now
      be incorrect in e.g. UTF-8 build. We need to make sure only one
      specifier form is used.
 105
 106    2) To complicate matters further, the meaning of %s and %c is different
 107       under Windows and on Unix. The Windows/MS convention is as follows:
 108
 109        In ANSI mode:
 110
 111        format specifier         results in
 112        -----------------------------------
 113        %s, %hs, %hS             char*
 114        %ls, %S, %lS             wchar_t*
 115
 116        In Unicode mode:
 117
 118        format specifier         results in
 119        -----------------------------------
 120        %hs, %S, %hS             char*
 121        %s, %ls, %lS             wchar_t*
 122
 123        (While on POSIX systems we have %C identical to %lc and %c always means
 124        char (in any mode) while %lc always means wchar_t.)
 125
 126       In other words, we should _only_ use %s on Windows and %ls on Unix for
 127       wxUSE_UNICODE_WCHAR build.
 128
 129    3) To make things even worse, we need two forms in UTF-8 build: one for
 130       passing strings to ANSI functions under UTF-8 locales (this one should
 131       use %s) and one for widechar functions used under non-UTF-8 locales
 132       (this one should use %ls).
 133
 134    And, of course, the same should be done for %c as well.
 135
 136
 137    wxScanf() family of functions is simpler, because we don't normalize their
 138    variadic arguments and we only have to handle 2) above and only for widechar
 139    versions.
 140 */
 141
 142 template<typename T>
 143 class wxFormatConverterBase
 144 {
 145 public:
 146     typedef T CharType;
 147
 148     wxFormatConverterBase()
 149     {
 150         m_fmtOrig = NULL;
 151         m_fmtLast = NULL;
 152         m_nCopied = 0;
 153     }
 154
 155     wxCharTypeBuffer<CharType> Convert(const CharType *format)
 156     {
 157         // this is reset to NULL if we modify the format string
 158         m_fmtOrig = format;
 159
 160         while ( *format )
 161         {
 162             if ( CopyFmtChar(*format++) == _T('%') )
 163             {
 164                 // skip any flags
 165                 while ( IsFlagChar(*format) )
 166                     CopyFmtChar(*format++);
 167
 168                 // and possible width
 169                 if ( *format == _T('*') )
 170                     CopyFmtChar(*format++);
 171                 else
 172                     SkipDigits(&format);
 173
 174                 // precision?
 175                 if ( *format == _T('.') )
 176                 {
 177                     CopyFmtChar(*format++);
 178                     if ( *format == _T('*') )
 179                         CopyFmtChar(*format++);
 180                     else
 181                         SkipDigits(&format);
 182                 }
 183
 184                 // next we can have a size modifier
 185                 SizeModifier size;
 186
 187                 switch ( *format )
 188                 {
 189                     case 'h':
 190                         size = Size_Short;
 191                         format++;
 192                         break;
 193
 194                     case 'l':
 195                         // "ll" has a different meaning!
 196                         if ( format[1] != 'l' )
 197                         {
 198                             size = Size_Long;
 199                             format++;
 200                             break;
 201                         }
 202                         //else: fall through
 203
 204                     default:
 205                         size = Size_Default;
 206                 }
 207
 208                 CharType outConv = *format;
 209                 SizeModifier outSize = size;
 210
 211                 // and finally we should have the type
 212                 switch ( *format )
 213                 {
 214                     case _T('S'):
 215                     case _T('s'):
 216                         // all strings were converted into the same form by
 217                         // wxArgNormalizer<T>, this form depends on the context
 218                         // in which the value is used (scanf/printf/wprintf):
 219                         HandleString(*format, size, outConv, outSize);
 220                         break;
 221
 222                     case _T('C'):
 223                     case _T('c'):
 224                         HandleChar(*format, size, outConv, outSize);
 225                         break;
 226
 227                     default:
 228                         // nothing special to do
 229                         break;
 230                 }
 231
 232                 if ( outConv == *format && outSize == size ) // no change
 233                 {
 234                     if ( size != Size_Default )
 235                         CopyFmtChar(*(format - 1));
 236                     CopyFmtChar(*format);
 237                 }
 238                 else // something changed
 239                 {
 240                     switch ( outSize )
 241                     {
 242                         case Size_Long:
 243                             InsertFmtChar(_T('l'));
 244                             break;
 245
 246                         case Size_Short:
 247                             InsertFmtChar(_T('h'));
 248                             break;
 249
 250                         case Size_Default:
 251                             // nothing to do
 252                             break;
 253                     }
 254                     InsertFmtChar(outConv);
 255                 }
 256
 257                 format++;
 258             }
 259         }
 260
 261         // notice that we only translated the string if m_fmtOrig == NULL (as
 262         // set by CopyAllBefore()), otherwise we should simply use the original
 263         // format
 264         if ( m_fmtOrig )
 265         {
 266             return wxCharTypeBuffer<CharType>::CreateNonOwned(m_fmtOrig);
 267         }
 268         else
 269         {
 270             // NULL-terminate converted format string:
 271             *m_fmtLast = 0;
 272             return m_fmt;
 273         }
 274     }
 275
 276     virtual ~wxFormatConverterBase() {}
 277
 278 protected:
 279     enum SizeModifier
 280     {
 281         Size_Default,
 282         Size_Short,
 283         Size_Long
 284     };
 285
 286     // called to handle %S or %s; 'conv' is conversion specifier ('S' or 's'
 287     // respectively), 'size' is the preceding size modifier; the new values of
 288     // conversion and size specifiers must be written to outConv and outSize
 289     virtual void HandleString(CharType conv, SizeModifier size,
 290                               CharType& outConv, SizeModifier& outSize) = 0;
 291
 292     // ditto for %C or %c
 293     virtual void HandleChar(CharType conv, SizeModifier size,
 294                             CharType& outConv, SizeModifier& outSize) = 0;
 295
 296 private:
 297     // copy another character to the translated format: this function does the
 298     // copy if we are translating but doesn't do anything at all if we don't,
 299     // so we don't create the translated format string at all unless we really
 300     // need to (i.e. InsertFmtChar() is called)
 301     CharType CopyFmtChar(CharType ch)
 302     {
 303         if ( !m_fmtOrig )
 304         {
 305             // we're translating, do copy
 306             *(m_fmtLast++) = ch;
 307         }
 308         else
 309         {
 310             // simply increase the count which should be copied by
 311             // CopyAllBefore() later if needed
 312             m_nCopied++;
 313         }
 314
 315         return ch;
 316     }
 317
 318     // insert an extra character
 319     void InsertFmtChar(CharType ch)
 320     {
 321         if ( m_fmtOrig )
 322         {
 323             // so far we haven't translated anything yet
 324             CopyAllBefore();
 325         }
 326
 327         *(m_fmtLast++) = ch;
 328     }
 329
 330     void CopyAllBefore()
 331     {
 332         wxASSERT_MSG( m_fmtOrig && m_fmt.data() == NULL, "logic error" );
 333
 334         // the modified format string is guaranteed to be no longer than
 335         // 3/2 of the original (worst case: the entire format string consists
 336         // of "%s" repeated and is expanded to "%ls" on Unix), so we can
 337         // allocate the buffer now and not worry about running out of space if
 338         // we over-allocate a bit:
 339         size_t fmtLen = wxStrlen(m_fmtOrig);
 340         // worst case is of even length, so there's no rounding error in *3/2:
 341         m_fmt.extend(fmtLen * 3 / 2);
 342
 343         if ( m_nCopied > 0 )
 344             wxStrncpy(m_fmt.data(), m_fmtOrig, m_nCopied);
 345         m_fmtLast = m_fmt.data() + m_nCopied;
 346
 347         // we won't need it any longer and resetting it also indicates that we
 348         // modified the format
 349         m_fmtOrig = NULL;
 350     }
 351
 352     static bool IsFlagChar(CharType ch)
 353     {
 354         return ch == _T('-') || ch == _T('+') ||
 355                ch == _T('0') || ch == _T(' ') || ch == _T('#');
 356     }
 357
 358     void SkipDigits(const CharType **ptpc)
 359     {
 360         while ( **ptpc >= _T('0') && **ptpc <= _T('9') )
 361             CopyFmtChar(*(*ptpc)++);
 362     }
 363
 364     // the translated format
 365     wxCharTypeBuffer<CharType> m_fmt;
 366     CharType *m_fmtLast;
 367
 368     // the original format
 369     const CharType *m_fmtOrig;
 370
 371     // the number of characters already copied (i.e. already parsed, but left
 372     // unmodified)
 373     size_t m_nCopied;
 374 };
 375
 376 #ifdef __WINDOWS__
 377
 378 // on Windows, we should use %s and %c regardless of the build:
 379 class wxPrintfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
 380 {
 381     virtual void HandleString(CharType WXUNUSED(conv),
 382                               SizeModifier WXUNUSED(size),
 383                               CharType& outConv, SizeModifier& outSize)
 384     {
 385         outConv = 's';
 386         outSize = Size_Default;
 387     }
 388
 389     virtual void HandleChar(CharType WXUNUSED(conv),
 390                             SizeModifier WXUNUSED(size),
 391                             CharType& outConv, SizeModifier& outSize)
 392     {
 393         outConv = 'c';
 394         outSize = Size_Default;
 395     }
 396 };
 397
 398 #else // !__WINDOWS__
 399
 400 // on Unix, it's %s for ANSI functions and %ls for widechar:
 401
 402 #if !wxUSE_UTF8_LOCALE_ONLY
 403 class wxPrintfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
 404 {
 405     virtual void HandleString(CharType WXUNUSED(conv),
 406                               SizeModifier WXUNUSED(size),
 407                               CharType& outConv, SizeModifier& outSize)
 408     {
 409         outConv = 's';
 410         outSize = Size_Long;
 411     }
 412
 413     virtual void HandleChar(CharType WXUNUSED(conv),
 414                             SizeModifier WXUNUSED(size),
 415                             CharType& outConv, SizeModifier& outSize)
 416     {
 417         outConv = 'c';
 418         outSize = Size_Long;
 419     }
 420 };
 421 #endif // !wxUSE_UTF8_LOCALE_ONLY
 422
 423 #if wxUSE_UNICODE_UTF8
 424 class wxPrintfFormatConverterUtf8 : public wxFormatConverterBase<char>
 425 {
 426     virtual void HandleString(CharType WXUNUSED(conv),
 427                               SizeModifier WXUNUSED(size),
 428                               CharType& outConv, SizeModifier& outSize)
 429     {
 430         outConv = 's';
 431         outSize = Size_Default;
 432     }
 433
 434     virtual void HandleChar(CharType WXUNUSED(conv),
 435                             SizeModifier WXUNUSED(size),
 436                             CharType& outConv, SizeModifier& outSize)
 437     {
 438         // chars are represented using wchar_t in both builds, so this is
 439         // the same as above
 440         outConv = 'c';
 441         outSize = Size_Long;
 442     }
 443 };
 444 #endif // wxUSE_UNICODE_UTF8
 445
 446 #endif // __WINDOWS__/!__WINDOWS__
 447
 448 #if !wxUSE_UNICODE // FIXME-UTF8: remove
 449 class wxPrintfFormatConverterANSI : public wxFormatConverterBase<char>
 450 {
 451     virtual void HandleString(CharType WXUNUSED(conv),
 452                               SizeModifier WXUNUSED(size),
 453                               CharType& outConv, SizeModifier& outSize)
 454     {
 455         outConv = 's';
 456         outSize = Size_Default;
 457     }
 458
 459     virtual void HandleChar(CharType WXUNUSED(conv),
 460                             SizeModifier WXUNUSED(size),
 461                             CharType& outConv, SizeModifier& outSize)
 462     {
 463         outConv = 'c';
 464         outSize = Size_Default;
 465     }
 466 };
 467 #endif // ANSI
 468
 469 #ifndef __WINDOWS__
 470 /*
 471
 472    wxScanf() format translation is different, we need to translate %s to %ls
 473    and %c to %lc on Unix (but not Windows and for widechar functions only!).
 474
 475    So to use native functions in order to get our semantics we must do the
 476    following translations in Unicode mode:
 477
 478    wxWidgets specifier      POSIX specifier
 479    ----------------------------------------
 480
 481    %hc, %C, %hC             %c
 482    %c                       %lc
 483
 484  */
 485 class wxScanfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
 486 {
 487     virtual void HandleString(CharType conv, SizeModifier size,
 488                               CharType& outConv, SizeModifier& outSize)
 489     {
 490         outConv = 's';
 491         outSize = GetOutSize(conv == 'S', size);
 492     }
 493
 494     virtual void HandleChar(CharType conv, SizeModifier size,
 495                             CharType& outConv, SizeModifier& outSize)
 496     {
 497         outConv = 'c';
 498         outSize = GetOutSize(conv == 'C', size);
 499     }
 500
 501     SizeModifier GetOutSize(bool convIsUpper, SizeModifier size)
 502     {
 503         // %S and %hS -> %s and %lS -> %ls
 504         if ( convIsUpper )
 505         {
 506             if ( size == Size_Long )
 507                 return Size_Long;
 508             else
 509                 return Size_Default;
 510         }
 511         else // %s or %c
 512         {
 513             if ( size == Size_Default )
 514                 return Size_Long;
 515             else
 516                 return size;
 517         }
 518     }
 519 };
 520
 521 const wxScopedWCharBuffer wxScanfConvertFormatW(const wchar_t *format)
 522 {
 523     return wxScanfFormatConverterWchar().Convert(format);
 524 }
 525 #endif // !__WINDOWS__
 526
 527
 528 // ----------------------------------------------------------------------------
 529 // wxFormatString
 530 // ----------------------------------------------------------------------------
 531
 532 #if !wxUSE_UNICODE_WCHAR
 533 const char* wxFormatString::InputAsChar()
 534 {
 535     if ( m_char )
 536         return m_char.data();
 537
 538     // in ANSI build, wx_str() returns char*, in UTF-8 build, this function
 539     // is only called under UTF-8 locales, so we should return UTF-8 string,
 540     // which is, again, what wx_str() returns:
 541     if ( m_str )
 542         return m_str->wx_str();
 543
 544     // ditto wxCStrData:
 545     if ( m_cstr )
 546         return m_cstr->AsInternal();
 547
 548     // the last case is that wide string was passed in: in that case, we need
 549     // to convert it:
 550     wxASSERT( m_wchar );
 551
 552     m_char = wxConvLibc.cWC2MB(m_wchar.data());
 553
 554     return m_char.data();
 555 }
 556
 557 const char* wxFormatString::AsChar()
 558 {
 559     if ( !m_convertedChar )
 560 #if !wxUSE_UNICODE // FIXME-UTF8: remove this
 561         m_convertedChar = wxPrintfFormatConverterANSI().Convert(InputAsChar());
 562 #else
 563         m_convertedChar = wxPrintfFormatConverterUtf8().Convert(InputAsChar());
 564 #endif
 565
 566     return m_convertedChar.data();
 567 }
 568 #endif // !wxUSE_UNICODE_WCHAR
 569
 570 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 571 const wchar_t* wxFormatString::InputAsWChar()
 572 {
 573     if ( m_wchar )
 574         return m_wchar.data();
 575
 576 #if wxUSE_UNICODE_WCHAR
 577     if ( m_str )
 578         return m_str->wc_str();
 579     if ( m_cstr )
 580         return m_cstr->AsInternal();
 581 #else // wxUSE_UNICODE_UTF8
 582     if ( m_str )
 583     {
 584         m_wchar = m_str->wc_str();
 585         return m_wchar.data();
 586     }
 587     if ( m_cstr )
 588     {
 589         m_wchar = m_cstr->AsWCharBuf();
 590         return m_wchar.data();
 591     }
 592 #endif // wxUSE_UNICODE_WCHAR/UTF8
 593
 594     // the last case is that narrow string was passed in: in that case, we need
 595     // to convert it:
 596     wxASSERT( m_char );
 597
 598     m_wchar = wxConvLibc.cMB2WC(m_char.data());
 599
 600     return m_wchar.data();
 601 }
 602
 603 const wchar_t* wxFormatString::AsWChar()
 604 {
 605     if ( !m_convertedWChar )
 606         m_convertedWChar = wxPrintfFormatConverterWchar().Convert(InputAsWChar());
 607
 608     return m_convertedWChar.data();
 609 }
 610 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 611
 612 wxString wxFormatString::InputAsString() const
 613 {
 614     if ( m_str )
 615         return *m_str;
 616     if ( m_cstr )
 617         return m_cstr->AsString();
 618     if ( m_wchar )
 619         return wxString(m_wchar);
 620     if ( m_char )
 621         return wxString(m_char);
 622
 623     wxFAIL_MSG( "invalid wxFormatString - not initialized?" );
 624     return wxString();
 625 }
 626
 627 // ----------------------------------------------------------------------------
 628 // wxFormatString::GetArgumentType()
 629 // ----------------------------------------------------------------------------
 630
 631 namespace
 632 {
 633
 634 template<typename CharType>
 635 wxFormatString::ArgumentType DoGetArgumentType(const CharType *format,
 636                                                unsigned n)
 637 {
 638     wxCHECK_MSG( format, wxFormatString::Arg_Other,
 639                  "empty format string not allowed here" );
 640
 641     wxPrintfConvSpecParser<CharType> parser(format);
 642
 643     wxCHECK_MSG( parser.pspec[n-1] != NULL, wxFormatString::Arg_Other,
 644                  "requested argument not found - invalid format string?" );
 645
 646     switch ( parser.pspec[n-1]->m_type )
 647     {
 648         case wxPAT_CHAR:
 649         case wxPAT_WCHAR:
 650             return wxFormatString::Arg_Char;
 651
 652         default:
 653             return wxFormatString::Arg_Other;
 654     }
 655 }
 656
 657 } // anonymous namespace
 658
 659 wxFormatString::ArgumentType wxFormatString::GetArgumentType(unsigned n) const
 660 {
 661     if ( m_char )
 662         return DoGetArgumentType(m_char.data(), n);
 663     else if ( m_wchar )
 664         return DoGetArgumentType(m_wchar.data(), n);
 665     else if ( m_str )
 666         return DoGetArgumentType(m_str->wx_str(), n);
 667     else if ( m_cstr )
 668         return DoGetArgumentType(m_cstr->AsInternal(), n);
 669
 670     wxFAIL_MSG( "unreachable code" );
 671     return Arg_Other;
 672 }