src/common/strvararg.cpp

   1 ///////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/strvararg.cpp
   3 // Purpose:     macros for implementing type-safe vararg passing of strings
   4 // Author:      Vaclav Slavik
   5 // Created:     2007-02-19
   6 // RCS-ID:      $Id$
   7 // Copyright:   (c) 2007 REA Elektronik GmbH
   8 // Licence:     wxWindows licence
   9 ///////////////////////////////////////////////////////////////////////////////
  10
  11 // ============================================================================
  12 // declarations
  13 // ============================================================================
  14
  15 // ----------------------------------------------------------------------------
  16 // headers
  17 // ----------------------------------------------------------------------------
  18
  19 // for compilers that support precompilation, includes "wx.h".
  20 #include "wx/wxprec.h"
  21
  22 #ifdef __BORLANDC__
  23     #pragma hdrstop
  24 #endif
  25
  26 #include "wx/strvararg.h"
  27 #include "wx/string.h"
  28
  29 // ============================================================================
  30 // implementation
  31 // ============================================================================
  32
  33 // ----------------------------------------------------------------------------
  34 // wxArgNormalizer<>
  35 // ----------------------------------------------------------------------------
  36
  37 const wxStringCharType *wxArgNormalizerNative<const wxString&>::get() const
  38 {
  39     return m_value.wx_str();
  40 }
  41
  42 const wxStringCharType *wxArgNormalizerNative<const wxCStrData&>::get() const
  43 {
  44     return m_value.AsInternal();
  45 }
  46
  47 #if wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY
  48 wxArgNormalizerWchar<const wxString&>::wxArgNormalizerWchar(const wxString& s)
  49     : wxArgNormalizerWithBuffer<wchar_t>(s.wc_str())
  50 {
  51 }
  52
  53 wxArgNormalizerWchar<const wxCStrData&>::wxArgNormalizerWchar(const wxCStrData& s)
  54     : wxArgNormalizerWithBuffer<wchar_t>(s.AsWCharBuf())
  55 {
  56 }
  57 #endif // wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY
  58
  59 // ----------------------------------------------------------------------------
  60 // wxArgNormalizedString
  61 // ----------------------------------------------------------------------------
  62
  63 wxString wxArgNormalizedString::GetString() const
  64 {
  65     if ( !IsValid() )
  66         return wxEmptyString;
  67
  68 #if wxUSE_UTF8_LOCALE_ONLY
  69     return wxString(wx_reinterpret_cast(const char*, m_ptr));
  70 #else
  71     #if wxUSE_UNICODE_UTF8
  72         if ( wxLocaleIsUtf8 )
  73             return wxString(wx_reinterpret_cast(const char*, m_ptr));
  74         else
  75     #endif
  76         return wxString(wx_reinterpret_cast(const wxChar*, m_ptr));
  77 #endif // !wxUSE_UTF8_LOCALE_ONLY
  78 }
  79
  80 wxArgNormalizedString::operator wxString() const
  81 {
  82     return GetString();
  83 }
  84
  85 // ----------------------------------------------------------------------------
  86 // wxFormatConverter: class doing the "%s" and "%c" normalization
  87 // ----------------------------------------------------------------------------
  88
  89 /*
  90    There are four problems with wxPrintf() etc. format strings:
  91
  92    1) The printf vararg macros convert all forms of strings into
  93       wxStringCharType* representation. This may make the format string
  94       incorrect: for example, if %ls was used together with a wchar_t*
  95       variadic argument, this would no longer work, because the templates
  96       would change wchar_t* argument to wxStringCharType* and %ls would now
      be incorrect in e.g. UTF-8 build. We need to make sure only one specifier
  98       form is used.
  99
 100    2) To complicate matters further, the meaning of %s and %c is different
 101       under Windows and on Unix. The Windows/MS convention is as follows:
 102
 103        In ANSI mode:
 104
 105        format specifier         results in
 106        -----------------------------------
 107        %s, %hs, %hS             char*
 108        %ls, %S, %lS             wchar_t*
 109
 110        In Unicode mode:
 111
 112        format specifier         results in
 113        -----------------------------------
 114        %hs, %S, %hS             char*
 115        %s, %ls, %lS             wchar_t*
 116
 117        (While on POSIX systems we have %C identical to %lc and %c always means
 118        char (in any mode) while %lc always means wchar_t.)
 119
 120       In other words, we should _only_ use %s on Windows and %ls on Unix for
 121       wxUSE_UNICODE_WCHAR build.
 122
 123    3) To make things even worse, we need two forms in UTF-8 build: one for
 124       passing strings to ANSI functions under UTF-8 locales (this one should
 125       use %s) and one for widechar functions used under non-UTF-8 locales
 126       (this one should use %ls).
 127
 128    And, of course, the same should be done for %c as well.
 129
 130    4) Finally, in UTF-8 build when calling ANSI printf() function, we need to
 131       translate %c to %s, because not every Unicode character can be
 132       represented by a char.
 133
 134
 135    wxScanf() family of functions is simpler, because we don't normalize their
 136    variadic arguments and we only have to handle 2) above and only for widechar
 137    versions.
 138 */
 139
 140 template<typename T>
 141 class wxFormatConverterBase
 142 {
 143 public:
 144     typedef T CharType;
 145
 146     wxFormatConverterBase()
 147     {
 148         m_fmtOrig = NULL;
 149         m_fmtLast = NULL;
 150         m_nCopied = 0;
 151     }
 152
 153     wxCharTypeBuffer<CharType> Convert(const CharType *format)
 154     {
 155         // this is reset to NULL if we modify the format string
 156         m_fmtOrig = format;
 157
 158         while ( *format )
 159         {
 160             if ( CopyFmtChar(*format++) == _T('%') )
 161             {
 162                 // skip any flags
 163                 while ( IsFlagChar(*format) )
 164                     CopyFmtChar(*format++);
 165
 166                 // and possible width
 167                 if ( *format == _T('*') )
 168                     CopyFmtChar(*format++);
 169                 else
 170                     SkipDigits(&format);
 171
 172                 // precision?
 173                 if ( *format == _T('.') )
 174                 {
 175                     CopyFmtChar(*format++);
 176                     if ( *format == _T('*') )
 177                         CopyFmtChar(*format++);
 178                     else
 179                         SkipDigits(&format);
 180                 }
 181
 182                 // next we can have a size modifier
 183                 SizeModifier size;
 184
 185                 switch ( *format )
 186                 {
 187                     case 'h':
 188                         size = Size_Short;
 189                         format++;
 190                         break;
 191
 192                     case 'l':
 193                         // "ll" has a different meaning!
 194                         if ( format[1] != 'l' )
 195                         {
 196                             size = Size_Long;
 197                             format++;
 198                             break;
 199                         }
 200                         //else: fall through
 201
 202                     default:
 203                         size = Size_Default;
 204                 }
 205
 206                 CharType outConv = *format;
 207                 SizeModifier outSize = size;
 208
 209                 // and finally we should have the type
 210                 switch ( *format )
 211                 {
 212                     case _T('S'):
 213                     case _T('s'):
 214                         // all strings were converted into the same form by
 215                         // wxArgNormalizer<T>, this form depends on the context
 216                         // in which the value is used (scanf/printf/wprintf):
 217                         HandleString(*format, size, outConv, outSize);
 218                         break;
 219
 220                     case _T('C'):
 221                     case _T('c'):
 222                         HandleChar(*format, size, outConv, outSize);
 223                         break;
 224
 225                     default:
 226                         // nothing special to do
 227                         break;
 228                 }
 229
 230                 if ( outConv == *format && outSize == size ) // no change
 231                 {
 232                     if ( size != Size_Default )
 233                         CopyFmtChar(*(format - 1));
 234                     CopyFmtChar(*format);
 235                 }
 236                 else // something changed
 237                 {
 238                     switch ( outSize )
 239                     {
 240                         case Size_Long:
 241                             InsertFmtChar(_T('l'));
 242                             break;
 243
 244                         case Size_Short:
 245                             InsertFmtChar(_T('h'));
 246                             break;
 247
 248                         case Size_Default:
 249                             // nothing to do
 250                             break;
 251                     }
 252                     InsertFmtChar(outConv);
 253                 }
 254
 255                 format++;
 256             }
 257         }
 258
 259         // notice that we only translated the string if m_fmtOrig == NULL (as
 260         // set by CopyAllBefore()), otherwise we should simply use the original
 261         // format
 262         if ( m_fmtOrig )
 263         {
 264             return wxCharTypeBuffer<CharType>::CreateNonOwned(m_fmtOrig);
 265         }
 266         else
 267         {
 268             // NULL-terminate converted format string:
 269             *m_fmtLast = 0;
 270             return m_fmt;
 271         }
 272     }
 273
 274     virtual ~wxFormatConverterBase() {}
 275
 276 protected:
 277     enum SizeModifier
 278     {
 279         Size_Default,
 280         Size_Short,
 281         Size_Long
 282     };
 283
 284     // called to handle %S or %s; 'conv' is conversion specifier ('S' or 's'
 285     // respectively), 'size' is the preceding size modifier; the new values of
 286     // conversion and size specifiers must be written to outConv and outSize
 287     virtual void HandleString(CharType conv, SizeModifier size,
 288                               CharType& outConv, SizeModifier& outSize) = 0;
 289
 290     // ditto for %C or %c
 291     virtual void HandleChar(CharType conv, SizeModifier size,
 292                             CharType& outConv, SizeModifier& outSize) = 0;
 293
 294 private:
 295     // copy another character to the translated format: this function does the
 296     // copy if we are translating but doesn't do anything at all if we don't,
 297     // so we don't create the translated format string at all unless we really
 298     // need to (i.e. InsertFmtChar() is called)
 299     CharType CopyFmtChar(CharType ch)
 300     {
 301         if ( !m_fmtOrig )
 302         {
 303             // we're translating, do copy
 304             *(m_fmtLast++) = ch;
 305         }
 306         else
 307         {
 308             // simply increase the count which should be copied by
 309             // CopyAllBefore() later if needed
 310             m_nCopied++;
 311         }
 312
 313         return ch;
 314     }
 315
 316     // insert an extra character
 317     void InsertFmtChar(CharType ch)
 318     {
 319         if ( m_fmtOrig )
 320         {
 321             // so far we haven't translated anything yet
 322             CopyAllBefore();
 323         }
 324
 325         *(m_fmtLast++) = ch;
 326     }
 327
 328     void CopyAllBefore()
 329     {
 330         wxASSERT_MSG( m_fmtOrig && m_fmt.data() == NULL, "logic error" );
 331
 332         // the modified format string is guaranteed to be no longer than
 333         // 3/2 of the original (worst case: the entire format string consists
 334         // of "%s" repeated and is expanded to "%ls" on Unix), so we can
 335         // allocate the buffer now and not worry about running out of space if
 336         // we over-allocate a bit:
 337         size_t fmtLen = wxStrlen(m_fmtOrig);
 338         // worst case is of even length, so there's no rounding error in *3/2:
 339         m_fmt.extend(fmtLen * 3 / 2);
 340
 341         if ( m_nCopied > 0 )
 342             wxStrncpy(m_fmt.data(), m_fmtOrig, m_nCopied);
 343         m_fmtLast = m_fmt.data() + m_nCopied;
 344
 345         // we won't need it any longer and resetting it also indicates that we
 346         // modified the format
 347         m_fmtOrig = NULL;
 348     }
 349
 350     static bool IsFlagChar(CharType ch)
 351     {
 352         return ch == _T('-') || ch == _T('+') ||
 353                ch == _T('0') || ch == _T(' ') || ch == _T('#');
 354     }
 355
 356     void SkipDigits(const CharType **ptpc)
 357     {
 358         while ( **ptpc >= _T('0') && **ptpc <= _T('9') )
 359             CopyFmtChar(*(*ptpc)++);
 360     }
 361
 362     // the translated format
 363     wxCharTypeBuffer<CharType> m_fmt;
 364     CharType *m_fmtLast;
 365
 366     // the original format
 367     const CharType *m_fmtOrig;
 368
 369     // the number of characters already copied (i.e. already parsed, but left
 370     // unmodified)
 371     size_t m_nCopied;
 372 };
 373
 374
 375
 376 #ifdef __WINDOWS
 377
 378 // on Windows, we should use %s and %c regardless of the build:
 379 class wxPrintfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
 380 {
 381     virtual void HandleString(CharType WXUNUSED(conv),
 382                               SizeModifier WXUNUSED(size),
 383                               CharType& outConv, SizeModifier& outSize)
 384     {
 385         outConv = 's';
 386         outSize = Size_Default;
 387     }
 388
 389     virtual void HandleChar(CharType WXUNUSED(conv),
 390                             SizeModifier WXUNUSED(size),
 391                             CharType& outConv, SizeModifier& outSize)
 392     {
 393         outConv = 'c';
 394         outSize = Size_Default;
 395     }
 396 };
 397
 398 #else // !__WINDOWS__
 399
 400 // on Unix, it's %s for ANSI functions and %ls for widechar:
 401
 402 #if !wxUSE_UTF8_LOCALE_ONLY
 403 class wxPrintfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
 404 {
 405     virtual void HandleString(CharType WXUNUSED(conv),
 406                               SizeModifier WXUNUSED(size),
 407                               CharType& outConv, SizeModifier& outSize)
 408     {
 409         outConv = 's';
 410         outSize = Size_Long;
 411     }
 412
 413     virtual void HandleChar(CharType WXUNUSED(conv),
 414                             SizeModifier WXUNUSED(size),
 415                             CharType& outConv, SizeModifier& outSize)
 416     {
 417         outConv = 'c';
 418         outSize = Size_Long;
 419     }
 420 };
 421 #endif // !wxUSE_UTF8_LOCALE_ONLY
 422
 423 #if wxUSE_UNICODE_UTF8
 424 class wxPrintfFormatConverterUtf8 : public wxFormatConverterBase<char>
 425 {
 426     virtual void HandleString(CharType WXUNUSED(conv),
 427                               SizeModifier WXUNUSED(size),
 428                               CharType& outConv, SizeModifier& outSize)
 429     {
 430         outConv = 's';
 431         outSize = Size_Default;
 432     }
 433
 434     virtual void HandleChar(CharType WXUNUSED(conv),
 435                             SizeModifier WXUNUSED(size),
 436                             CharType& outConv, SizeModifier& outSize)
 437     {
 438         // added complication: %c should be translated to %s in UTF-8 build
 439         outConv = 's';
 440         outSize = Size_Default;
 441     }
 442 };
 443 #endif // wxUSE_UNICODE_UTF8
 444
 445 #endif // __WINDOWS__/!__WINDOWS__
 446
 447 #if !wxUSE_UNICODE // FIXME-UTF8: remove
 448 class wxPrintfFormatConverterANSI : public wxFormatConverterBase<char>
 449 {
 450     virtual void HandleString(CharType WXUNUSED(conv),
 451                               SizeModifier WXUNUSED(size),
 452                               CharType& outConv, SizeModifier& outSize)
 453     {
 454         outConv = 's';
 455         outSize = Size_Default;
 456     }
 457
 458     virtual void HandleChar(CharType WXUNUSED(conv),
 459                             SizeModifier WXUNUSED(size),
 460                             CharType& outConv, SizeModifier& outSize)
 461     {
 462         outConv = 'c';
 463         outSize = Size_Default;
 464     }
 465 };
 466 #endif // ANSI
 467
 468 #ifndef __WINDOWS__
 469 /*
 470
 471    wxScanf() format translation is different, we need to translate %s to %ls
 472    and %c to %lc on Unix (but not Windows and for widechar functions only!).
 473
 474    So to use native functions in order to get our semantics we must do the
 475    following translations in Unicode mode:
 476
 477    wxWidgets specifier      POSIX specifier
 478    ----------------------------------------
 479
 480    %hc, %C, %hC             %c
 481    %c                       %lc
 482
 483  */
 484 class wxScanfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
 485 {
 486     virtual void HandleString(CharType conv, SizeModifier size,
 487                               CharType& outConv, SizeModifier& outSize)
 488     {
 489         outConv = 's';
 490         outSize = GetOutSize(conv == 'S', size);
 491     }
 492
 493     virtual void HandleChar(CharType conv, SizeModifier size,
 494                             CharType& outConv, SizeModifier& outSize)
 495     {
 496         outConv = 'c';
 497         outSize = GetOutSize(conv == 'C', size);
 498     }
 499
 500     SizeModifier GetOutSize(bool convIsUpper, SizeModifier size)
 501     {
 502         // %S and %hS -> %s and %lS -> %ls
 503         if ( convIsUpper )
 504         {
 505             if ( size == Size_Long )
 506                 return Size_Long;
 507             else
 508                 return Size_Default;
 509         }
 510         else // %s or %c
 511         {
 512             if ( size == Size_Default )
 513                 return Size_Long;
 514             else
 515                 return size;
 516         }
 517     }
 518 };
 519
 520 const wxWCharBuffer wxScanfConvertFormatW(const wchar_t *format)
 521 {
 522     return wxScanfFormatConverterWchar().Convert(format);
 523 }
 524 #endif // !__WINDOWS__
 525
 526
 527 // ----------------------------------------------------------------------------
 528 // wxFormatString
 529 // ----------------------------------------------------------------------------
 530
 531 #if !wxUSE_UNICODE_WCHAR
 532 const char* wxFormatString::InputAsChar()
 533 {
 534     if ( m_char )
 535         return m_char.data();
 536
 537     // in ANSI build, wx_str() returns char*, in UTF-8 build, this function
 538     // is only called under UTF-8 locales, so we should return UTF-8 string,
 539     // which is, again, what wx_str() returns:
 540     if ( m_str )
 541         return m_str->wx_str();
 542
 543     // ditto wxCStrData:
 544     if ( m_cstr )
 545         return m_cstr->AsInternal();
 546
 547     // the last case is that wide string was passed in: in that case, we need
 548     // to convert it:
 549     wxASSERT( m_wchar );
 550
 551     m_char = wxConvLibc.cWC2MB(m_wchar.data());
 552
 553     return m_char.data();
 554 }
 555
 556 const char* wxFormatString::AsChar()
 557 {
 558     if ( !m_convertedChar )
 559 #if !wxUSE_UNICODE // FIXME-UTF8: remove this
 560         m_convertedChar = wxPrintfFormatConverterANSI().Convert(InputAsChar());
 561 #else
 562         m_convertedChar = wxPrintfFormatConverterUtf8().Convert(InputAsChar());
 563 #endif
 564
 565     return m_convertedChar.data();
 566 }
 567 #endif // !wxUSE_UNICODE_WCHAR
 568
 569 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 570 const wchar_t* wxFormatString::InputAsWChar()
 571 {
 572     if ( m_wchar )
 573         return m_wchar.data();
 574
 575 #if wxUSE_UNICODE_WCHAR
 576     if ( m_str )
 577         return m_str->wc_str();
 578     if ( m_cstr )
 579         return m_cstr->AsInternal();
 580 #else // wxUSE_UNICODE_UTF8
 581     if ( m_str )
 582     {
 583         m_wchar = m_str->wc_str();
 584         return m_wchar.data();
 585     }
 586     if ( m_cstr )
 587     {
 588         m_wchar = m_cstr->AsWCharBuf();
 589         return m_wchar.data();
 590     }
 591 #endif // wxUSE_UNICODE_WCHAR/UTF8
 592
 593     // the last case is that narrow string was passed in: in that case, we need
 594     // to convert it:
 595     wxASSERT( m_char );
 596
 597     m_wchar = wxConvLibc.cMB2WC(m_char.data());
 598
 599     return m_wchar.data();
 600 }
 601
 602 const wchar_t* wxFormatString::AsWChar()
 603 {
 604     if ( !m_convertedWChar )
 605         m_convertedWChar = wxPrintfFormatConverterWchar().Convert(InputAsWChar());
 606
 607     return m_convertedWChar.data();
 608 }
 609 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY