src/common/strvararg.cpp

   1 ///////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/strvararg.cpp
   3 // Purpose:     macros for implementing type-safe vararg passing of strings
   4 // Author:      Vaclav Slavik
   5 // Created:     2007-02-19
   6 // RCS-ID:      $Id$
   7 // Copyright:   (c) 2007 REA Elektronik GmbH
   8 // Licence:     wxWindows licence
   9 ///////////////////////////////////////////////////////////////////////////////
  10
  11 // ============================================================================
  12 // declarations
  13 // ============================================================================
  14
  15 // ----------------------------------------------------------------------------
  16 // headers
  17 // ----------------------------------------------------------------------------
  18
  19 // for compilers that support precompilation, includes "wx.h".
  20 #include "wx/wxprec.h"
  21
  22 #ifdef __BORLANDC__
  23     #pragma hdrstop
  24 #endif
  25
  26 #include "wx/strvararg.h"
  27 #include "wx/string.h"
  28 #include "wx/crt.h"
  29
  30 // ============================================================================
  31 // implementation
  32 // ============================================================================
  33
  34 // ----------------------------------------------------------------------------
  35 // wxArgNormalizer<>
  36 // ----------------------------------------------------------------------------
  37
  38 const wxStringCharType *wxArgNormalizerNative<const wxString&>::get() const
  39 {
  40     return m_value.wx_str();
  41 }
  42
  43 const wxStringCharType *wxArgNormalizerNative<const wxCStrData&>::get() const
  44 {
  45     return m_value.AsInternal();
  46 }
  47
  48 #if wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY
  49 wxArgNormalizerWchar<const wxString&>::wxArgNormalizerWchar(const wxString& s)
  50     : wxArgNormalizerWithBuffer<wchar_t>(s.wc_str())
  51 {
  52 }
  53
  54 wxArgNormalizerWchar<const wxCStrData&>::wxArgNormalizerWchar(const wxCStrData& s)
  55     : wxArgNormalizerWithBuffer<wchar_t>(s.AsWCharBuf())
  56 {
  57 }
  58 #endif // wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY
  59
  60 // ----------------------------------------------------------------------------
  61 // wxArgNormalizedString
  62 // ----------------------------------------------------------------------------
  63
  64 wxString wxArgNormalizedString::GetString() const
  65 {
  66     if ( !IsValid() )
  67         return wxEmptyString;
  68
  69 #if wxUSE_UTF8_LOCALE_ONLY
  70     return wxString(wx_reinterpret_cast(const char*, m_ptr));
  71 #else
  72     #if wxUSE_UNICODE_UTF8
  73         if ( wxLocaleIsUtf8 )
  74             return wxString(wx_reinterpret_cast(const char*, m_ptr));
  75         else
  76     #endif
  77         return wxString(wx_reinterpret_cast(const wxChar*, m_ptr));
  78 #endif // !wxUSE_UTF8_LOCALE_ONLY
  79 }
  80
  81 wxArgNormalizedString::operator wxString() const
  82 {
  83     return GetString();
  84 }
  85
  86 // ----------------------------------------------------------------------------
  87 // wxFormatConverter: class doing the "%s" and "%c" normalization
  88 // ----------------------------------------------------------------------------
  89
  90 /*
  91    There are four problems with wxPrintf() etc. format strings:
  92
  93    1) The printf vararg macros convert all forms of strings into
  94       wxStringCharType* representation. This may make the format string
  95       incorrect: for example, if %ls was used together with a wchar_t*
  96       variadic argument, this would no longer work, because the templates
  97       would change wchar_t* argument to wxStringCharType* and %ls would now
  98       be incorrect in e.g. UTF-8 build. We need to make sure only one
  99       specifier form is used.
 100
 101    2) To complicate matters further, the meaning of %s and %c is different
 102       under Windows and on Unix. The Windows/MS convention is as follows:
 103
 104        In ANSI mode:
 105
 106        format specifier         results in
 107        -----------------------------------
 108        %s, %hs, %hS             char*
 109        %ls, %S, %lS             wchar_t*
 110
 111        In Unicode mode:
 112
 113        format specifier         results in
 114        -----------------------------------
 115        %hs, %S, %hS             char*
 116        %s, %ls, %lS             wchar_t*
 117
 118        (While on POSIX systems we have %C identical to %lc and %c always means
 119        char (in any mode) while %lc always means wchar_t.)
 120
 121       In other words, we should _only_ use %s on Windows and %ls on Unix for
 122       wxUSE_UNICODE_WCHAR build.
 123
 124    3) To make things even worse, we need two forms in UTF-8 build: one for
 125       passing strings to ANSI functions under UTF-8 locales (this one should
 126       use %s) and one for widechar functions used under non-UTF-8 locales
 127       (this one should use %ls).
 128
 129    And, of course, the same should be done for %c as well.
 130
 131    4) Finally, in UTF-8 build when calling ANSI printf() function, we need to
 132       translate %c to %s, because not every Unicode character can be
 133       represented by a char.
 134
 135
 136    wxScanf() family of functions is simpler, because we don't normalize their
 137    variadic arguments and we only have to handle 2) above and only for widechar
 138    versions.
 139 */
 140
 141 template<typename T>
 142 class wxFormatConverterBase
 143 {
 144 public:
 145     typedef T CharType;
 146
 147     wxFormatConverterBase()
 148     {
 149         m_fmtOrig = NULL;
 150         m_fmtLast = NULL;
 151         m_nCopied = 0;
 152     }
 153
 154     wxCharTypeBuffer<CharType> Convert(const CharType *format)
 155     {
 156         // this is reset to NULL if we modify the format string
 157         m_fmtOrig = format;
 158
 159         while ( *format )
 160         {
 161             if ( CopyFmtChar(*format++) == _T('%') )
 162             {
 163                 // skip any flags
 164                 while ( IsFlagChar(*format) )
 165                     CopyFmtChar(*format++);
 166
 167                 // and possible width
 168                 if ( *format == _T('*') )
 169                     CopyFmtChar(*format++);
 170                 else
 171                     SkipDigits(&format);
 172
 173                 // precision?
 174                 if ( *format == _T('.') )
 175                 {
 176                     CopyFmtChar(*format++);
 177                     if ( *format == _T('*') )
 178                         CopyFmtChar(*format++);
 179                     else
 180                         SkipDigits(&format);
 181                 }
 182
 183                 // next we can have a size modifier
 184                 SizeModifier size;
 185
 186                 switch ( *format )
 187                 {
 188                     case 'h':
 189                         size = Size_Short;
 190                         format++;
 191                         break;
 192
 193                     case 'l':
 194                         // "ll" has a different meaning!
 195                         if ( format[1] != 'l' )
 196                         {
 197                             size = Size_Long;
 198                             format++;
 199                             break;
 200                         }
 201                         //else: fall through
 202
 203                     default:
 204                         size = Size_Default;
 205                 }
 206
 207                 CharType outConv = *format;
 208                 SizeModifier outSize = size;
 209
 210                 // and finally we should have the type
 211                 switch ( *format )
 212                 {
 213                     case _T('S'):
 214                     case _T('s'):
 215                         // all strings were converted into the same form by
 216                         // wxArgNormalizer<T>, this form depends on the context
 217                         // in which the value is used (scanf/printf/wprintf):
 218                         HandleString(*format, size, outConv, outSize);
 219                         break;
 220
 221                     case _T('C'):
 222                     case _T('c'):
 223                         HandleChar(*format, size, outConv, outSize);
 224                         break;
 225
 226                     default:
 227                         // nothing special to do
 228                         break;
 229                 }
 230
 231                 if ( outConv == *format && outSize == size ) // no change
 232                 {
 233                     if ( size != Size_Default )
 234                         CopyFmtChar(*(format - 1));
 235                     CopyFmtChar(*format);
 236                 }
 237                 else // something changed
 238                 {
 239                     switch ( outSize )
 240                     {
 241                         case Size_Long:
 242                             InsertFmtChar(_T('l'));
 243                             break;
 244
 245                         case Size_Short:
 246                             InsertFmtChar(_T('h'));
 247                             break;
 248
 249                         case Size_Default:
 250                             // nothing to do
 251                             break;
 252                     }
 253                     InsertFmtChar(outConv);
 254                 }
 255
 256                 format++;
 257             }
 258         }
 259
 260         // notice that we only translated the string if m_fmtOrig == NULL (as
 261         // set by CopyAllBefore()), otherwise we should simply use the original
 262         // format
 263         if ( m_fmtOrig )
 264         {
 265             return wxCharTypeBuffer<CharType>::CreateNonOwned(m_fmtOrig);
 266         }
 267         else
 268         {
 269             // NULL-terminate converted format string:
 270             *m_fmtLast = 0;
 271             return m_fmt;
 272         }
 273     }
 274
 275     virtual ~wxFormatConverterBase() {}
 276
 277 protected:
 278     enum SizeModifier
 279     {
 280         Size_Default,
 281         Size_Short,
 282         Size_Long
 283     };
 284
 285     // called to handle %S or %s; 'conv' is conversion specifier ('S' or 's'
 286     // respectively), 'size' is the preceding size modifier; the new values of
 287     // conversion and size specifiers must be written to outConv and outSize
 288     virtual void HandleString(CharType conv, SizeModifier size,
 289                               CharType& outConv, SizeModifier& outSize) = 0;
 290
 291     // ditto for %C or %c
 292     virtual void HandleChar(CharType conv, SizeModifier size,
 293                             CharType& outConv, SizeModifier& outSize) = 0;
 294
 295 private:
 296     // copy another character to the translated format: this function does the
 297     // copy if we are translating but doesn't do anything at all if we don't,
 298     // so we don't create the translated format string at all unless we really
 299     // need to (i.e. InsertFmtChar() is called)
 300     CharType CopyFmtChar(CharType ch)
 301     {
 302         if ( !m_fmtOrig )
 303         {
 304             // we're translating, do copy
 305             *(m_fmtLast++) = ch;
 306         }
 307         else
 308         {
 309             // simply increase the count which should be copied by
 310             // CopyAllBefore() later if needed
 311             m_nCopied++;
 312         }
 313
 314         return ch;
 315     }
 316
 317     // insert an extra character
 318     void InsertFmtChar(CharType ch)
 319     {
 320         if ( m_fmtOrig )
 321         {
 322             // so far we haven't translated anything yet
 323             CopyAllBefore();
 324         }
 325
 326         *(m_fmtLast++) = ch;
 327     }
 328
 329     void CopyAllBefore()
 330     {
 331         wxASSERT_MSG( m_fmtOrig && m_fmt.data() == NULL, "logic error" );
 332
 333         // the modified format string is guaranteed to be no longer than
 334         // 3/2 of the original (worst case: the entire format string consists
 335         // of "%s" repeated and is expanded to "%ls" on Unix), so we can
 336         // allocate the buffer now and not worry about running out of space if
 337         // we over-allocate a bit:
 338         size_t fmtLen = wxStrlen(m_fmtOrig);
 339         // worst case is of even length, so there's no rounding error in *3/2:
 340         m_fmt.extend(fmtLen * 3 / 2);
 341
 342         if ( m_nCopied > 0 )
 343             wxStrncpy(m_fmt.data(), m_fmtOrig, m_nCopied);
 344         m_fmtLast = m_fmt.data() + m_nCopied;
 345
 346         // we won't need it any longer and resetting it also indicates that we
 347         // modified the format
 348         m_fmtOrig = NULL;
 349     }
 350
 351     static bool IsFlagChar(CharType ch)
 352     {
 353         return ch == _T('-') || ch == _T('+') ||
 354                ch == _T('0') || ch == _T(' ') || ch == _T('#');
 355     }
 356
 357     void SkipDigits(const CharType **ptpc)
 358     {
 359         while ( **ptpc >= _T('0') && **ptpc <= _T('9') )
 360             CopyFmtChar(*(*ptpc)++);
 361     }
 362
 363     // the translated format
 364     wxCharTypeBuffer<CharType> m_fmt;
 365     CharType *m_fmtLast;
 366
 367     // the original format
 368     const CharType *m_fmtOrig;
 369
 370     // the number of characters already copied (i.e. already parsed, but left
 371     // unmodified)
 372     size_t m_nCopied;
 373 };
 374
 375
 376
 377 #ifdef __WINDOWS
 378
 379 // on Windows, we should use %s and %c regardless of the build:
 380 class wxPrintfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
 381 {
 382     virtual void HandleString(CharType WXUNUSED(conv),
 383                               SizeModifier WXUNUSED(size),
 384                               CharType& outConv, SizeModifier& outSize)
 385     {
 386         outConv = 's';
 387         outSize = Size_Default;
 388     }
 389
 390     virtual void HandleChar(CharType WXUNUSED(conv),
 391                             SizeModifier WXUNUSED(size),
 392                             CharType& outConv, SizeModifier& outSize)
 393     {
 394         outConv = 'c';
 395         outSize = Size_Default;
 396     }
 397 };
 398
 399 #else // !__WINDOWS__
 400
 401 // on Unix, it's %s for ANSI functions and %ls for widechar:
 402
 403 #if !wxUSE_UTF8_LOCALE_ONLY
 404 class wxPrintfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
 405 {
 406     virtual void HandleString(CharType WXUNUSED(conv),
 407                               SizeModifier WXUNUSED(size),
 408                               CharType& outConv, SizeModifier& outSize)
 409     {
 410         outConv = 's';
 411         outSize = Size_Long;
 412     }
 413
 414     virtual void HandleChar(CharType WXUNUSED(conv),
 415                             SizeModifier WXUNUSED(size),
 416                             CharType& outConv, SizeModifier& outSize)
 417     {
 418         outConv = 'c';
 419         outSize = Size_Long;
 420     }
 421 };
 422 #endif // !wxUSE_UTF8_LOCALE_ONLY
 423
 424 #if wxUSE_UNICODE_UTF8
 425 class wxPrintfFormatConverterUtf8 : public wxFormatConverterBase<char>
 426 {
 427     virtual void HandleString(CharType WXUNUSED(conv),
 428                               SizeModifier WXUNUSED(size),
 429                               CharType& outConv, SizeModifier& outSize)
 430     {
 431         outConv = 's';
 432         outSize = Size_Default;
 433     }
 434
 435     virtual void HandleChar(CharType WXUNUSED(conv),
 436                             SizeModifier WXUNUSED(size),
 437                             CharType& outConv, SizeModifier& outSize)
 438     {
 439         // added complication: %c should be translated to %s in UTF-8 build
 440         outConv = 's';
 441         outSize = Size_Default;
 442     }
 443 };
 444 #endif // wxUSE_UNICODE_UTF8
 445
 446 #endif // __WINDOWS__/!__WINDOWS__
 447
 448 #if !wxUSE_UNICODE // FIXME-UTF8: remove
 449 class wxPrintfFormatConverterANSI : public wxFormatConverterBase<char>
 450 {
 451     virtual void HandleString(CharType WXUNUSED(conv),
 452                               SizeModifier WXUNUSED(size),
 453                               CharType& outConv, SizeModifier& outSize)
 454     {
 455         outConv = 's';
 456         outSize = Size_Default;
 457     }
 458
 459     virtual void HandleChar(CharType WXUNUSED(conv),
 460                             SizeModifier WXUNUSED(size),
 461                             CharType& outConv, SizeModifier& outSize)
 462     {
 463         outConv = 'c';
 464         outSize = Size_Default;
 465     }
 466 };
 467 #endif // ANSI
 468
 469 #ifndef __WINDOWS__
 470 /*
 471
 472    wxScanf() format translation is different, we need to translate %s to %ls
 473    and %c to %lc on Unix (but not Windows and for widechar functions only!).
 474
 475    So to use native functions in order to get our semantics we must do the
 476    following translations in Unicode mode:
 477
 478    wxWidgets specifier      POSIX specifier
 479    ----------------------------------------
 480
 481    %hc, %C, %hC             %c
 482    %c                       %lc
 483
 484  */
 485 class wxScanfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
 486 {
 487     virtual void HandleString(CharType conv, SizeModifier size,
 488                               CharType& outConv, SizeModifier& outSize)
 489     {
 490         outConv = 's';
 491         outSize = GetOutSize(conv == 'S', size);
 492     }
 493
 494     virtual void HandleChar(CharType conv, SizeModifier size,
 495                             CharType& outConv, SizeModifier& outSize)
 496     {
 497         outConv = 'c';
 498         outSize = GetOutSize(conv == 'C', size);
 499     }
 500
 501     SizeModifier GetOutSize(bool convIsUpper, SizeModifier size)
 502     {
 503         // %S and %hS -> %s and %lS -> %ls
 504         if ( convIsUpper )
 505         {
 506             if ( size == Size_Long )
 507                 return Size_Long;
 508             else
 509                 return Size_Default;
 510         }
 511         else // %s or %c
 512         {
 513             if ( size == Size_Default )
 514                 return Size_Long;
 515             else
 516                 return size;
 517         }
 518     }
 519 };
 520
 521 const wxWCharBuffer wxScanfConvertFormatW(const wchar_t *format)
 522 {
 523     return wxScanfFormatConverterWchar().Convert(format);
 524 }
 525 #endif // !__WINDOWS__
 526
 527
 528 // ----------------------------------------------------------------------------
 529 // wxFormatString
 530 // ----------------------------------------------------------------------------
 531
 532 #if !wxUSE_UNICODE_WCHAR
 533 const char* wxFormatString::InputAsChar()
 534 {
 535     if ( m_char )
 536         return m_char.data();
 537
 538     // in ANSI build, wx_str() returns char*, in UTF-8 build, this function
 539     // is only called under UTF-8 locales, so we should return UTF-8 string,
 540     // which is, again, what wx_str() returns:
 541     if ( m_str )
 542         return m_str->wx_str();
 543
 544     // ditto wxCStrData:
 545     if ( m_cstr )
 546         return m_cstr->AsInternal();
 547
 548     // the last case is that wide string was passed in: in that case, we need
 549     // to convert it:
 550     wxASSERT( m_wchar );
 551
 552     m_char = wxConvLibc.cWC2MB(m_wchar.data());
 553
 554     return m_char.data();
 555 }
 556
 557 const char* wxFormatString::AsChar()
 558 {
 559     if ( !m_convertedChar )
 560 #if !wxUSE_UNICODE // FIXME-UTF8: remove this
 561         m_convertedChar = wxPrintfFormatConverterANSI().Convert(InputAsChar());
 562 #else
 563         m_convertedChar = wxPrintfFormatConverterUtf8().Convert(InputAsChar());
 564 #endif
 565
 566     return m_convertedChar.data();
 567 }
 568 #endif // !wxUSE_UNICODE_WCHAR
 569
 570 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 571 const wchar_t* wxFormatString::InputAsWChar()
 572 {
 573     if ( m_wchar )
 574         return m_wchar.data();
 575
 576 #if wxUSE_UNICODE_WCHAR
 577     if ( m_str )
 578         return m_str->wc_str();
 579     if ( m_cstr )
 580         return m_cstr->AsInternal();
 581 #else // wxUSE_UNICODE_UTF8
 582     if ( m_str )
 583     {
 584         m_wchar = m_str->wc_str();
 585         return m_wchar.data();
 586     }
 587     if ( m_cstr )
 588     {
 589         m_wchar = m_cstr->AsWCharBuf();
 590         return m_wchar.data();
 591     }
 592 #endif // wxUSE_UNICODE_WCHAR/UTF8
 593
 594     // the last case is that narrow string was passed in: in that case, we need
 595     // to convert it:
 596     wxASSERT( m_char );
 597
 598     m_wchar = wxConvLibc.cMB2WC(m_char.data());
 599
 600     return m_wchar.data();
 601 }
 602
 603 const wchar_t* wxFormatString::AsWChar()
 604 {
 605     if ( !m_convertedWChar )
 606         m_convertedWChar = wxPrintfFormatConverterWchar().Convert(InputAsWChar());
 607
 608     return m_convertedWChar.data();
 609 }
 610 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY