src/common/string.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/string.cpp
   3 // Purpose:     wxString class
   4 // Author:      Vadim Zeitlin, Ryan Norton
   5 // Modified by:
   6 // Created:     29/01/98
   7 // RCS-ID:      $Id$
   8 // Copyright:   (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
   9 //              (c) 2004 Ryan Norton <wxprojects@comcast.net>
  10 // Licence:     wxWindows licence
  11 /////////////////////////////////////////////////////////////////////////////
  12
  13 // ===========================================================================
  14 // headers, declarations, constants
  15 // ===========================================================================
  16
  17 // For compilers that support precompilation, includes "wx.h".
  18 #include "wx/wxprec.h"
  19
  20 #ifdef __BORLANDC__
  21     #pragma hdrstop
  22 #endif
  23
  24 #ifndef WX_PRECOMP
  25     #include "wx/string.h"
  26     #include "wx/wxcrtvararg.h"
  27     #include "wx/intl.h"
  28     #include "wx/log.h"
  29 #endif
  30
  31 #include <ctype.h>
  32
  33 #ifndef __WXWINCE__
  34     #include <errno.h>
  35 #endif
  36
  37 #include <string.h>
  38 #include <stdlib.h>
  39
  40 #include "wx/hashmap.h"
  41 #include "wx/vector.h"
  42 #include "wx/xlocale.h"
  43
  44 #ifdef __WINDOWS__
  45     #include "wx/msw/wrapwin.h"
  46 #endif // __WINDOWS__
  47
  48 #if wxUSE_STD_IOSTREAM
  49     #include <sstream>
  50 #endif
  51
  52 // string handling functions used by wxString:
  53 #if wxUSE_UNICODE_UTF8
  54     #define wxStringMemcpy   memcpy
  55     #define wxStringMemcmp   memcmp
  56     #define wxStringMemchr   memchr
  57     #define wxStringStrlen   strlen
  58 #else
  59     #define wxStringMemcpy   wxTmemcpy
  60     #define wxStringMemcmp   wxTmemcmp
  61     #define wxStringMemchr   wxTmemchr
  62     #define wxStringStrlen   wxStrlen
  63 #endif
  64
  65 // define a function declared in wx/buffer.h here as we don't have buffer.cpp
  66 // and don't want to add it just because of this simple function
  67 namespace wxPrivate
  68 {
  69
  70 // wxXXXBuffer classes can be (implicitly) used during global statics
  71 // initialization so wrap the status UntypedBufferData variable in a function
  72 // to make it safe to access it even before all global statics are initialized
  73 UntypedBufferData *GetUntypedNullData()
  74 {
  75     static UntypedBufferData s_untypedNullData(NULL, 0);
  76
  77     return &s_untypedNullData;
  78 }
  79
  80 } // namespace wxPrivate
  81
  82 // ---------------------------------------------------------------------------
  83 // static class variables definition
  84 // ---------------------------------------------------------------------------
  85
  86 //According to STL _must_ be a -1 size_t
  87 const size_t wxString::npos = (size_t) -1;
  88
  89 #if wxUSE_STRING_POS_CACHE
  90
  91 #ifdef wxHAS_COMPILER_TLS
  92
// Definition of the wxString::ms_cache static member, used in builds where
// wxHAS_COMPILER_TLS is defined; its type is Cache wrapped in wxTLS_TYPE().
wxTLS_TYPE(wxString::Cache) wxString::ms_cache;
  94
  95 #else // !wxHAS_COMPILER_TLS
  96
  97 struct wxStrCacheInitializer
  98 {
  99     wxStrCacheInitializer()
 100     {
 101         // calling this function triggers s_cache initialization in it, and
 102         // from now on it becomes safe to call from multiple threads
 103         wxString::GetCache();
 104     }
 105 };
 106
 107 /*
 108 wxString::Cache& wxString::GetCache()
 109 {
 110     static wxTLS_TYPE(Cache) s_cache;
 111
 112     return wxTLS_VALUE(s_cache);
 113 }
 114 */
 115
// Constructing this file-scope object runs the wxStrCacheInitializer
// constructor, i.e. calls wxString::GetCache() once during static
// initialization.
static wxStrCacheInitializer gs_stringCacheInit;
 117
 118 #endif // wxHAS_COMPILER_TLS/!wxHAS_COMPILER_TLS
 119
 120 // gdb seems to be unable to display thread-local variables correctly, at least
 121 // not my 6.4.98 version under amd64, so provide this debugging helper to do it
 122 #if wxDEBUG_LEVEL >= 2
 123
 124 struct wxStrCacheDumper
 125 {
 126     static void ShowAll()
 127     {
 128         puts("*** wxString cache dump:");
 129         for ( unsigned n = 0; n < wxString::Cache::SIZE; n++ )
 130         {
 131             const wxString::Cache::Element&
 132                 c = wxString::GetCacheBegin()[n];
 133
 134             printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
 135                    n,
 136                    n == wxString::LastUsedCacheElement() ? " [*]" : "",
 137                    c.str,
 138                    (unsigned long)c.pos,
 139                    (unsigned long)c.impl,
 140                    (long)c.len);
 141         }
 142     }
 143 };
 144
 145 void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
 146
 147 #endif // wxDEBUG_LEVEL >= 2
 148
 149 #ifdef wxPROFILE_STRING_CACHE
 150
 151 wxString::CacheStats wxString::ms_cacheStats;
 152
 153 struct wxStrCacheStatsDumper
 154 {
 155     ~wxStrCacheStatsDumper()
 156     {
 157         const wxString::CacheStats& stats = wxString::ms_cacheStats;
 158
 159         if ( stats.postot )
 160         {
 161             puts("*** wxString cache statistics:");
 162             printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
 163                    stats.postot);
 164             printf("\tHits %u (of which %u not used) or %.2f%%\n",
 165                    stats.poshits,
 166                    stats.mishits,
 167                    100.*float(stats.poshits - stats.mishits)/stats.postot);
 168             printf("\tAverage position requested: %.2f\n",
 169                    float(stats.sumpos) / stats.postot);
 170             printf("\tAverage offset after cached hint: %.2f\n",
 171                    float(stats.sumofs) / stats.postot);
 172         }
 173
 174         if ( stats.lentot )
 175         {
 176             printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
 177                    stats.lentot, 100.*float(stats.lenhits)/stats.lentot);
 178         }
 179     }
 180 };
 181
 182 static wxStrCacheStatsDumper s_showCacheStats;
 183
 184 #endif // wxPROFILE_STRING_CACHE
 185
 186 #endif // wxUSE_STRING_POS_CACHE
 187
 188 // ----------------------------------------------------------------------------
 189 // global functions
 190 // ----------------------------------------------------------------------------
 191
 192 #if wxUSE_STD_IOSTREAM
 193
 194 #include <iostream>
 195
 196 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
 197 {
 198 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
 199     const wxScopedCharBuffer buf(str.AsCharBuf());
 200     if ( !buf )
 201         os.clear(wxSTD ios_base::failbit);
 202     else
 203         os << buf.data();
 204
 205     return os;
 206 #else
 207     return os << str.AsInternal();
 208 #endif
 209 }
 210
 211 wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
 212 {
 213     return os << str.c_str();
 214 }
 215
 216 wxSTD ostream& operator<<(wxSTD ostream& os, const wxScopedCharBuffer& str)
 217 {
 218     return os << str.data();
 219 }
 220
 221 #ifndef __BORLANDC__
 222 wxSTD ostream& operator<<(wxSTD ostream& os, const wxScopedWCharBuffer& str)
 223 {
 224     return os << str.data();
 225 }
 226 #endif
 227
 228 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
 229
 230 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxString& str)
 231 {
 232     return wos << str.wc_str();
 233 }
 234
 235 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxCStrData& str)
 236 {
 237     return wos << str.AsWChar();
 238 }
 239
 240 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxScopedWCharBuffer& str)
 241 {
 242     return wos << str.data();
 243 }
 244
 245 #endif  // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
 246
 247 #endif // wxUSE_STD_IOSTREAM
 248
 249 // ===========================================================================
 250 // wxString class core
 251 // ===========================================================================
 252
 253 #if wxUSE_UNICODE_UTF8
 254
 255 void wxString::PosLenToImpl(size_t pos, size_t len,
 256                             size_t *implPos, size_t *implLen) const
 257 {
 258     if ( pos == npos )
 259     {
 260         *implPos = npos;
 261     }
 262     else // have valid start position
 263     {
 264         const const_iterator b = GetIterForNthChar(pos);
 265         *implPos = wxStringImpl::const_iterator(b.impl()) - m_impl.begin();
 266         if ( len == npos )
 267         {
 268             *implLen = npos;
 269         }
 270         else // have valid length too
 271         {
 272             // we need to handle the case of length specifying a substring
 273             // going beyond the end of the string, just as std::string does
 274             const const_iterator e(end());
 275             const_iterator i(b);
 276             while ( len && i <= e )
 277             {
 278                 ++i;
 279                 --len;
 280             }
 281
 282             *implLen = i.impl() - b.impl();
 283         }
 284     }
 285 }
 286
 287 #endif // wxUSE_UNICODE_UTF8
 288
 289 // ----------------------------------------------------------------------------
 290 // wxCStrData converted strings caching
 291 // ----------------------------------------------------------------------------
 292
 293 // FIXME-UTF8: temporarily disabled because it doesn't work with global
 294 //             string objects; re-enable after fixing this bug and benchmarking
 295 //             performance to see if using a hash is a good idea at all
 296 #if 0
 297
 298 // For backward compatibility reasons, it must be possible to assign the value
 299 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
 300 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
 301 // because the memory would be freed immediately, but it has to be valid as long
 302 // as the string is not modified, so that code like this still works:
 303 //
 304 // const wxChar *s = str.c_str();
 305 // while ( s ) { ... }
 306
 307 // FIXME-UTF8: not thread safe!
 308 // FIXME-UTF8: we currently clear the cached conversion only when the string is
 309 //             destroyed, but we should do it when the string is modified, to
 310 //             keep memory usage down
 311 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
 312 //             invalidated the cache on every change, we could keep the previous
 313 //             conversion
 314 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
 315 //             to use mb_str() or wc_str() instead of (const [w]char*)c_str()
 316
 317 template<typename T>
 318 static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
 319 {
 320     typename T::iterator i = hash.find(wxConstCast(s, wxString));
 321     if ( i != hash.end() )
 322     {
 323         free(i->second);
 324         hash.erase(i);
 325     }
 326 }
 327
 328 #if wxUSE_UNICODE
 329 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
 330 //     so we have to use wxString* here and const-cast when used
 331 WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
 332                     wxStringCharConversionCache);
 333 static wxStringCharConversionCache gs_stringsCharCache;
 334
 335 const char* wxCStrData::AsChar() const
 336 {
 337     // remove previously cache value, if any (see FIXMEs above):
 338     DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
 339
 340     // convert the string and keep it:
 341     const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
 342         m_str->mb_str().release();
 343
 344     return s + m_offset;
 345 }
 346 #endif // wxUSE_UNICODE
 347
 348 #if !wxUSE_UNICODE_WCHAR
 349 WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
 350                     wxStringWCharConversionCache);
 351 static wxStringWCharConversionCache gs_stringsWCharCache;
 352
 353 const wchar_t* wxCStrData::AsWChar() const
 354 {
 355     // remove previously cache value, if any (see FIXMEs above):
 356     DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
 357
 358     // convert the string and keep it:
 359     const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
 360         m_str->wc_str().release();
 361
 362     return s + m_offset;
 363 }
 364 #endif // !wxUSE_UNICODE_WCHAR
 365
 366 wxString::~wxString()
 367 {
 368 #if wxUSE_UNICODE
 369     // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
 370     DeleteStringFromConversionCache(gs_stringsCharCache, this);
 371 #endif
 372 #if !wxUSE_UNICODE_WCHAR
 373     DeleteStringFromConversionCache(gs_stringsWCharCache, this);
 374 #endif
 375 }
 376 #endif
 377
 378 // ===========================================================================
 379 // wxString class core
 380 // ===========================================================================
 381
 382 // ---------------------------------------------------------------------------
 383 // construction and conversion
 384 // ---------------------------------------------------------------------------
 385
 386 #if wxUSE_UNICODE_WCHAR
 387 /* static */
 388 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 389                                                const wxMBConv& conv)
 390 {
 391     // anything to do?
 392     if ( !psz || nLength == 0 )
 393         return SubstrBufFromMB(wxWCharBuffer(L""), 0);
 394
 395     if ( nLength == npos )
 396         nLength = wxNO_LEN;
 397
 398     size_t wcLen;
 399     wxScopedWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 400     if ( !wcLen )
 401         return SubstrBufFromMB(wxWCharBuffer(L""), 0);
 402     else
 403         return SubstrBufFromMB(wcBuf, wcLen);
 404 }
 405 #endif // wxUSE_UNICODE_WCHAR
 406
 407 #if wxUSE_UNICODE_UTF8
 408 /* static */
 409 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 410                                                const wxMBConv& conv)
 411 {
 412     // anything to do?
 413     if ( !psz || nLength == 0 )
 414         return SubstrBufFromMB(wxCharBuffer(""), 0);
 415
 416     // if psz is already in UTF-8, we don't have to do the roundtrip to
 417     // wchar_t* and back:
 418     if ( conv.IsUTF8() )
 419     {
 420         // we need to validate the input because UTF8 iterators assume valid
 421         // UTF-8 sequence and psz may be invalid:
 422         if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
 423         {
 424             // we must pass the real string length to SubstrBufFromMB ctor
 425             if ( nLength == npos )
 426                 nLength = psz ? strlen(psz) : 0;
 427             return SubstrBufFromMB(wxScopedCharBuffer::CreateNonOwned(psz, nLength),
 428                                    nLength);
 429         }
 430         // else: do the roundtrip through wchar_t*
 431     }
 432
 433     if ( nLength == npos )
 434         nLength = wxNO_LEN;
 435
 436     // first convert to wide string:
 437     size_t wcLen;
 438     wxScopedWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 439     if ( !wcLen )
 440         return SubstrBufFromMB(wxCharBuffer(""), 0);
 441
 442     // and then to UTF-8:
 443     SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
 444     // widechar -> UTF-8 conversion isn't supposed to ever fail:
 445     wxASSERT_MSG( buf.data, wxT("conversion to UTF-8 failed") );
 446
 447     return buf;
 448 }
 449 #endif // wxUSE_UNICODE_UTF8
 450
 451 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 452 /* static */
 453 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
 454                                                const wxMBConv& conv)
 455 {
 456     // anything to do?
 457     if ( !pwz || nLength == 0 )
 458         return SubstrBufFromWC(wxCharBuffer(""), 0);
 459
 460     if ( nLength == npos )
 461         nLength = wxNO_LEN;
 462
 463     size_t mbLen;
 464     wxScopedCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
 465     if ( !mbLen )
 466         return SubstrBufFromWC(wxCharBuffer(""), 0);
 467     else
 468         return SubstrBufFromWC(mbBuf, mbLen);
 469 }
 470 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 471
 472 // This std::string::c_str()-like method returns a wide char pointer to string
 473 // contents. In wxUSE_UNICODE_WCHAR case it is trivial as it can simply return
 474 // a pointer to the internal representation. Otherwise a conversion is required
 475 // and it returns a temporary buffer.
 476 //
 477 // However for compatibility with c_str() and to avoid breaking existing code
 478 // doing
 479 //
 480 //      for ( const wchar_t *p = s.wc_str(); *p; p++ )
 481 //          ... use *p...
 482 //
 483 // we actually need to ensure that the returned buffer is _not_ temporary and
 484 // so we use wxString::m_convertedToWChar to store the returned data
 485 #if !wxUSE_UNICODE_WCHAR
 486
 487 const wchar_t *wxString::AsWChar(const wxMBConv& conv) const
 488 {
 489     const char * const strMB = m_impl.c_str();
 490     const size_t lenMB = m_impl.length();
 491
 492     // find out the size of the buffer needed
 493     const size_t lenWC = conv.ToWChar(NULL, 0, strMB, lenMB);
 494     if ( lenWC == wxCONV_FAILED )
 495         return NULL;
 496
 497     // keep the same buffer if the string size didn't change: this is not only
 498     // an optimization but also ensure that code which modifies string
 499     // character by character (without changing its length) can continue to use
 500     // the pointer returned by a previous wc_str() call even after changing the
 501     // string
 502
 503     // TODO-UTF8: we could check for ">" instead of "!=" here as this would
 504     //            allow to save on buffer reallocations but at the cost of
 505     //            consuming (even) more memory, we should benchmark this to
 506     //            determine if it's worth doing
 507     if ( !m_convertedToWChar.m_str || lenWC != m_convertedToWChar.m_len )
 508     {
 509         if ( !const_cast<wxString *>(this)->m_convertedToWChar.Extend(lenWC) )
 510             return NULL;
 511     }
 512
 513     // finally do convert
 514     m_convertedToWChar.m_str[lenWC] = L'\0';
 515     if ( conv.ToWChar(m_convertedToWChar.m_str, lenWC,
 516                       strMB, lenMB) == wxCONV_FAILED )
 517         return NULL;
 518
 519     return m_convertedToWChar.m_str;
 520 }
 521
 522 #endif // !wxUSE_UNICODE_WCHAR
 523
 524
 525 // Same thing for mb_str() which returns a normal char pointer to string
 526 // contents: this always requires converting it to the specified encoding in
 527 // non-ANSI build except if we need to convert to UTF-8 and this is what we
 528 // already use internally.
 529 #if wxUSE_UNICODE
 530
 531 const char *wxString::AsChar(const wxMBConv& conv) const
 532 {
 533 #if wxUSE_UNICODE_UTF8
 534     if ( conv.IsUTF8() )
 535         return m_impl.c_str();
 536
 537     const wchar_t * const strWC = AsWChar(wxMBConvStrictUTF8());
 538     const size_t lenWC = m_convertedToWChar.m_len;
 539 #else // wxUSE_UNICODE_WCHAR
 540     const wchar_t * const strWC = m_impl.c_str();
 541     const size_t lenWC = m_impl.length();
 542 #endif // wxUSE_UNICODE_UTF8/wxUSE_UNICODE_WCHAR
 543
 544     const size_t lenMB = conv.FromWChar(NULL, 0, strWC, lenWC);
 545     if ( lenMB == wxCONV_FAILED )
 546         return NULL;
 547
 548     if ( !m_convertedToChar.m_str || lenMB != m_convertedToChar.m_len )
 549     {
 550         if ( !const_cast<wxString *>(this)->m_convertedToChar.Extend(lenMB) )
 551             return NULL;
 552     }
 553
 554     m_convertedToChar.m_str[lenMB] = '\0';
 555     if ( conv.FromWChar(m_convertedToChar.m_str, lenMB,
 556                         strWC, lenWC) == wxCONV_FAILED )
 557         return NULL;
 558
 559     return m_convertedToChar.m_str;
 560 }
 561
 562 #endif // wxUSE_UNICODE
 563
 564 // shrink to minimal size (releasing extra memory)
 565 bool wxString::Shrink()
 566 {
 567   wxString tmp(begin(), end());
 568   swap(tmp);
 569   return tmp.length() == length();
 570 }
 571
 572 // deprecated compatibility code:
 573 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
 574 wxStringCharType *wxString::GetWriteBuf(size_t nLen)
 575 {
 576     return DoGetWriteBuf(nLen);
 577 }
 578
 579 void wxString::UngetWriteBuf()
 580 {
 581     DoUngetWriteBuf();
 582 }
 583
 584 void wxString::UngetWriteBuf(size_t nLen)
 585 {
 586     DoUngetWriteBuf(nLen);
 587 }
 588 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
 589
 590
 591 // ---------------------------------------------------------------------------
 592 // data access
 593 // ---------------------------------------------------------------------------
 594
 595 // all functions are inline in string.h
 596
 597 // ---------------------------------------------------------------------------
 598 // concatenation operators
 599 // ---------------------------------------------------------------------------
 600
 601 /*
 602  * concatenation functions come in 5 flavours:
 603  *  string + string
 604  *  char   + string      and      string + char
 605  *  C str  + string      and      string + C str
 606  */
 607
 608 wxString operator+(const wxString& str1, const wxString& str2)
 609 {
 610 #if !wxUSE_STL_BASED_WXSTRING
 611     wxASSERT( str1.IsValid() );
 612     wxASSERT( str2.IsValid() );
 613 #endif
 614
 615     wxString s = str1;
 616     s += str2;
 617
 618     return s;
 619 }
 620
 621 wxString operator+(const wxString& str, wxUniChar ch)
 622 {
 623 #if !wxUSE_STL_BASED_WXSTRING
 624     wxASSERT( str.IsValid() );
 625 #endif
 626
 627     wxString s = str;
 628     s += ch;
 629
 630     return s;
 631 }
 632
 633 wxString operator+(wxUniChar ch, const wxString& str)
 634 {
 635 #if !wxUSE_STL_BASED_WXSTRING
 636     wxASSERT( str.IsValid() );
 637 #endif
 638
 639     wxString s = ch;
 640     s += str;
 641
 642     return s;
 643 }
 644
 645 wxString operator+(const wxString& str, const char *psz)
 646 {
 647 #if !wxUSE_STL_BASED_WXSTRING
 648     wxASSERT( str.IsValid() );
 649 #endif
 650
 651     wxString s;
 652     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 653         wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
 654     }
 655     s += str;
 656     s += psz;
 657
 658     return s;
 659 }
 660
 661 wxString operator+(const wxString& str, const wchar_t *pwz)
 662 {
 663 #if !wxUSE_STL_BASED_WXSTRING
 664     wxASSERT( str.IsValid() );
 665 #endif
 666
 667     wxString s;
 668     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 669         wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
 670     }
 671     s += str;
 672     s += pwz;
 673
 674     return s;
 675 }
 676
 677 wxString operator+(const char *psz, const wxString& str)
 678 {
 679 #if !wxUSE_STL_BASED_WXSTRING
 680     wxASSERT( str.IsValid() );
 681 #endif
 682
 683     wxString s;
 684     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 685         wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
 686     }
 687     s = psz;
 688     s += str;
 689
 690     return s;
 691 }
 692
 693 wxString operator+(const wchar_t *pwz, const wxString& str)
 694 {
 695 #if !wxUSE_STL_BASED_WXSTRING
 696     wxASSERT( str.IsValid() );
 697 #endif
 698
 699     wxString s;
 700     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 701         wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
 702     }
 703     s = pwz;
 704     s += str;
 705
 706     return s;
 707 }
 708
 709 // ---------------------------------------------------------------------------
 710 // string comparison
 711 // ---------------------------------------------------------------------------
 712
 713 bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
 714 {
 715     return (length() == 1) && (compareWithCase ? GetChar(0u) == c
 716                                : wxToupper(GetChar(0u)) == wxToupper(c));
 717 }
 718
 719 #ifdef HAVE_STD_STRING_COMPARE
 720
 721 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
 722 //     UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
 723 //     sort strings in characters code point order by sorting the byte sequence
 724 //     in byte values order (i.e. what strcmp() and memcmp() do).
 725
 726 int wxString::compare(const wxString& str) const
 727 {
 728     return m_impl.compare(str.m_impl);
 729 }
 730
 731 int wxString::compare(size_t nStart, size_t nLen,
 732                       const wxString& str) const
 733 {
 734     size_t pos, len;
 735     PosLenToImpl(nStart, nLen, &pos, &len);
 736     return m_impl.compare(pos, len, str.m_impl);
 737 }
 738
 739 int wxString::compare(size_t nStart, size_t nLen,
 740                       const wxString& str,
 741                       size_t nStart2, size_t nLen2) const
 742 {
 743     size_t pos, len;
 744     PosLenToImpl(nStart, nLen, &pos, &len);
 745
 746     size_t pos2, len2;
 747     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 748
 749     return m_impl.compare(pos, len, str.m_impl, pos2, len2);
 750 }
 751
 752 int wxString::compare(const char* sz) const
 753 {
 754     return m_impl.compare(ImplStr(sz));
 755 }
 756
 757 int wxString::compare(const wchar_t* sz) const
 758 {
 759     return m_impl.compare(ImplStr(sz));
 760 }
 761
 762 int wxString::compare(size_t nStart, size_t nLen,
 763                       const char* sz, size_t nCount) const
 764 {
 765     size_t pos, len;
 766     PosLenToImpl(nStart, nLen, &pos, &len);
 767
 768     SubstrBufFromMB str(ImplStr(sz, nCount));
 769
 770     return m_impl.compare(pos, len, str.data, str.len);
 771 }
 772
 773 int wxString::compare(size_t nStart, size_t nLen,
 774                       const wchar_t* sz, size_t nCount) const
 775 {
 776     size_t pos, len;
 777     PosLenToImpl(nStart, nLen, &pos, &len);
 778
 779     SubstrBufFromWC str(ImplStr(sz, nCount));
 780
 781     return m_impl.compare(pos, len, str.data, str.len);
 782 }
 783
 784 #else // !HAVE_STD_STRING_COMPARE
 785
 786 static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
 787                           const wxStringCharType* s2, size_t l2)
 788 {
 789     if( l1 == l2 )
 790         return wxStringMemcmp(s1, s2, l1);
 791     else if( l1 < l2 )
 792     {
 793         int ret = wxStringMemcmp(s1, s2, l1);
 794         return ret == 0 ? -1 : ret;
 795     }
 796     else
 797     {
 798         int ret = wxStringMemcmp(s1, s2, l2);
 799         return ret == 0 ? +1 : ret;
 800     }
 801 }
 802
 803 int wxString::compare(const wxString& str) const
 804 {
 805     return ::wxDoCmp(m_impl.data(), m_impl.length(),
 806                      str.m_impl.data(), str.m_impl.length());
 807 }
 808
 809 int wxString::compare(size_t nStart, size_t nLen,
 810                       const wxString& str) const
 811 {
 812     wxASSERT(nStart <= length());
 813     size_type strLen = length() - nStart;
 814     nLen = strLen < nLen ? strLen : nLen;
 815
 816     size_t pos, len;
 817     PosLenToImpl(nStart, nLen, &pos, &len);
 818
 819     return ::wxDoCmp(m_impl.data() + pos,  len,
 820                      str.m_impl.data(), str.m_impl.length());
 821 }
 822
 823 int wxString::compare(size_t nStart, size_t nLen,
 824                       const wxString& str,
 825                       size_t nStart2, size_t nLen2) const
 826 {
 827     wxASSERT(nStart <= length());
 828     wxASSERT(nStart2 <= str.length());
 829     size_type strLen  =     length() - nStart,
 830               strLen2 = str.length() - nStart2;
 831     nLen  = strLen  < nLen  ? strLen  : nLen;
 832     nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
 833
 834     size_t pos, len;
 835     PosLenToImpl(nStart, nLen, &pos, &len);
 836     size_t pos2, len2;
 837     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 838
 839     return ::wxDoCmp(m_impl.data() + pos, len,
 840                      str.m_impl.data() + pos2, len2);
 841 }
 842
 843 int wxString::compare(const char* sz) const
 844 {
 845     SubstrBufFromMB str(ImplStr(sz, npos));
 846     if ( str.len == npos )
 847         str.len = wxStringStrlen(str.data);
 848     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 849 }
 850
 851 int wxString::compare(const wchar_t* sz) const
 852 {
 853     SubstrBufFromWC str(ImplStr(sz, npos));
 854     if ( str.len == npos )
 855         str.len = wxStringStrlen(str.data);
 856     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 857 }
 858
 859 int wxString::compare(size_t nStart, size_t nLen,
 860                       const char* sz, size_t nCount) const
 861 {
 862     wxASSERT(nStart <= length());
 863     size_type strLen = length() - nStart;
 864     nLen = strLen < nLen ? strLen : nLen;
 865
 866     size_t pos, len;
 867     PosLenToImpl(nStart, nLen, &pos, &len);
 868
 869     SubstrBufFromMB str(ImplStr(sz, nCount));
 870     if ( str.len == npos )
 871         str.len = wxStringStrlen(str.data);
 872
 873     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 874 }
 875
 876 int wxString::compare(size_t nStart, size_t nLen,
 877                       const wchar_t* sz, size_t nCount) const
 878 {
 879     wxASSERT(nStart <= length());
 880     size_type strLen = length() - nStart;
 881     nLen = strLen < nLen ? strLen : nLen;
 882
 883     size_t pos, len;
 884     PosLenToImpl(nStart, nLen, &pos, &len);
 885
 886     SubstrBufFromWC str(ImplStr(sz, nCount));
 887     if ( str.len == npos )
 888         str.len = wxStringStrlen(str.data);
 889
 890     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 891 }
 892
 893 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
 894
 895
 896 // ---------------------------------------------------------------------------
 897 // find_{first,last}_[not]_of functions
 898 // ---------------------------------------------------------------------------
 899
 900 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
 901
 902 // NB: All these functions are implemented  with the argument being wxChar*,
 903 //     i.e. widechar string in any Unicode build, even though native string
 904 //     representation is char* in the UTF-8 build. This is because we couldn't
 905 //     use memchr() to determine if a character is in a set encoded as UTF-8.
 906
 907 size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
 908 {
 909     return find_first_of(sz, nStart, wxStrlen(sz));
 910 }
 911
 912 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
 913 {
 914     return find_first_not_of(sz, nStart, wxStrlen(sz));
 915 }
 916
 917 size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
 918 {
 919     wxASSERT_MSG( nStart <= length(),  wxT("invalid index") );
 920
 921     size_t idx = nStart;
 922     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 923     {
 924         if ( wxTmemchr(sz, *i, n) )
 925             return idx;
 926     }
 927
 928     return npos;
 929 }
 930
 931 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
 932 {
 933     wxASSERT_MSG( nStart <= length(),  wxT("invalid index") );
 934
 935     size_t idx = nStart;
 936     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 937     {
 938         if ( !wxTmemchr(sz, *i, n) )
 939             return idx;
 940     }
 941
 942     return npos;
 943 }
 944
 945
 946 size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
 947 {
 948     return find_last_of(sz, nStart, wxStrlen(sz));
 949 }
 950
 951 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
 952 {
 953     return find_last_not_of(sz, nStart, wxStrlen(sz));
 954 }
 955
 956 size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
 957 {
 958     size_t len = length();
 959
 960     if ( nStart == npos )
 961     {
 962         nStart = len - 1;
 963     }
 964     else
 965     {
 966         wxASSERT_MSG( nStart <= len, wxT("invalid index") );
 967     }
 968
 969     size_t idx = nStart;
 970     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 971           i != rend(); --idx, ++i )
 972     {
 973         if ( wxTmemchr(sz, *i, n) )
 974             return idx;
 975     }
 976
 977     return npos;
 978 }
 979
 980 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
 981 {
 982     size_t len = length();
 983
 984     if ( nStart == npos )
 985     {
 986         nStart = len - 1;
 987     }
 988     else
 989     {
 990         wxASSERT_MSG( nStart <= len, wxT("invalid index") );
 991     }
 992
 993     size_t idx = nStart;
 994     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 995           i != rend(); --idx, ++i )
 996     {
 997         if ( !wxTmemchr(sz, *i, n) )
 998             return idx;
 999     }
1000
1001     return npos;
1002 }
1003
1004 size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
1005 {
1006     wxASSERT_MSG( nStart <= length(),  wxT("invalid index") );
1007
1008     size_t idx = nStart;
1009     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
1010     {
1011         if ( *i != ch )
1012             return idx;
1013     }
1014
1015     return npos;
1016 }
1017
1018 size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
1019 {
1020     size_t len = length();
1021
1022     if ( nStart == npos )
1023     {
1024         nStart = len - 1;
1025     }
1026     else
1027     {
1028         wxASSERT_MSG( nStart <= len, wxT("invalid index") );
1029     }
1030
1031     size_t idx = nStart;
1032     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1033           i != rend(); --idx, ++i )
1034     {
1035         if ( *i != ch )
1036             return idx;
1037     }
1038
1039     return npos;
1040 }
1041
// the functions above were implemented for wchar_t* arguments in Unicode
// build and char* in ANSI build; below are implementations for the other
// version:
//
// wxOtherCharType is the character type not handled above and STRCONV
// converts a string of that type using wxConvLibc and casts the result to
// const wxChar*, so that each overload below simply forwards to the
// corresponding wxChar* overload. Both macros are local to this section and
// are undefined again at its end.
#if wxUSE_UNICODE
    #define wxOtherCharType char
    #define STRCONV         (const wxChar*)wxConvLibc.cMB2WC
#else
    #define wxOtherCharType wchar_t
    #define STRCONV         (const wxChar*)wxConvLibc.cWC2MB
#endif

// NOTE(review): the overloads taking a length convert n characters of sz but
// then pass the same n as the length of the converted string; this looks
// like it assumes that the conversion preserves the number of characters --
// confirm that this holds for multi-byte encodings.

size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_first_of(STRCONV(sz), nStart); }

size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
                               size_t n) const
    { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_last_of(STRCONV(sz), nStart); }
size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
                              size_t n) const
    { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_first_not_of(STRCONV(sz), nStart); }
size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
                                   size_t n) const
    { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_last_not_of(STRCONV(sz), nStart); }
size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
                                  size_t n) const
    { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }

#undef wxOtherCharType
#undef STRCONV
1077
1078 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1079
1080 // ===========================================================================
1081 // other common string functions
1082 // ===========================================================================
1083
// Compares this string with s ignoring case.
//
// Returns 0 if the strings are equal, a negative value if this string is
// less than s and a positive value if it is greater. Embedded NULs are
// handled in both implementations below.
int wxString::CmpNoCase(const wxString& s) const
{
#if !wxUSE_UNICODE_UTF8
    // We compare NUL-delimited chunks of the strings inside the loop. We will
    // do as many iterations as there are embedded NULs in the string, i.e.
    // usually we will run it just once.

    typedef const wxStringImpl::value_type *pchar_type;
    const pchar_type thisBegin = m_impl.c_str();
    const pchar_type thatBegin = s.m_impl.c_str();

    const pchar_type thisEnd = thisBegin + m_impl.length();
    const pchar_type thatEnd = thatBegin + s.m_impl.length();

    pchar_type thisCur = thisBegin;
    pchar_type thatCur = thatBegin;

    int rc;
    for ( ;; )
    {
        // Compare until the next NUL, if the strings differ this is the final
        // result.
        rc = wxStricmp(thisCur, thatCur);
        if ( rc )
            break;

        // The chunks compared equal, so both have the same length and both
        // pointers can be advanced by it: they now point to a NUL each.
        const size_t lenChunk = wxStrlen(thisCur);
        thisCur += lenChunk;
        thatCur += lenChunk;

        // Skip all the NULs as wxStricmp() doesn't handle them.
        for ( ; !*thisCur; thisCur++, thatCur++ )
        {
            // Check if we exhausted either of the strings.
            if ( thisCur == thisEnd )
            {
                // This one is exhausted, is the other one too?
                return thatCur == thatEnd ? 0 : -1;
            }

            if ( thatCur == thatEnd )
            {
                // Because of the test above we know that this one is not
                // exhausted yet so it's greater than the other one that is.
                return 1;
            }

            if ( *thatCur )
            {
                // Anything non-NUL is greater than NUL: this string has a NUL
                // here while the other one doesn't, so this one is less.
                return -1;
            }
        }
    }

    return rc;
#else // wxUSE_UNICODE_UTF8
    // CRT functions can't be used for case-insensitive comparison of UTF-8
    // strings so do it in the naive, simple and inefficient way.

    // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
    const_iterator i1 = begin();
    const_iterator end1 = end();
    const_iterator i2 = s.begin();
    const_iterator end2 = s.end();

    // compare the lower case versions of the characters pairwise until the
    // first difference or the end of the shorter string
    for ( ; i1 != end1 && i2 != end2; ++i1, ++i2 )
    {
        wxUniChar lower1 = (wxChar)wxTolower(*i1);
        wxUniChar lower2 = (wxChar)wxTolower(*i2);
        if ( lower1 != lower2 )
            return lower1 < lower2 ? -1 : 1;
    }

    // the common part is the same, so the shorter string is the lesser one
    size_t len1 = length();
    size_t len2 = s.length();

    if ( len1 < len2 )
        return -1;
    else if ( len1 > len2 )
        return 1;
    return 0;
#endif // !wxUSE_UNICODE_UTF8/wxUSE_UNICODE_UTF8
}
1168
1169
1170 #if wxUSE_UNICODE
1171
1172 wxString wxString::FromAscii(const char *ascii, size_t len)
1173 {
1174     if (!ascii || len == 0)
1175        return wxEmptyString;
1176
1177     wxString res;
1178
1179     {
1180         wxStringInternalBuffer buf(res, len);
1181         wxStringCharType *dest = buf;
1182
1183         for ( ; len > 0; --len )
1184         {
1185             unsigned char c = (unsigned char)*ascii++;
1186             wxASSERT_MSG( c < 0x80,
1187                           wxT("Non-ASCII value passed to FromAscii().") );
1188
1189             *dest++ = (wchar_t)c;
1190         }
1191     }
1192
1193     return res;
1194 }
1195
1196 wxString wxString::FromAscii(const char *ascii)
1197 {
1198     return FromAscii(ascii, wxStrlen(ascii));
1199 }
1200
1201 wxString wxString::FromAscii(char ascii)
1202 {
1203     // What do we do with '\0' ?
1204
1205     unsigned char c = (unsigned char)ascii;
1206
1207     wxASSERT_MSG( c < 0x80, wxT("Non-ASCII value passed to FromAscii().") );
1208
1209     // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1210     return wxString(wxUniChar((wchar_t)c));
1211 }
1212
1213 const wxScopedCharBuffer wxString::ToAscii() const
1214 {
1215     // this will allocate enough space for the terminating NUL too
1216     wxCharBuffer buffer(length());
1217     char *dest = buffer.data();
1218
1219     for ( const_iterator i = begin(); i != end(); ++i )
1220     {
1221         wxUniChar c(*i);
1222         // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1223         *dest++ = c.IsAscii() ? (char)c : '_';
1224
1225         // the output string can't have embedded NULs anyhow, so we can safely
1226         // stop at first of them even if we do have any
1227         if ( !c )
1228             break;
1229     }
1230
1231     return buffer;
1232 }
1233
1234 #endif // wxUSE_UNICODE
1235
1236 // extract string of length nCount starting at nFirst
1237 wxString wxString::Mid(size_t nFirst, size_t nCount) const
1238 {
1239     size_t nLen = length();
1240
1241     // default value of nCount is npos and means "till the end"
1242     if ( nCount == npos )
1243     {
1244         nCount = nLen - nFirst;
1245     }
1246
1247     // out-of-bounds requests return sensible things
1248     if ( nFirst + nCount > nLen )
1249     {
1250         nCount = nLen - nFirst;
1251     }
1252
1253     if ( nFirst > nLen )
1254     {
1255         // AllocCopy() will return empty string
1256         return wxEmptyString;
1257     }
1258
1259     wxString dest(*this, nFirst, nCount);
1260     if ( dest.length() != nCount )
1261     {
1262         wxFAIL_MSG( wxT("out of memory in wxString::Mid") );
1263     }
1264
1265     return dest;
1266 }
1267
1268 // check that the string starts with prefix and return the rest of the string
1269 // in the provided pointer if it is not NULL, otherwise return false
1270 bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
1271 {
1272     if ( compare(0, prefix.length(), prefix) != 0 )
1273         return false;
1274
1275     if ( rest )
1276     {
1277         // put the rest of the string into provided pointer
1278         rest->assign(*this, prefix.length(), npos);
1279     }
1280
1281     return true;
1282 }
1283
1284
1285 // check that the string ends with suffix and return the rest of it in the
1286 // provided pointer if it is not NULL, otherwise return false
1287 bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
1288 {
1289     int start = length() - suffix.length();
1290
1291     if ( start < 0 || compare(start, npos, suffix) != 0 )
1292         return false;
1293
1294     if ( rest )
1295     {
1296         // put the rest of the string into provided pointer
1297         rest->assign(*this, 0, start);
1298     }
1299
1300     return true;
1301 }
1302
1303
1304 // extract nCount last (rightmost) characters
1305 wxString wxString::Right(size_t nCount) const
1306 {
1307   if ( nCount > length() )
1308     nCount = length();
1309
1310   wxString dest(*this, length() - nCount, nCount);
1311   if ( dest.length() != nCount ) {
1312     wxFAIL_MSG( wxT("out of memory in wxString::Right") );
1313   }
1314   return dest;
1315 }
1316
1317 // get all characters after the last occurrence of ch
1318 // (returns the whole string if ch not found)
1319 wxString wxString::AfterLast(wxUniChar ch) const
1320 {
1321   wxString str;
1322   int iPos = Find(ch, true);
1323   if ( iPos == wxNOT_FOUND )
1324     str = *this;
1325   else
1326     str.assign(*this, iPos + 1, npos);
1327
1328   return str;
1329 }
1330
1331 // extract nCount first (leftmost) characters
1332 wxString wxString::Left(size_t nCount) const
1333 {
1334   if ( nCount > length() )
1335     nCount = length();
1336
1337   wxString dest(*this, 0, nCount);
1338   if ( dest.length() != nCount ) {
1339     wxFAIL_MSG( wxT("out of memory in wxString::Left") );
1340   }
1341   return dest;
1342 }
1343
1344 // get all characters before the first occurrence of ch
1345 // (returns the whole string if ch not found)
1346 wxString wxString::BeforeFirst(wxUniChar ch, wxString *rest) const
1347 {
1348   int iPos = Find(ch);
1349   if ( iPos == wxNOT_FOUND )
1350   {
1351     iPos = length();
1352     if ( rest )
1353       rest->clear();
1354   }
1355   else
1356   {
1357     if ( rest )
1358       rest->assign(*this, iPos + 1, npos);
1359   }
1360
1361   return wxString(*this, 0, iPos);
1362 }
1363
1364 /// get all characters before the last occurrence of ch
1365 /// (returns empty string if ch not found)
1366 wxString wxString::BeforeLast(wxUniChar ch, wxString *rest) const
1367 {
1368   wxString str;
1369   int iPos = Find(ch, true);
1370   if ( iPos != wxNOT_FOUND )
1371   {
1372     if ( iPos != 0 )
1373       str.assign(*this, 0, iPos);
1374
1375     if ( rest )
1376       rest->assign(*this, iPos + 1, npos);
1377   }
1378   else
1379   {
1380     if ( rest )
1381       *rest = *this;
1382   }
1383
1384   return str;
1385 }
1386
1387 /// get all characters after the first occurrence of ch
1388 /// (returns empty string if ch not found)
1389 wxString wxString::AfterFirst(wxUniChar ch) const
1390 {
1391   wxString str;
1392   int iPos = Find(ch);
1393   if ( iPos != wxNOT_FOUND )
1394       str.assign(*this, iPos + 1, npos);
1395
1396   return str;
1397 }
1398
// Replaces the first (or all, if bReplaceAll is true) occurrences of strOld
// in this string with strNew and returns the number of replacements made.
//
// strOld must not be empty: this is checked and 0 is returned without
// changing the string if it is. All searching and replacing is done on the
// underlying m_impl strings. Three strategies are used below: in place
// substitution when both strings consist of a single m_impl element, a
// single find-and-replace when only the first occurrence is requested and
// building a new string in one pass when replacing all occurrences.
size_t wxString::Replace(const wxString& strOld,
                         const wxString& strNew, bool bReplaceAll)
{
    // if we tried to replace an empty string we'd enter an infinite loop below
    wxCHECK_MSG( !strOld.empty(), 0,
                 wxT("wxString::Replace(): invalid parameter") );

    wxSTRING_INVALIDATE_CACHE();

    size_t uiCount = 0;   // count of replacements made

    // optimize the special common case: replacement of one character by
    // another one (in UTF-8 case we can only do this for ASCII characters)
    //
    // benchmarks show that this special version is around 3 times faster
    // (depending on the proportion of matching characters and UTF-8/wchar_t
    // build)
    if ( strOld.m_impl.length() == 1 && strNew.m_impl.length() == 1 )
    {
        const wxStringCharType chOld = strOld.m_impl[0],
                               chNew = strNew.m_impl[0];

        // this loop is the simplified version of the one below
        for ( size_t pos = 0; ; )
        {
            pos = m_impl.find(chOld, pos);
            if ( pos == npos )
                break;

            // overwrite the character in place and continue after it
            m_impl[pos++] = chNew;

            uiCount++;

            if ( !bReplaceAll )
                break;
        }
    }
    else if ( !bReplaceAll)
    {
        // only the first occurrence needs to be replaced, do it directly
        size_t pos = m_impl.find(strOld.m_impl, 0);
        if ( pos != npos )
        {
            m_impl.replace(pos, strOld.m_impl.length(), strNew.m_impl);
            uiCount = 1;
        }
    }
    else // replace all occurrences
    {
        const size_t uiOldLen = strOld.m_impl.length();
        const size_t uiNewLen = strNew.m_impl.length();

        // first scan the string to find all positions at which the replacement
        // should be made
        wxVector<size_t> replacePositions;

        // the search resumes after the end of each match, so the occurrences
        // found never overlap
        size_t pos;
        for ( pos = m_impl.find(strOld.m_impl, 0);
              pos != npos;
              pos = m_impl.find(strOld.m_impl, pos + uiOldLen))
        {
            replacePositions.push_back(pos);
            ++uiCount;
        }

        if ( !uiCount )
            return 0;

        // allocate enough memory for the whole new string
        //
        // NB: the difference of the lengths may wrap around if the new string
        //     is shorter than the old one, but as all operands are unsigned
        //     the final sum is still the intended one
        wxString tmp;
        tmp.m_impl.reserve(m_impl.length() + uiCount*(uiNewLen - uiOldLen));

        // copy this string to tmp doing replacements on the fly
        size_t replNum = 0;
        for ( pos = 0; replNum < uiCount; replNum++ )
        {
            const size_t nextReplPos = replacePositions[replNum];

            // copy the unchanged part preceding this occurrence, if any
            if ( pos != nextReplPos )
            {
                tmp.m_impl.append(m_impl, pos, nextReplPos - pos);
            }

            tmp.m_impl.append(strNew.m_impl);
            pos = nextReplPos + uiOldLen;
        }

        if ( pos != m_impl.length() )
        {
            // append the rest of the string unchanged
            tmp.m_impl.append(m_impl, pos, m_impl.length() - pos);
        }

        swap(tmp);
    }

    return uiCount;
}
1497
1498 bool wxString::IsAscii() const
1499 {
1500     for ( const_iterator i = begin(); i != end(); ++i )
1501     {
1502         if ( !(*i).IsAscii() )
1503             return false;
1504     }
1505
1506     return true;
1507 }
1508
1509 bool wxString::IsWord() const
1510 {
1511     for ( const_iterator i = begin(); i != end(); ++i )
1512     {
1513         if ( !wxIsalpha(*i) )
1514             return false;
1515     }
1516
1517     return true;
1518 }
1519
1520 bool wxString::IsNumber() const
1521 {
1522     if ( empty() )
1523         return true;
1524
1525     const_iterator i = begin();
1526
1527     if ( *i == wxT('-') || *i == wxT('+') )
1528         ++i;
1529
1530     for ( ; i != end(); ++i )
1531     {
1532         if ( !wxIsdigit(*i) )
1533             return false;
1534     }
1535
1536     return true;
1537 }
1538
1539 wxString wxString::Strip(stripType w) const
1540 {
1541     wxString s = *this;
1542     if ( w & leading ) s.Trim(false);
1543     if ( w & trailing ) s.Trim(true);
1544     return s;
1545 }
1546
1547 // ---------------------------------------------------------------------------
1548 // case conversion
1549 // ---------------------------------------------------------------------------
1550
1551 wxString& wxString::MakeUpper()
1552 {
1553   for ( iterator it = begin(), en = end(); it != en; ++it )
1554     *it = (wxChar)wxToupper(*it);
1555
1556   return *this;
1557 }
1558
1559 wxString& wxString::MakeLower()
1560 {
1561   for ( iterator it = begin(), en = end(); it != en; ++it )
1562     *it = (wxChar)wxTolower(*it);
1563
1564   return *this;
1565 }
1566
1567 wxString& wxString::MakeCapitalized()
1568 {
1569     const iterator en = end();
1570     iterator it = begin();
1571     if ( it != en )
1572     {
1573         *it = (wxChar)wxToupper(*it);
1574         for ( ++it; it != en; ++it )
1575             *it = (wxChar)wxTolower(*it);
1576     }
1577
1578     return *this;
1579 }
1580
1581 // ---------------------------------------------------------------------------
1582 // trimming and padding
1583 // ---------------------------------------------------------------------------
1584
1585 // some compilers (VC++ 6.0 not to name them) return true for a call to
1586 // isspace('\xEA') in the C locale which seems to be broken to me, but we have
1587 // to live with this by checking that the character is a 7 bit one - even if
1588 // this may fail to detect some spaces (I don't know if Unicode doesn't have
1589 // space-like symbols somewhere except in the first 128 chars), it is arguably
1590 // still better than trimming away accented letters
1591 inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1592
1593 // trims spaces (in the sense of isspace) from left or right side
1594 wxString& wxString::Trim(bool bFromRight)
1595 {
1596     // first check if we're going to modify the string at all
1597     if ( !empty() &&
1598          (
1599           (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1600           (!bFromRight && wxSafeIsspace(GetChar(0u)))
1601          )
1602        )
1603     {
1604         if ( bFromRight )
1605         {
1606             // find last non-space character
1607             reverse_iterator psz = rbegin();
1608             while ( (psz != rend()) && wxSafeIsspace(*psz) )
1609                 ++psz;
1610
1611             // truncate at trailing space start
1612             erase(psz.base(), end());
1613         }
1614         else
1615         {
1616             // find first non-space character
1617             iterator psz = begin();
1618             while ( (psz != end()) && wxSafeIsspace(*psz) )
1619                 ++psz;
1620
1621             // fix up data and length
1622             erase(begin(), psz);
1623         }
1624     }
1625
1626     return *this;
1627 }
1628
1629 // adds nCount characters chPad to the string from either side
1630 wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1631 {
1632     wxString s(chPad, nCount);
1633
1634     if ( bFromRight )
1635         *this += s;
1636     else
1637     {
1638         s += *this;
1639         swap(s);
1640     }
1641
1642     return *this;
1643 }
1644
1645 // truncate the string
1646 wxString& wxString::Truncate(size_t uiLen)
1647 {
1648     if ( uiLen < length() )
1649     {
1650         erase(begin() + uiLen, end());
1651     }
1652     //else: nothing to do, string is already short enough
1653
1654     return *this;
1655 }
1656
1657 // ---------------------------------------------------------------------------
1658 // finding (return wxNOT_FOUND if not found and index otherwise)
1659 // ---------------------------------------------------------------------------
1660
1661 // find a character
1662 int wxString::Find(wxUniChar ch, bool bFromEnd) const
1663 {
1664     size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1665
1666     return (idx == npos) ? wxNOT_FOUND : (int)idx;
1667 }
1668
1669 // ----------------------------------------------------------------------------
1670 // conversion to numbers
1671 // ----------------------------------------------------------------------------
1672
1673 // The implementation of all the functions below is exactly the same so factor
1674 // it out. Note that number extraction works correctly on UTF-8 strings, so
1675 // we can use wxStringCharType and wx_str() for maximum efficiency.
1676
// DO_IF_NOT_WINCE(x) expands to x except when __WXWINCE__ is defined, in
// which case it expands to nothing; it is used to wrap the code using errno
// (notice that errno.h is not included for __WXWINCE__ in this file).
#ifndef __WXWINCE__
    #define DO_IF_NOT_WINCE(x) x
#else
    #define DO_IF_NOT_WINCE(x)
#endif

// Common prologue of the conversion functions: returns false if the output
// pointer pVal is NULL, resets errno and declares "start" pointing to the
// string data and "end" which the conversion function is expected to set.
#define WX_STRING_TO_X_TYPE_START                                           \
    wxCHECK_MSG( pVal, false, wxT("NULL output pointer") );                  \
    DO_IF_NOT_WINCE( errno = 0; )                                           \
    const wxStringCharType *start = wx_str();                               \
    wxStringCharType *end;

// Common epilogue of the conversion functions, expects the converted value
// in the variable "val".
//
// notice that we return false without modifying the output parameter at all if
// nothing could be parsed (or if errno was set to ERANGE) but we do modify it
// and return false then if we did parse something successfully but not the
// entire string
#define WX_STRING_TO_X_TYPE_END                                             \
    if ( end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )                 \
        return false;                                                       \
    *pVal = val;                                                            \
    return !*end;
1697
// Converts the string to a long using wxStrtol() with the given base, which
// must be either 0 or in 2..36 range (this is asserted).
//
// Returns true and stores the result in *pVal only if the entire string was
// parsed; see the WX_STRING_TO_X_TYPE_START/END macros, which supply the
// checks and the return statements, for the exact behaviour on failure.
bool wxString::ToLong(long *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );

    WX_STRING_TO_X_TYPE_START
    long val = wxStrtol(start, &end, base);
    WX_STRING_TO_X_TYPE_END
}
1706
// Converts the string to an unsigned long using wxStrtoul() with the given
// base, which must be either 0 or in 2..36 range (this is asserted).
//
// Returns true and stores the result in *pVal only if the entire string was
// parsed; see the WX_STRING_TO_X_TYPE_START/END macros, which supply the
// checks and the return statements, for the exact behaviour on failure.
bool wxString::ToULong(unsigned long *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );

    WX_STRING_TO_X_TYPE_START
    unsigned long val = wxStrtoul(start, &end, base);
    WX_STRING_TO_X_TYPE_END
}
1715
// Converts the string to a wxLongLong_t using wxStrtoll() with the given
// base, which must be either 0 or in 2..36 range (this is asserted).
//
// Returns true and stores the result in *pVal only if the entire string was
// parsed; see the WX_STRING_TO_X_TYPE_START/END macros, which supply the
// checks and the return statements, for the exact behaviour on failure.
bool wxString::ToLongLong(wxLongLong_t *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );

    WX_STRING_TO_X_TYPE_START
    wxLongLong_t val = wxStrtoll(start, &end, base);
    WX_STRING_TO_X_TYPE_END
}
1724
// Converts the string to a wxULongLong_t using wxStrtoull() with the given
// base, which must be either 0 or in 2..36 range (this is asserted).
//
// Returns true and stores the result in *pVal only if the entire string was
// parsed; see the WX_STRING_TO_X_TYPE_START/END macros, which supply the
// checks and the return statements, for the exact behaviour on failure.
bool wxString::ToULongLong(wxULongLong_t *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );

    WX_STRING_TO_X_TYPE_START
    wxULongLong_t val = wxStrtoull(start, &end, base);
    WX_STRING_TO_X_TYPE_END
}
1733
// Converts the string to a double using wxStrtod().
//
// Returns true and stores the result in *pVal only if the entire string was
// parsed; see the WX_STRING_TO_X_TYPE_START/END macros, which supply the
// checks and the return statements, for the exact behaviour on failure.
bool wxString::ToDouble(double *pVal) const
{
    WX_STRING_TO_X_TYPE_START
    double val = wxStrtod(start, &end);
    WX_STRING_TO_X_TYPE_END
}
1740
1741 #if wxUSE_XLOCALE
1742
// Same as ToLong() but performs the conversion using wxCLocale instead of
// the current locale.
//
// The narrow character ("A") variant of the conversion function is used in
// UTF-8 and ANSI builds when wxHAS_XLOCALE_SUPPORT is defined. The checks and
// the return statements come from the WX_STRING_TO_X_TYPE_START/END macros.
bool wxString::ToCLong(long *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );

    WX_STRING_TO_X_TYPE_START
#if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT)
    long val = wxStrtol_lA(start, &end, base, wxCLocale);
#else
    long val = wxStrtol_l(start, &end, base, wxCLocale);
#endif
    WX_STRING_TO_X_TYPE_END
}
1755
// Same as ToULong() but performs the conversion using wxCLocale instead of
// the current locale.
//
// The narrow character ("A") variant of the conversion function is used in
// UTF-8 and ANSI builds when wxHAS_XLOCALE_SUPPORT is defined. The checks and
// the return statements come from the WX_STRING_TO_X_TYPE_START/END macros.
bool wxString::ToCULong(unsigned long *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );

    WX_STRING_TO_X_TYPE_START
#if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT)
    unsigned long val = wxStrtoul_lA(start, &end, base, wxCLocale);
#else
    unsigned long val = wxStrtoul_l(start, &end, base, wxCLocale);
#endif
    WX_STRING_TO_X_TYPE_END
}
1768
// Same as ToDouble() but performs the conversion using wxCLocale instead of
// the current locale.
//
// The narrow character ("A") variant of the conversion function is used in
// UTF-8 and ANSI builds when wxHAS_XLOCALE_SUPPORT is defined. The checks and
// the return statements come from the WX_STRING_TO_X_TYPE_START/END macros.
bool wxString::ToCDouble(double *pVal) const
{
    WX_STRING_TO_X_TYPE_START
#if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT)
    double val = wxStrtod_lA(start, &end, wxCLocale);
#else
    double val = wxStrtod_l(start, &end, wxCLocale);
#endif
    WX_STRING_TO_X_TYPE_END
}
1779
1780 #else // wxUSE_XLOCALE
1781
1782 // Provide implementation of these functions even when wxUSE_XLOCALE is
1783 // disabled, we still need them in wxWidgets internal code.
1784
1785 // For integers we just assume the current locale uses the same number
1786 // representation as the C one as there is nothing else we can do.
1787 bool wxString::ToCLong(long *pVal, int base) const
1788 {
1789     return ToLong(pVal, base);
1790 }
1791
1792 bool wxString::ToCULong(unsigned long *pVal, int base) const
1793 {
1794     return ToULong(pVal, base);
1795 }
1796
// For floating point numbers we have to handle the problem of the decimal
// point which is different in different locales.
//
// This version converts a string using "." as decimal separator by parsing
// it, or a suitably modified copy of it, with the locale-dependent
// ToDouble(); it returns whatever ToDouble() returns.
bool wxString::ToCDouble(double *pVal) const
{
    // Create a copy of this string using whatever separator the current
    // locale uses instead of the decimal point, so that ToDouble() can parse
    // it.
#if wxUSE_INTL
    wxString sep = wxLocale::GetInfo(wxLOCALE_DECIMAL_POINT,
                                     wxLOCALE_CAT_NUMBER);
    if ( sep == "." )
    {
        // We can avoid an unnecessary string copy in this case.
        return ToDouble(pVal);
    }
#else // !wxUSE_INTL
    // We don't know what the current separator is so it might even be a point
    // already, try to parse the string as a double:
    if ( ToDouble(pVal) )
    {
        // It must have been the point, nothing else to do.
        return true;
    }

    // Try to guess the separator, using the most common alternative value.
    wxString sep(",");
#endif // wxUSE_INTL/!wxUSE_INTL
    // Replace the points in the copy with the separator and parse the result.
    wxString cstr(*this);
    cstr.Replace(".", sep);

    return cstr.ToDouble(pVal);
}
1828
1829 #endif  // wxUSE_XLOCALE/!wxUSE_XLOCALE
1830
1831 // ----------------------------------------------------------------------------
1832 // number to string conversion
1833 // ----------------------------------------------------------------------------
1834
1835 /* static */
1836 wxString wxString::FromDouble(double val, int precision)
1837 {
1838     wxCHECK_MSG( precision >= -1, wxString(), "Invalid negative precision" );
1839
1840     wxString format;
1841     if ( precision == -1 )
1842     {
1843         format = "%g";
1844     }
1845     else // Use fixed precision.
1846     {
1847         format.Printf("%%.%df", precision);
1848     }
1849
1850     return wxString::Format(format, val);
1851 }
1852
1853 /* static */
1854 wxString wxString::FromCDouble(double val, int precision)
1855 {
1856     wxCHECK_MSG( precision >= -1, wxString(), "Invalid negative precision" );
1857
1858 #if wxUSE_STD_IOSTREAM && wxUSE_STD_STRING
1859     // We assume that we can use the ostream and not wstream for numbers.
1860     wxSTD ostringstream os;
1861     if ( precision != -1 )
1862     {
1863         os.precision(precision);
1864         os.setf(std::ios::fixed, std::ios::floatfield);
1865     }
1866
1867     os << val;
1868     return os.str();
1869 #else // !wxUSE_STD_IOSTREAM
1870     // Can't use iostream locale support, fall back to the manual method
1871     // instead.
1872     wxString s = FromDouble(val, precision);
1873 #if wxUSE_INTL
1874     wxString sep = wxLocale::GetInfo(wxLOCALE_DECIMAL_POINT,
1875                                      wxLOCALE_CAT_NUMBER);
1876 #else // !wxUSE_INTL
1877     // As above, this is the most common alternative value. Notice that here it
1878     // doesn't matter if we guess wrongly and the current separator is already
1879     // ".": we'll just waste a call to Replace() in this case.
1880     wxString sep(",");
1881 #endif // wxUSE_INTL/!wxUSE_INTL
1882
1883     s.Replace(sep, ".");
1884     return s;
1885 #endif // wxUSE_STD_IOSTREAM/!wxUSE_STD_IOSTREAM
1886 }
1887
1888 // ---------------------------------------------------------------------------
1889 // formatted output
1890 // ---------------------------------------------------------------------------
1891
1892 #if !wxUSE_UTF8_LOCALE_ONLY
1893 /* static */
1894 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1895 wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
1896 #else
1897 wxString wxString::DoFormatWchar(const wxChar *format, ...)
1898 #endif
1899 {
1900     va_list argptr;
1901     va_start(argptr, format);
1902
1903     wxString s;
1904     s.PrintfV(format, argptr);
1905
1906     va_end(argptr);
1907
1908     return s;
1909 }
1910 #endif // !wxUSE_UTF8_LOCALE_ONLY
1911
#if wxUSE_UNICODE_UTF8
/* static */
// Returns a new string containing the result of formatting the variable
// arguments according to the char format string; all the work is done by
// PrintfV().
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
1927
1928 /* static */
1929 wxString wxString::FormatV(const wxString& format, va_list argptr)
1930 {
1931     wxString s;
1932     s.PrintfV(format, argptr);
1933     return s;
1934 }
1935
#if !wxUSE_UTF8_LOCALE_ONLY
// Formats the variable arguments according to the wide character format
// string using PrintfV() on the string object and returns the value returned
// by PrintfV().
#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
#else
int wxString::DoPrintfWchar(const wxChar *format, ...)
#endif
{
    va_list argptr;
    va_start(argptr, format);

#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
    // get a pointer to the wxString instance by downcasting "this" from the
    // mixin base class
    //
    // NOTE(review): this comment used to say that dynamic_cast<> had to be
    // used as the only cast working safely for downcasting when multiple
    // inheritance is used, but the code uses static_cast<> -- confirm which
    // one is intended.
    wxString *str = static_cast<wxString*>(this);
#else
    wxString *str = this;
#endif

    int iLen = str->PrintfV(format, argptr);

    va_end(argptr);

    return iLen;
}
#endif // !wxUSE_UTF8_LOCALE_ONLY
1962
#if wxUSE_UNICODE_UTF8
// Narrow (char) format counterpart of DoPrintfWchar(): formats into this
// string by forwarding to PrintfV() and returns its result.
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);
    const int written = PrintfV(format, args);
    va_end(args);

    return written;
}
#endif // wxUSE_UNICODE_UTF8
1976
1977 /*
1978     Uses wxVsnprintf and places the result into the this string.
1979
1980     In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1981     it is vswprintf.  Due to a discrepancy between vsnprintf and vswprintf in
1982     the ISO C99 (and thus SUSv3) standard the return value for the case of
1983     an undersized buffer is inconsistent.  For conforming vsnprintf
1984     implementations the function must return the number of characters that
1985     would have been printed had the buffer been large enough.  For conforming
1986     vswprintf implementations the function must return a negative number
1987     and set errno.
1988
1989     What vswprintf sets errno to is undefined but Darwin seems to set it to
1990     EOVERFLOW.  The only expected errno are EILSEQ and EINVAL.  Both of
1991     those are defined in the standard and backed up by several conformance
1992     statements.  Note that ENOMEM mentioned in the manual page does not
1993     apply to swprintf, only wprintf and fwprintf.
1994
1995     Official manual page:
1996     http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1997
1998     Some conformance statements (AIX, Solaris):
1999     http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
2000     http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
2001
2002     Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
2003     EILSEQ and EINVAL are specifically defined to mean the error is other than
2004     an undersized buffer and no other errno are defined we treat those two
2005     as meaning hard errors and everything else gets the old behaviour which
2006     is to keep looping and increasing buffer size until the function succeeds.
2007
2008     In practice it's impossible to determine before compilation which behaviour
2009     may be used.  The vswprintf function may have vsnprintf-like behaviour or
2010     vice-versa.  Behaviour detected on one release can theoretically change
2011     with an updated release.  Not to mention that configure testing for it
2012     would require the test to be run on the host system, not the build system
2013     which makes cross compilation difficult. Therefore, we make no assumptions
2014     about behaviour and try our best to handle every known case, including the
2015     case where wxVsnprintf returns a negative number and fails to set errno.
2016
2017     There is yet one more non-standard implementation and that is our own.
2018     Fortunately, that can be detected at compile-time.
2019
2020     On top of all that, ISO C99 explicitly defines snprintf to write a null
2021     character to the last position of the specified buffer.  That would be at
2022     at the given buffer size minus 1.  It is supposed to do this even if it
2023     turns out that the buffer is sized too small.
2024
2025     Darwin (tested on 10.5) follows the C99 behaviour exactly.
2026
2027     Glibc 2.6 almost follows the C99 behaviour except vswprintf never sets
2028     errno even when it fails.  However, it only seems to ever fail due
2029     to an undersized buffer.
2030 */
2031 #if wxUSE_UNICODE_UTF8
2032 template<typename BufferType>
2033 #else
2034 // we only need one version in non-UTF8 builds and at least two Windows
2035 // compilers have problems with this function template, so use just one
2036 // normal function here
2037 #endif
2038 static int DoStringPrintfV(wxString& str,
2039                            const wxString& format, va_list argptr)
2040 {
2041     int size = 1024;
2042
2043     for ( ;; )
2044     {
2045 #if wxUSE_UNICODE_UTF8
2046         BufferType tmp(str, size + 1);
2047         typename BufferType::CharType *buf = tmp;
2048 #else
2049         wxStringBuffer tmp(str, size + 1);
2050         wxChar *buf = tmp;
2051 #endif
2052
2053         if ( !buf )
2054         {
2055             // out of memory
2056             return -1;
2057         }
2058
2059         // wxVsnprintf() may modify the original arg pointer, so pass it
2060         // only a copy
2061         va_list argptrcopy;
2062         wxVaCopy(argptrcopy, argptr);
2063
2064 #ifndef __WXWINCE__
2065         // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
2066         errno = 0;
2067 #endif
2068         int len = wxVsnprintf(buf, size, format, argptrcopy);
2069         va_end(argptrcopy);
2070
2071         // some implementations of vsnprintf() don't NUL terminate
2072         // the string if there is not enough space for it so
2073         // always do it manually
2074         // FIXME: This really seems to be the wrong and would be an off-by-one
2075         // bug except the code above allocates an extra character.
2076         buf[size] = wxT('\0');
2077
2078         // vsnprintf() may return either -1 (traditional Unix behaviour) or the
2079         // total number of characters which would have been written if the
2080         // buffer were large enough (newer standards such as Unix98)
2081         if ( len < 0 )
2082         {
2083             // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
2084             //     wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
2085             //     is true if *both* of them use our own implementation,
2086             //     otherwise we can't be sure
2087 #if wxUSE_WXVSNPRINTF
2088             // we know that our own implementation of wxVsnprintf() returns -1
2089             // only for a format error - thus there's something wrong with
2090             // the user's format string
2091             buf[0] = '\0';
2092             return -1;
2093 #else // possibly using system version
2094             // assume it only returns error if there is not enough space, but
2095             // as we don't know how much we need, double the current size of
2096             // the buffer
2097 #ifndef __WXWINCE__
2098             if( (errno == EILSEQ) || (errno == EINVAL) )
2099             // If errno was set to one of the two well-known hard errors
2100             // then fail immediately to avoid an infinite loop.
2101                 return -1;
2102             else
2103 #endif // __WXWINCE__
2104             // still not enough, as we don't know how much we need, double the
2105             // current size of the buffer
2106                 size *= 2;
2107 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
2108         }
2109         else if ( len >= size )
2110         {
2111 #if wxUSE_WXVSNPRINTF
2112             // we know that our own implementation of wxVsnprintf() returns
2113             // size+1 when there's not enough space but that's not the size
2114             // of the required buffer!
2115             size *= 2;      // so we just double the current size of the buffer
2116 #else
2117             // some vsnprintf() implementations NUL-terminate the buffer and
2118             // some don't in len == size case, to be safe always add 1
2119             // FIXME: I don't quite understand this comment.  The vsnprintf
2120             // function is specifically defined to return the number of
2121             // characters printed not including the null terminator.
2122             // So OF COURSE you need to add 1 to get the right buffer size.
2123             // The following line is definitely correct, no question.
2124             size = len + 1;
2125 #endif
2126         }
2127         else // ok, there was enough space
2128         {
2129             break;
2130         }
2131     }
2132
2133     // we could have overshot
2134     str.Shrink();
2135
2136     return str.length();
2137 }
2138
2139 int wxString::PrintfV(const wxString& format, va_list argptr)
2140 {
2141 #if wxUSE_UNICODE_UTF8
2142     #if wxUSE_STL_BASED_WXSTRING
2143         typedef wxStringTypeBuffer<char> Utf8Buffer;
2144     #else
2145         typedef wxStringInternalBuffer Utf8Buffer;
2146     #endif
2147 #endif
2148
2149 #if wxUSE_UTF8_LOCALE_ONLY
2150     return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
2151 #else
2152     #if wxUSE_UNICODE_UTF8
2153     if ( wxLocaleIsUtf8 )
2154         return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
2155     else
2156         // wxChar* version
2157         return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
2158     #else
2159         return DoStringPrintfV(*this, format, argptr);
2160     #endif // UTF8/WCHAR
2161 #endif
2162 }
2163
2164 // ----------------------------------------------------------------------------
2165 // misc other operations
2166 // ----------------------------------------------------------------------------
2167
2168 // returns true if the string matches the pattern which may contain '*' and
2169 // '?' metacharacters (as usual, '?' matches any character and '*' any number
2170 // of them)
2171 bool wxString::Matches(const wxString& mask) const
2172 {
2173     // I disable this code as it doesn't seem to be faster (in fact, it seems
2174     // to be much slower) than the old, hand-written code below and using it
2175     // here requires always linking with libregex even if the user code doesn't
2176     // use it
2177 #if 0 // wxUSE_REGEX
2178     // first translate the shell-like mask into a regex
2179     wxString pattern;
2180     pattern.reserve(wxStrlen(pszMask));
2181
2182     pattern += wxT('^');
2183     while ( *pszMask )
2184     {
2185         switch ( *pszMask )
2186         {
2187             case wxT('?'):
2188                 pattern += wxT('.');
2189                 break;
2190
2191             case wxT('*'):
2192                 pattern += wxT(".*");
2193                 break;
2194
2195             case wxT('^'):
2196             case wxT('.'):
2197             case wxT('$'):
2198             case wxT('('):
2199             case wxT(')'):
2200             case wxT('|'):
2201             case wxT('+'):
2202             case wxT('\\'):
2203                 // these characters are special in a RE, quote them
2204                 // (however note that we don't quote '[' and ']' to allow
2205                 // using them for Unix shell like matching)
2206                 pattern += wxT('\\');
2207                 // fall through
2208
2209             default:
2210                 pattern += *pszMask;
2211         }
2212
2213         pszMask++;
2214     }
2215     pattern += wxT('$');
2216
2217     // and now use it
2218     return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
2219 #else // !wxUSE_REGEX
2220   // TODO: this is, of course, awfully inefficient...
2221
2222   // FIXME-UTF8: implement using iterators, remove #if
2223 #if wxUSE_UNICODE_UTF8
2224   const wxScopedWCharBuffer maskBuf = mask.wc_str();
2225   const wxScopedWCharBuffer txtBuf = wc_str();
2226   const wxChar *pszMask = maskBuf.data();
2227   const wxChar *pszTxt = txtBuf.data();
2228 #else
2229   const wxChar *pszMask = mask.wx_str();
2230   // the char currently being checked
2231   const wxChar *pszTxt = wx_str();
2232 #endif
2233
2234   // the last location where '*' matched
2235   const wxChar *pszLastStarInText = NULL;
2236   const wxChar *pszLastStarInMask = NULL;
2237
2238 match:
2239   for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
2240     switch ( *pszMask ) {
2241       case wxT('?'):
2242         if ( *pszTxt == wxT('\0') )
2243           return false;
2244
2245         // pszTxt and pszMask will be incremented in the loop statement
2246
2247         break;
2248
2249       case wxT('*'):
2250         {
2251           // remember where we started to be able to backtrack later
2252           pszLastStarInText = pszTxt;
2253           pszLastStarInMask = pszMask;
2254
2255           // ignore special chars immediately following this one
2256           // (should this be an error?)
2257           while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
2258             pszMask++;
2259
2260           // if there is nothing more, match
2261           if ( *pszMask == wxT('\0') )
2262             return true;
2263
2264           // are there any other metacharacters in the mask?
2265           size_t uiLenMask;
2266           const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));
2267
2268           if ( pEndMask != NULL ) {
2269             // we have to match the string between two metachars
2270             uiLenMask = pEndMask - pszMask;
2271           }
2272           else {
2273             // we have to match the remainder of the string
2274             uiLenMask = wxStrlen(pszMask);
2275           }
2276
2277           wxString strToMatch(pszMask, uiLenMask);
2278           const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
2279           if ( pMatch == NULL )
2280             return false;
2281
2282           // -1 to compensate "++" in the loop
2283           pszTxt = pMatch + uiLenMask - 1;
2284           pszMask += uiLenMask - 1;
2285         }
2286         break;
2287
2288       default:
2289         if ( *pszMask != *pszTxt )
2290           return false;
2291         break;
2292     }
2293   }
2294
2295   // match only if nothing left
2296   if ( *pszTxt == wxT('\0') )
2297     return true;
2298
2299   // if we failed to match, backtrack if we can
2300   if ( pszLastStarInText ) {
2301     pszTxt = pszLastStarInText + 1;
2302     pszMask = pszLastStarInMask;
2303
2304     pszLastStarInText = NULL;
2305
2306     // don't bother resetting pszLastStarInMask, it's unnecessary
2307
2308     goto match;
2309   }
2310
2311   return false;
2312 #endif // wxUSE_REGEX/!wxUSE_REGEX
2313 }
2314
2315 // Count the number of chars
2316 int wxString::Freq(wxUniChar ch) const
2317 {
2318     int count = 0;
2319     for ( const_iterator i = begin(); i != end(); ++i )
2320     {
2321         if ( *i == ch )
2322             count ++;
2323     }
2324     return count;
2325 }
2326