src/common/string.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/string.cpp
   3 // Purpose:     wxString class
   4 // Author:      Vadim Zeitlin, Ryan Norton
   5 // Modified by:
   6 // Created:     29/01/98
   7 // RCS-ID:      $Id$
   8 // Copyright:   (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
   9 //              (c) 2004 Ryan Norton <wxprojects@comcast.net>
  10 // Licence:     wxWindows licence
  11 /////////////////////////////////////////////////////////////////////////////
  12
  13 // ===========================================================================
  14 // headers, declarations, constants
  15 // ===========================================================================
  16
  17 // For compilers that support precompilation, includes "wx.h".
  18 #include "wx/wxprec.h"
  19
  20 #ifdef __BORLANDC__
  21     #pragma hdrstop
  22 #endif
  23
  24 #ifndef WX_PRECOMP
  25     #include "wx/string.h"
  26     #include "wx/wxcrtvararg.h"
  27     #include "wx/intl.h"
  28     #include "wx/log.h"
  29 #endif
  30
  31 #include <ctype.h>
  32
  33 #ifndef __WXWINCE__
  34     #include <errno.h>
  35 #endif
  36
  37 #include <string.h>
  38 #include <stdlib.h>
  39
  40 #include "wx/hashmap.h"
  41 #include "wx/vector.h"
  42 #include "wx/xlocale.h"
  43
  44 #ifdef __WXMSW__
  45     #include "wx/msw/wrapwin.h"
  46 #endif // __WXMSW__
  47
  48 // string handling functions used by wxString:
  49 #if wxUSE_UNICODE_UTF8
  50     #define wxStringMemcpy   memcpy
  51     #define wxStringMemcmp   memcmp
  52     #define wxStringMemchr   memchr
  53     #define wxStringStrlen   strlen
  54 #else
  55     #define wxStringMemcpy   wxTmemcpy
  56     #define wxStringMemcmp   wxTmemcmp
  57     #define wxStringMemchr   wxTmemchr
  58     #define wxStringStrlen   wxStrlen
  59 #endif
  60
  61 // define a function declared in wx/buffer.h here as we don't have buffer.cpp
  62 // and don't want to add it just because of this simple function
  63 namespace wxPrivate
  64 {
  65
// wxXXXBuffer classes can be (implicitly) used during global statics
// initialization so wrap the static UntypedBufferData variable in a function
// to make it safe to access it even before all global statics are initialized
//
// Returns the address of a function-local static UntypedBufferData object
// constructed with (NULL, 0); every call returns the same pointer.
UntypedBufferData *GetUntypedNullData()
{
    static UntypedBufferData s_untypedNullData(NULL, 0);

    return &s_untypedNullData;
}
  75
  76 } // namespace wxPrivate
  77
  78 // ---------------------------------------------------------------------------
  79 // static class variables definition
  80 // ---------------------------------------------------------------------------
  81
  82 //According to STL _must_ be a -1 size_t
  83 const size_t wxString::npos = (size_t) -1;
  84
  85 #if wxUSE_STRING_POS_CACHE
  86
  87 #ifdef wxHAS_COMPILER_TLS
  88
// definition of the static cache member, declared through wxTLS_TYPE(); only
// compiled when wxHAS_COMPILER_TLS is defined
wxTLS_TYPE(wxString::Cache) wxString::ms_cache;
  90
  91 #else // !wxHAS_COMPILER_TLS
  92
// Helper whose only purpose is to call wxString::GetCache() from its
// constructor; a single static instance of it is defined below.
struct wxStrCacheInitializer
{
    wxStrCacheInitializer()
    {
        // calling this function triggers s_cache initialization in it, and
        // from now on it becomes safe to call from multiple threads
        wxString::GetCache();
    }
};

/*
wxString::Cache& wxString::GetCache()
{
    static wxTLS_TYPE(Cache) s_cache;

    return wxTLS_VALUE(s_cache);
}
*/

// constructing this file-scope object runs the constructor above, and hence
// wxString::GetCache(), during static initialization
static wxStrCacheInitializer gs_stringCacheInit;
 113
 114 #endif // wxHAS_COMPILER_TLS/!wxHAS_COMPILER_TLS
 115
 116 // gdb seems to be unable to display thread-local variables correctly, at least
 117 // not my 6.4.98 version under amd64, so provide this debugging helper to do it
 118 #if wxDEBUG_LEVEL >= 2
 119
 120 struct wxStrCacheDumper
 121 {
 122     static void ShowAll()
 123     {
 124         puts("*** wxString cache dump:");
 125         for ( unsigned n = 0; n < wxString::Cache::SIZE; n++ )
 126         {
 127             const wxString::Cache::Element&
 128                 c = wxString::GetCacheBegin()[n];
 129
 130             printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
 131                    n,
 132                    n == wxString::LastUsedCacheElement() ? " [*]" : "",
 133                    c.str,
 134                    (unsigned long)c.pos,
 135                    (unsigned long)c.impl,
 136                    (long)c.len);
 137         }
 138     }
 139 };
 140
// free function wrapper around wxStrCacheDumper::ShowAll(), convenient to
// call by name (e.g. from a debugger)
void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
 142
 143 #endif // wxDEBUG_LEVEL >= 2
 144
 145 #ifdef wxPROFILE_STRING_CACHE
 146
// definition of the static statistics member; its counters are printed by
// wxStrCacheStatsDumper below
wxString::CacheStats wxString::ms_cacheStats;
 148
 149 struct wxStrCacheStatsDumper
 150 {
 151     ~wxStrCacheStatsDumper()
 152     {
 153         const wxString::CacheStats& stats = wxString::ms_cacheStats;
 154
 155         if ( stats.postot )
 156         {
 157             puts("*** wxString cache statistics:");
 158             printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
 159                    stats.postot);
 160             printf("\tHits %u (of which %u not used) or %.2f%%\n",
 161                    stats.poshits,
 162                    stats.mishits,
 163                    100.*float(stats.poshits - stats.mishits)/stats.postot);
 164             printf("\tAverage position requested: %.2f\n",
 165                    float(stats.sumpos) / stats.postot);
 166             printf("\tAverage offset after cached hint: %.2f\n",
 167                    float(stats.sumofs) / stats.postot);
 168         }
 169
 170         if ( stats.lentot )
 171         {
 172             printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
 173                    stats.lentot, 100.*float(stats.lenhits)/stats.lentot);
 174         }
 175     }
 176 };
 177
 178 static wxStrCacheStatsDumper s_showCacheStats;
 179
 180 #endif // wxPROFILE_STRING_CACHE
 181
 182 #endif // wxUSE_STRING_POS_CACHE
 183
 184 // ----------------------------------------------------------------------------
 185 // global functions
 186 // ----------------------------------------------------------------------------
 187
 188 #if wxUSE_STD_IOSTREAM
 189
 190 #include <iostream>
 191
 192 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
 193 {
 194 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
 195     const wxScopedCharBuffer buf(str.AsCharBuf());
 196     if ( !buf )
 197         os.clear(wxSTD ios_base::failbit);
 198     else
 199         os << buf.data();
 200
 201     return os;
 202 #else
 203     return os << str.AsInternal();
 204 #endif
 205 }
 206
 207 wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
 208 {
 209     return os << str.c_str();
 210 }
 211
 212 wxSTD ostream& operator<<(wxSTD ostream& os, const wxScopedCharBuffer& str)
 213 {
 214     return os << str.data();
 215 }
 216
 217 #ifndef __BORLANDC__
 218 wxSTD ostream& operator<<(wxSTD ostream& os, const wxScopedWCharBuffer& str)
 219 {
 220     return os << str.data();
 221 }
 222 #endif
 223
 224 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
 225
 226 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxString& str)
 227 {
 228     return wos << str.wc_str();
 229 }
 230
 231 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxCStrData& str)
 232 {
 233     return wos << str.AsWChar();
 234 }
 235
 236 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxScopedWCharBuffer& str)
 237 {
 238     return wos << str.data();
 239 }
 240
 241 #endif  // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
 242
 243 #endif // wxUSE_STD_IOSTREAM
 244
 245 // ===========================================================================
 246 // wxString class core
 247 // ===========================================================================
 248
 249 #if wxUSE_UNICODE_UTF8
 250
 251 void wxString::PosLenToImpl(size_t pos, size_t len,
 252                             size_t *implPos, size_t *implLen) const
 253 {
 254     if ( pos == npos )
 255     {
 256         *implPos = npos;
 257     }
 258     else // have valid start position
 259     {
 260         const const_iterator b = GetIterForNthChar(pos);
 261         *implPos = wxStringImpl::const_iterator(b.impl()) - m_impl.begin();
 262         if ( len == npos )
 263         {
 264             *implLen = npos;
 265         }
 266         else // have valid length too
 267         {
 268             // we need to handle the case of length specifying a substring
 269             // going beyond the end of the string, just as std::string does
 270             const const_iterator e(end());
 271             const_iterator i(b);
 272             while ( len && i <= e )
 273             {
 274                 ++i;
 275                 --len;
 276             }
 277
 278             *implLen = i.impl() - b.impl();
 279         }
 280     }
 281 }
 282
 283 #endif // wxUSE_UNICODE_UTF8
 284
 285 // ----------------------------------------------------------------------------
 286 // wxCStrData converted strings caching
 287 // ----------------------------------------------------------------------------
 288
 289 // FIXME-UTF8: temporarily disabled because it doesn't work with global
 290 //             string objects; re-enable after fixing this bug and benchmarking
 291 //             performance to see if using a hash is a good idea at all
 292 #if 0
 293
 294 // For backward compatibility reasons, it must be possible to assign the value
 295 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
 296 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
 297 // because the memory would be freed immediately, but it has to be valid as long
 298 // as the string is not modified, so that code like this still works:
 299 //
 300 // const wxChar *s = str.c_str();
 301 // while ( s ) { ... }
 302
 303 // FIXME-UTF8: not thread safe!
 304 // FIXME-UTF8: we currently clear the cached conversion only when the string is
 305 //             destroyed, but we should do it when the string is modified, to
 306 //             keep memory usage down
 307 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
 308 //             invalidated the cache on every change, we could keep the previous
 309 //             conversion
 310 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
 311 //             to use mb_str() or wc_str() instead of (const [w]char*)c_str()
 312
 313 template<typename T>
 314 static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
 315 {
 316     typename T::iterator i = hash.find(wxConstCast(s, wxString));
 317     if ( i != hash.end() )
 318     {
 319         free(i->second);
 320         hash.erase(i);
 321     }
 322 }
 323
 324 #if wxUSE_UNICODE
 325 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
 326 //     so we have to use wxString* here and const-cast when used
 327 WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
 328                     wxStringCharConversionCache);
 329 static wxStringCharConversionCache gs_stringsCharCache;
 330
// Converts the whole string with mb_str(), stores the released buffer in
// gs_stringsCharCache under the string's address and returns a pointer
// m_offset elements into it. (This code is currently inside "#if 0".)
const char* wxCStrData::AsChar() const
{
    // remove previously cached value, if any (see FIXMEs above):
    DeleteStringFromConversionCache(gs_stringsCharCache, m_str);

    // convert the string and keep it:
    const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
        m_str->mb_str().release();

    return s + m_offset;
}
 342 #endif // wxUSE_UNICODE
 343
 344 #if !wxUSE_UNICODE_WCHAR
 345 WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
 346                     wxStringWCharConversionCache);
 347 static wxStringWCharConversionCache gs_stringsWCharCache;
 348
// Converts the whole string with wc_str(), stores the released buffer in
// gs_stringsWCharCache under the string's address and returns a pointer
// m_offset elements into it. (This code is currently inside "#if 0".)
const wchar_t* wxCStrData::AsWChar() const
{
    // remove previously cached value, if any (see FIXMEs above):
    DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);

    // convert the string and keep it:
    const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
        m_str->wc_str().release();

    return s + m_offset;
}
 360 #endif // !wxUSE_UNICODE_WCHAR
 361
// Drops any conversions cached for this string from the global conversion
// caches. (This code is currently inside "#if 0".)
wxString::~wxString()
{
#if wxUSE_UNICODE
    // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
    DeleteStringFromConversionCache(gs_stringsCharCache, this);
#endif
#if !wxUSE_UNICODE_WCHAR
    DeleteStringFromConversionCache(gs_stringsWCharCache, this);
#endif
}
 372 #endif
 373
 374 // ===========================================================================
 375 // wxString class core
 376 // ===========================================================================
 377
 378 // ---------------------------------------------------------------------------
 379 // construction and conversion
 380 // ---------------------------------------------------------------------------
 381
 382 #if wxUSE_UNICODE_WCHAR
 383 /* static */
 384 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 385                                                const wxMBConv& conv)
 386 {
 387     // anything to do?
 388     if ( !psz || nLength == 0 )
 389         return SubstrBufFromMB(wxWCharBuffer(L""), 0);
 390
 391     if ( nLength == npos )
 392         nLength = wxNO_LEN;
 393
 394     size_t wcLen;
 395     wxScopedWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 396     if ( !wcLen )
 397         return SubstrBufFromMB(wxWCharBuffer(L""), 0);
 398     else
 399         return SubstrBufFromMB(wcBuf, wcLen);
 400 }
 401 #endif // wxUSE_UNICODE_WCHAR
 402
 403 #if wxUSE_UNICODE_UTF8
 404 /* static */
 405 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 406                                                const wxMBConv& conv)
 407 {
 408     // anything to do?
 409     if ( !psz || nLength == 0 )
 410         return SubstrBufFromMB(wxCharBuffer(""), 0);
 411
 412     // if psz is already in UTF-8, we don't have to do the roundtrip to
 413     // wchar_t* and back:
 414     if ( conv.IsUTF8() )
 415     {
 416         // we need to validate the input because UTF8 iterators assume valid
 417         // UTF-8 sequence and psz may be invalid:
 418         if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
 419         {
 420             // we must pass the real string length to SubstrBufFromMB ctor
 421             if ( nLength == npos )
 422                 nLength = psz ? strlen(psz) : 0;
 423             return SubstrBufFromMB(wxScopedCharBuffer::CreateNonOwned(psz, nLength),
 424                                    nLength);
 425         }
 426         // else: do the roundtrip through wchar_t*
 427     }
 428
 429     if ( nLength == npos )
 430         nLength = wxNO_LEN;
 431
 432     // first convert to wide string:
 433     size_t wcLen;
 434     wxScopedWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 435     if ( !wcLen )
 436         return SubstrBufFromMB(wxCharBuffer(""), 0);
 437
 438     // and then to UTF-8:
 439     SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
 440     // widechar -> UTF-8 conversion isn't supposed to ever fail:
 441     wxASSERT_MSG( buf.data, wxT("conversion to UTF-8 failed") );
 442
 443     return buf;
 444 }
 445 #endif // wxUSE_UNICODE_UTF8
 446
 447 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 448 /* static */
 449 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
 450                                                const wxMBConv& conv)
 451 {
 452     // anything to do?
 453     if ( !pwz || nLength == 0 )
 454         return SubstrBufFromWC(wxCharBuffer(""), 0);
 455
 456     if ( nLength == npos )
 457         nLength = wxNO_LEN;
 458
 459     size_t mbLen;
 460     wxScopedCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
 461     if ( !mbLen )
 462         return SubstrBufFromWC(wxCharBuffer(""), 0);
 463     else
 464         return SubstrBufFromWC(mbBuf, mbLen);
 465 }
 466 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 467
 468 // This std::string::c_str()-like method returns a wide char pointer to string
 469 // contents. In wxUSE_UNICODE_WCHAR case it is trivial as it can simply return
 470 // a pointer to the internal representation. Otherwise a conversion is required
 471 // and it returns a temporary buffer.
 472 //
 473 // However for compatibility with c_str() and to avoid breaking existing code
 474 // doing
 475 //
 476 //      for ( const wchar_t *p = s.wc_str(); *p; p++ )
 477 //          ... use *p...
 478 //
 479 // we actually need to ensure that the returned buffer is _not_ temporary and
 480 // so we use wxString::m_convertedToWChar to store the returned data
 481 #if !wxUSE_UNICODE_WCHAR
 482
// Converts the internal (char-based) representation to wide characters using
// conv and returns a pointer to the result, which is kept in
// m_convertedToWChar rather than in a temporary.
//
// Returns NULL if either ToWChar() call reports wxCONV_FAILED or if
// Extend() fails.
const wchar_t *wxString::AsWChar(const wxMBConv& conv) const
{
    const char * const strMB = m_impl.c_str();
    const size_t lenMB = m_impl.length();

    // find out the size of the buffer needed
    const size_t lenWC = conv.ToWChar(NULL, 0, strMB, lenMB);
    if ( lenWC == wxCONV_FAILED )
        return NULL;

    // keep the same buffer if the string size didn't change: this is not only
    // an optimization but also ensure that code which modifies string
    // character by character (without changing its length) can continue to use
    // the pointer returned by a previous wc_str() call even after changing the
    // string

    // TODO-UTF8: we could check for ">" instead of "!=" here as this would
    //            allow to save on buffer reallocations but at the cost of
    //            consuming (even) more memory, we should benchmark this to
    //            determine if it's worth doing
    if ( !m_convertedToWChar.m_str || lenWC != m_convertedToWChar.m_len )
    {
        // this method is const, so the const_cast is needed to modify the
        // conversion buffer member
        if ( !const_cast<wxString *>(this)->m_convertedToWChar.Extend(lenWC) )
            return NULL;
    }

    // finally do convert
    // NOTE(review): element lenWC is written here, so Extend(lenWC) is
    // presumably expected to provide room for lenWC + 1 elements -- confirm
    m_convertedToWChar.m_str[lenWC] = L'\0';
    if ( conv.ToWChar(m_convertedToWChar.m_str, lenWC,
                      strMB, lenMB) == wxCONV_FAILED )
        return NULL;

    return m_convertedToWChar.m_str;
}
 517
 518 #endif // !wxUSE_UNICODE_WCHAR
 519
 520
 521 // Same thing for mb_str() which returns a normal char pointer to string
 522 // contents: this always requires converting it to the specified encoding in
 523 // non-ANSI build except if we need to convert to UTF-8 and this is what we
 524 // already use internally.
 525 #if wxUSE_UNICODE
 526
 527 const char *wxString::AsChar(const wxMBConv& conv) const
 528 {
 529 #if wxUSE_UNICODE_UTF8
 530     if ( conv.IsUTF8() )
 531         return m_impl.c_str();
 532
 533     const wchar_t * const strWC = AsWChar(wxMBConvStrictUTF8());
 534     const size_t lenWC = m_convertedToWChar.m_len;
 535 #else // wxUSE_UNICODE_WCHAR
 536     const wchar_t * const strWC = m_impl.c_str();
 537     const size_t lenWC = m_impl.length();
 538 #endif // wxUSE_UNICODE_UTF8/wxUSE_UNICODE_WCHAR
 539
 540     const size_t lenMB = conv.FromWChar(NULL, 0, strWC, lenWC);
 541     if ( lenMB == wxCONV_FAILED )
 542         return NULL;
 543
 544     if ( !m_convertedToChar.m_str || lenMB != m_convertedToChar.m_len )
 545     {
 546         if ( !const_cast<wxString *>(this)->m_convertedToChar.Extend(lenMB) )
 547             return NULL;
 548     }
 549
 550     m_convertedToChar.m_str[lenMB] = '\0';
 551     if ( conv.FromWChar(m_convertedToChar.m_str, lenMB,
 552                         strWC, lenWC) == wxCONV_FAILED )
 553         return NULL;
 554
 555     return m_convertedToChar.m_str;
 556 }
 557
 558 #endif // wxUSE_UNICODE
 559
 560 // shrink to minimal size (releasing extra memory)
 561 bool wxString::Shrink()
 562 {
 563   wxString tmp(begin(), end());
 564   swap(tmp);
 565   return tmp.length() == length();
 566 }
 567
 568 // deprecated compatibility code:
 569 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
// Deprecated: simply forwards to DoGetWriteBuf(nLen) and returns its result.
wxStringCharType *wxString::GetWriteBuf(size_t nLen)
{
    return DoGetWriteBuf(nLen);
}

// Deprecated: simply forwards to DoUngetWriteBuf().
void wxString::UngetWriteBuf()
{
    DoUngetWriteBuf();
}

// Deprecated: simply forwards to DoUngetWriteBuf(nLen).
void wxString::UngetWriteBuf(size_t nLen)
{
    DoUngetWriteBuf(nLen);
}
 584 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
 585
 586
 587 // ---------------------------------------------------------------------------
 588 // data access
 589 // ---------------------------------------------------------------------------
 590
 591 // all functions are inline in string.h
 592
 593 // ---------------------------------------------------------------------------
 594 // concatenation operators
 595 // ---------------------------------------------------------------------------
 596
 597 /*
 598  * concatenation functions come in 5 flavours:
 599  *  string + string
 600  *  char   + string      and      string + char
 601  *  C str  + string      and      string + C str
 602  */
 603
 604 wxString operator+(const wxString& str1, const wxString& str2)
 605 {
 606 #if !wxUSE_STL_BASED_WXSTRING
 607     wxASSERT( str1.IsValid() );
 608     wxASSERT( str2.IsValid() );
 609 #endif
 610
 611     wxString s = str1;
 612     s += str2;
 613
 614     return s;
 615 }
 616
 617 wxString operator+(const wxString& str, wxUniChar ch)
 618 {
 619 #if !wxUSE_STL_BASED_WXSTRING
 620     wxASSERT( str.IsValid() );
 621 #endif
 622
 623     wxString s = str;
 624     s += ch;
 625
 626     return s;
 627 }
 628
 629 wxString operator+(wxUniChar ch, const wxString& str)
 630 {
 631 #if !wxUSE_STL_BASED_WXSTRING
 632     wxASSERT( str.IsValid() );
 633 #endif
 634
 635     wxString s = ch;
 636     s += str;
 637
 638     return s;
 639 }
 640
 641 wxString operator+(const wxString& str, const char *psz)
 642 {
 643 #if !wxUSE_STL_BASED_WXSTRING
 644     wxASSERT( str.IsValid() );
 645 #endif
 646
 647     wxString s;
 648     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 649         wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
 650     }
 651     s += str;
 652     s += psz;
 653
 654     return s;
 655 }
 656
 657 wxString operator+(const wxString& str, const wchar_t *pwz)
 658 {
 659 #if !wxUSE_STL_BASED_WXSTRING
 660     wxASSERT( str.IsValid() );
 661 #endif
 662
 663     wxString s;
 664     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 665         wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
 666     }
 667     s += str;
 668     s += pwz;
 669
 670     return s;
 671 }
 672
 673 wxString operator+(const char *psz, const wxString& str)
 674 {
 675 #if !wxUSE_STL_BASED_WXSTRING
 676     wxASSERT( str.IsValid() );
 677 #endif
 678
 679     wxString s;
 680     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 681         wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
 682     }
 683     s = psz;
 684     s += str;
 685
 686     return s;
 687 }
 688
 689 wxString operator+(const wchar_t *pwz, const wxString& str)
 690 {
 691 #if !wxUSE_STL_BASED_WXSTRING
 692     wxASSERT( str.IsValid() );
 693 #endif
 694
 695     wxString s;
 696     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 697         wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
 698     }
 699     s = pwz;
 700     s += str;
 701
 702     return s;
 703 }
 704
 705 // ---------------------------------------------------------------------------
 706 // string comparison
 707 // ---------------------------------------------------------------------------
 708
 709 bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
 710 {
 711     return (length() == 1) && (compareWithCase ? GetChar(0u) == c
 712                                : wxToupper(GetChar(0u)) == wxToupper(c));
 713 }
 714
 715 #ifdef HAVE_STD_STRING_COMPARE
 716
 717 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
 718 //     UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
 719 //     sort strings in characters code point order by sorting the byte sequence
 720 //     in byte values order (i.e. what strcmp() and memcmp() do).
 721
 722 int wxString::compare(const wxString& str) const
 723 {
 724     return m_impl.compare(str.m_impl);
 725 }
 726
 727 int wxString::compare(size_t nStart, size_t nLen,
 728                       const wxString& str) const
 729 {
 730     size_t pos, len;
 731     PosLenToImpl(nStart, nLen, &pos, &len);
 732     return m_impl.compare(pos, len, str.m_impl);
 733 }
 734
 735 int wxString::compare(size_t nStart, size_t nLen,
 736                       const wxString& str,
 737                       size_t nStart2, size_t nLen2) const
 738 {
 739     size_t pos, len;
 740     PosLenToImpl(nStart, nLen, &pos, &len);
 741
 742     size_t pos2, len2;
 743     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 744
 745     return m_impl.compare(pos, len, str.m_impl, pos2, len2);
 746 }
 747
 748 int wxString::compare(const char* sz) const
 749 {
 750     return m_impl.compare(ImplStr(sz));
 751 }
 752
 753 int wxString::compare(const wchar_t* sz) const
 754 {
 755     return m_impl.compare(ImplStr(sz));
 756 }
 757
 758 int wxString::compare(size_t nStart, size_t nLen,
 759                       const char* sz, size_t nCount) const
 760 {
 761     size_t pos, len;
 762     PosLenToImpl(nStart, nLen, &pos, &len);
 763
 764     SubstrBufFromMB str(ImplStr(sz, nCount));
 765
 766     return m_impl.compare(pos, len, str.data, str.len);
 767 }
 768
 769 int wxString::compare(size_t nStart, size_t nLen,
 770                       const wchar_t* sz, size_t nCount) const
 771 {
 772     size_t pos, len;
 773     PosLenToImpl(nStart, nLen, &pos, &len);
 774
 775     SubstrBufFromWC str(ImplStr(sz, nCount));
 776
 777     return m_impl.compare(pos, len, str.data, str.len);
 778 }
 779
 780 #else // !HAVE_STD_STRING_COMPARE
 781
 782 static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
 783                           const wxStringCharType* s2, size_t l2)
 784 {
 785     if( l1 == l2 )
 786         return wxStringMemcmp(s1, s2, l1);
 787     else if( l1 < l2 )
 788     {
 789         int ret = wxStringMemcmp(s1, s2, l1);
 790         return ret == 0 ? -1 : ret;
 791     }
 792     else
 793     {
 794         int ret = wxStringMemcmp(s1, s2, l2);
 795         return ret == 0 ? +1 : ret;
 796     }
 797 }
 798
 799 int wxString::compare(const wxString& str) const
 800 {
 801     return ::wxDoCmp(m_impl.data(), m_impl.length(),
 802                      str.m_impl.data(), str.m_impl.length());
 803 }
 804
 805 int wxString::compare(size_t nStart, size_t nLen,
 806                       const wxString& str) const
 807 {
 808     wxASSERT(nStart <= length());
 809     size_type strLen = length() - nStart;
 810     nLen = strLen < nLen ? strLen : nLen;
 811
 812     size_t pos, len;
 813     PosLenToImpl(nStart, nLen, &pos, &len);
 814
 815     return ::wxDoCmp(m_impl.data() + pos,  len,
 816                      str.m_impl.data(), str.m_impl.length());
 817 }
 818
 819 int wxString::compare(size_t nStart, size_t nLen,
 820                       const wxString& str,
 821                       size_t nStart2, size_t nLen2) const
 822 {
 823     wxASSERT(nStart <= length());
 824     wxASSERT(nStart2 <= str.length());
 825     size_type strLen  =     length() - nStart,
 826               strLen2 = str.length() - nStart2;
 827     nLen  = strLen  < nLen  ? strLen  : nLen;
 828     nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
 829
 830     size_t pos, len;
 831     PosLenToImpl(nStart, nLen, &pos, &len);
 832     size_t pos2, len2;
 833     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 834
 835     return ::wxDoCmp(m_impl.data() + pos, len,
 836                      str.m_impl.data() + pos2, len2);
 837 }
 838
 839 int wxString::compare(const char* sz) const
 840 {
 841     SubstrBufFromMB str(ImplStr(sz, npos));
 842     if ( str.len == npos )
 843         str.len = wxStringStrlen(str.data);
 844     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 845 }
 846
 847 int wxString::compare(const wchar_t* sz) const
 848 {
 849     SubstrBufFromWC str(ImplStr(sz, npos));
 850     if ( str.len == npos )
 851         str.len = wxStringStrlen(str.data);
 852     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 853 }
 854
 855 int wxString::compare(size_t nStart, size_t nLen,
 856                       const char* sz, size_t nCount) const
 857 {
 858     wxASSERT(nStart <= length());
 859     size_type strLen = length() - nStart;
 860     nLen = strLen < nLen ? strLen : nLen;
 861
 862     size_t pos, len;
 863     PosLenToImpl(nStart, nLen, &pos, &len);
 864
 865     SubstrBufFromMB str(ImplStr(sz, nCount));
 866     if ( str.len == npos )
 867         str.len = wxStringStrlen(str.data);
 868
 869     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 870 }
 871
 872 int wxString::compare(size_t nStart, size_t nLen,
 873                       const wchar_t* sz, size_t nCount) const
 874 {
 875     wxASSERT(nStart <= length());
 876     size_type strLen = length() - nStart;
 877     nLen = strLen < nLen ? strLen : nLen;
 878
 879     size_t pos, len;
 880     PosLenToImpl(nStart, nLen, &pos, &len);
 881
 882     SubstrBufFromWC str(ImplStr(sz, nCount));
 883     if ( str.len == npos )
 884         str.len = wxStringStrlen(str.data);
 885
 886     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 887 }
 888
 889 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
 890
 891
 892 // ---------------------------------------------------------------------------
 893 // find_{first,last}_[not]_of functions
 894 // ---------------------------------------------------------------------------
 895
 896 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
 897
 898 // NB: All these functions are implemented  with the argument being wxChar*,
 899 //     i.e. widechar string in any Unicode build, even though native string
 900 //     representation is char* in the UTF-8 build. This is because we couldn't
 901 //     use memchr() to determine if a character is in a set encoded as UTF-8.
 902
 903 size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
 904 {
 905     return find_first_of(sz, nStart, wxStrlen(sz));
 906 }
 907
 908 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
 909 {
 910     return find_first_not_of(sz, nStart, wxStrlen(sz));
 911 }
 912
 913 size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
 914 {
 915     wxASSERT_MSG( nStart <= length(),  wxT("invalid index") );
 916
 917     size_t idx = nStart;
 918     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 919     {
 920         if ( wxTmemchr(sz, *i, n) )
 921             return idx;
 922     }
 923
 924     return npos;
 925 }
 926
 927 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
 928 {
 929     wxASSERT_MSG( nStart <= length(),  wxT("invalid index") );
 930
 931     size_t idx = nStart;
 932     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 933     {
 934         if ( !wxTmemchr(sz, *i, n) )
 935             return idx;
 936     }
 937
 938     return npos;
 939 }
 940
 941
 942 size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
 943 {
 944     return find_last_of(sz, nStart, wxStrlen(sz));
 945 }
 946
 947 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
 948 {
 949     return find_last_not_of(sz, nStart, wxStrlen(sz));
 950 }
 951
 952 size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
 953 {
 954     size_t len = length();
 955
 956     if ( nStart == npos )
 957     {
 958         nStart = len - 1;
 959     }
 960     else
 961     {
 962         wxASSERT_MSG( nStart <= len, wxT("invalid index") );
 963     }
 964
 965     size_t idx = nStart;
 966     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 967           i != rend(); --idx, ++i )
 968     {
 969         if ( wxTmemchr(sz, *i, n) )
 970             return idx;
 971     }
 972
 973     return npos;
 974 }
 975
 976 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
 977 {
 978     size_t len = length();
 979
 980     if ( nStart == npos )
 981     {
 982         nStart = len - 1;
 983     }
 984     else
 985     {
 986         wxASSERT_MSG( nStart <= len, wxT("invalid index") );
 987     }
 988
 989     size_t idx = nStart;
 990     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 991           i != rend(); --idx, ++i )
 992     {
 993         if ( !wxTmemchr(sz, *i, n) )
 994             return idx;
 995     }
 996
 997     return npos;
 998 }
 999
1000 size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
1001 {
1002     wxASSERT_MSG( nStart <= length(),  wxT("invalid index") );
1003
1004     size_t idx = nStart;
1005     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
1006     {
1007         if ( *i != ch )
1008             return idx;
1009     }
1010
1011     return npos;
1012 }
1013
1014 size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
1015 {
1016     size_t len = length();
1017
1018     if ( nStart == npos )
1019     {
1020         nStart = len - 1;
1021     }
1022     else
1023     {
1024         wxASSERT_MSG( nStart <= len, wxT("invalid index") );
1025     }
1026
1027     size_t idx = nStart;
1028     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1029           i != rend(); --idx, ++i )
1030     {
1031         if ( *i != ch )
1032             return idx;
1033     }
1034
1035     return npos;
1036 }
1037
1038 // the functions above were implemented for wchar_t* arguments in Unicode
1039 // build and char* in ANSI build; below are implementations for the other
1040 // version:
1041 #if wxUSE_UNICODE
1042     #define wxOtherCharType char
1043     #define STRCONV         (const wxChar*)wxConvLibc.cMB2WC
1044 #else
1045     #define wxOtherCharType wchar_t
1046     #define STRCONV         (const wxChar*)wxConvLibc.cWC2MB
1047 #endif
1048
1049 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
1050     { return find_first_of(STRCONV(sz), nStart); }
1051
1052 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
1053                                size_t n) const
1054     { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
1055 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
1056     { return find_last_of(STRCONV(sz), nStart); }
1057 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
1058                               size_t n) const
1059     { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
1060 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
1061     { return find_first_not_of(STRCONV(sz), nStart); }
1062 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
1063                                    size_t n) const
1064     { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
1065 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
1066     { return find_last_not_of(STRCONV(sz), nStart); }
1067 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
1068                                   size_t n) const
1069     { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
1070
1071 #undef wxOtherCharType
1072 #undef STRCONV
1073
1074 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1075
1076 // ===========================================================================
1077 // other common string functions
1078 // ===========================================================================
1079
1080 int wxString::CmpNoCase(const wxString& s) const
1081 {
1082 #if defined(__WXMSW__) && !wxUSE_UNICODE_UTF8
1083     // Prefer to use CompareString() if available as it's more efficient than
1084     // doing it manually or even using wxStricmp() (see #10375)
1085     //
1086     // Also note that not using NORM_STRINGSORT may result in not having a
1087     // strict weak ordering (e.g. s1 < s2 and s2 < s3 but s3 < s1) and so break
1088     // algorithms such as std::sort that rely on it. It's also more consistent
1089     // with the fall back version below.
1090     switch ( ::CompareString(LOCALE_USER_DEFAULT,
1091                              NORM_IGNORECASE | SORT_STRINGSORT,
1092                              m_impl.c_str(), m_impl.length(),
1093                              s.m_impl.c_str(), s.m_impl.length()) )
1094     {
1095         case CSTR_LESS_THAN:
1096             return -1;
1097
1098         case CSTR_EQUAL:
1099             return 0;
1100
1101         case CSTR_GREATER_THAN:
1102             return 1;
1103
1104         default:
1105             wxFAIL_MSG( "unexpected CompareString() return value" );
1106             // fall through
1107
1108         case 0:
1109             wxLogLastError("CompareString");
1110             // use generic code below
1111     }
1112 #endif // __WXMSW__ && !wxUSE_UNICODE_UTF8
1113
1114     // do the comparison manually: notice that we can't use wxStricmp() as it
1115     // doesn't handle embedded NULs
1116
1117     // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
1118     const_iterator i1 = begin();
1119     const_iterator end1 = end();
1120     const_iterator i2 = s.begin();
1121     const_iterator end2 = s.end();
1122
1123     for ( ; i1 != end1 && i2 != end2; ++i1, ++i2 )
1124     {
1125         wxUniChar lower1 = (wxChar)wxTolower(*i1);
1126         wxUniChar lower2 = (wxChar)wxTolower(*i2);
1127         if ( lower1 != lower2 )
1128             return lower1 < lower2 ? -1 : 1;
1129     }
1130
1131     size_t len1 = length();
1132     size_t len2 = s.length();
1133
1134     if ( len1 < len2 )
1135         return -1;
1136     else if ( len1 > len2 )
1137         return 1;
1138     return 0;
1139 }
1140
1141
1142 #if wxUSE_UNICODE
1143
1144 #ifdef __MWERKS__
1145 #ifndef __SCHAR_MAX__
1146 #define __SCHAR_MAX__ 127
1147 #endif
1148 #endif
1149
1150 wxString wxString::FromAscii(const char *ascii, size_t len)
1151 {
1152     if (!ascii || len == 0)
1153        return wxEmptyString;
1154
1155     wxString res;
1156
1157     {
1158         wxStringInternalBuffer buf(res, len);
1159         wxStringCharType *dest = buf;
1160
1161         for ( ; len > 0; --len )
1162         {
1163             unsigned char c = (unsigned char)*ascii++;
1164             wxASSERT_MSG( c < 0x80,
1165                           wxT("Non-ASCII value passed to FromAscii().") );
1166
1167             *dest++ = (wchar_t)c;
1168         }
1169     }
1170
1171     return res;
1172 }
1173
1174 wxString wxString::FromAscii(const char *ascii)
1175 {
1176     return FromAscii(ascii, wxStrlen(ascii));
1177 }
1178
1179 wxString wxString::FromAscii(char ascii)
1180 {
1181     // What do we do with '\0' ?
1182
1183     unsigned char c = (unsigned char)ascii;
1184
1185     wxASSERT_MSG( c < 0x80, wxT("Non-ASCII value passed to FromAscii().") );
1186
1187     // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1188     return wxString(wxUniChar((wchar_t)c));
1189 }
1190
1191 const wxScopedCharBuffer wxString::ToAscii() const
1192 {
1193     // this will allocate enough space for the terminating NUL too
1194     wxCharBuffer buffer(length());
1195     char *dest = buffer.data();
1196
1197     for ( const_iterator i = begin(); i != end(); ++i )
1198     {
1199         wxUniChar c(*i);
1200         // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1201         *dest++ = c.IsAscii() ? (char)c : '_';
1202
1203         // the output string can't have embedded NULs anyhow, so we can safely
1204         // stop at first of them even if we do have any
1205         if ( !c )
1206             break;
1207     }
1208
1209     return buffer;
1210 }
1211
1212 #endif // wxUSE_UNICODE
1213
1214 // extract string of length nCount starting at nFirst
1215 wxString wxString::Mid(size_t nFirst, size_t nCount) const
1216 {
1217     size_t nLen = length();
1218
1219     // default value of nCount is npos and means "till the end"
1220     if ( nCount == npos )
1221     {
1222         nCount = nLen - nFirst;
1223     }
1224
1225     // out-of-bounds requests return sensible things
1226     if ( nFirst + nCount > nLen )
1227     {
1228         nCount = nLen - nFirst;
1229     }
1230
1231     if ( nFirst > nLen )
1232     {
1233         // AllocCopy() will return empty string
1234         return wxEmptyString;
1235     }
1236
1237     wxString dest(*this, nFirst, nCount);
1238     if ( dest.length() != nCount )
1239     {
1240         wxFAIL_MSG( wxT("out of memory in wxString::Mid") );
1241     }
1242
1243     return dest;
1244 }
1245
1246 // check that the string starts with prefix and return the rest of the string
1247 // in the provided pointer if it is not NULL, otherwise return false
1248 bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
1249 {
1250     if ( compare(0, prefix.length(), prefix) != 0 )
1251         return false;
1252
1253     if ( rest )
1254     {
1255         // put the rest of the string into provided pointer
1256         rest->assign(*this, prefix.length(), npos);
1257     }
1258
1259     return true;
1260 }
1261
1262
1263 // check that the string ends with suffix and return the rest of it in the
1264 // provided pointer if it is not NULL, otherwise return false
1265 bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
1266 {
1267     int start = length() - suffix.length();
1268
1269     if ( start < 0 || compare(start, npos, suffix) != 0 )
1270         return false;
1271
1272     if ( rest )
1273     {
1274         // put the rest of the string into provided pointer
1275         rest->assign(*this, 0, start);
1276     }
1277
1278     return true;
1279 }
1280
1281
1282 // extract nCount last (rightmost) characters
1283 wxString wxString::Right(size_t nCount) const
1284 {
1285   if ( nCount > length() )
1286     nCount = length();
1287
1288   wxString dest(*this, length() - nCount, nCount);
1289   if ( dest.length() != nCount ) {
1290     wxFAIL_MSG( wxT("out of memory in wxString::Right") );
1291   }
1292   return dest;
1293 }
1294
1295 // get all characters after the last occurrence of ch
1296 // (returns the whole string if ch not found)
1297 wxString wxString::AfterLast(wxUniChar ch) const
1298 {
1299   wxString str;
1300   int iPos = Find(ch, true);
1301   if ( iPos == wxNOT_FOUND )
1302     str = *this;
1303   else
1304     str.assign(*this, iPos + 1, npos);
1305
1306   return str;
1307 }
1308
1309 // extract nCount first (leftmost) characters
1310 wxString wxString::Left(size_t nCount) const
1311 {
1312   if ( nCount > length() )
1313     nCount = length();
1314
1315   wxString dest(*this, 0, nCount);
1316   if ( dest.length() != nCount ) {
1317     wxFAIL_MSG( wxT("out of memory in wxString::Left") );
1318   }
1319   return dest;
1320 }
1321
1322 // get all characters before the first occurrence of ch
1323 // (returns the whole string if ch not found)
1324 wxString wxString::BeforeFirst(wxUniChar ch) const
1325 {
1326   int iPos = Find(ch);
1327   if ( iPos == wxNOT_FOUND )
1328       iPos = length();
1329   return wxString(*this, 0, iPos);
1330 }
1331
1332 /// get all characters before the last occurrence of ch
1333 /// (returns empty string if ch not found)
1334 wxString wxString::BeforeLast(wxUniChar ch) const
1335 {
1336   wxString str;
1337   int iPos = Find(ch, true);
1338   if ( iPos != wxNOT_FOUND && iPos != 0 )
1339     str = wxString(c_str(), iPos);
1340
1341   return str;
1342 }
1343
1344 /// get all characters after the first occurrence of ch
1345 /// (returns empty string if ch not found)
1346 wxString wxString::AfterFirst(wxUniChar ch) const
1347 {
1348   wxString str;
1349   int iPos = Find(ch);
1350   if ( iPos != wxNOT_FOUND )
1351       str.assign(*this, iPos + 1, npos);
1352
1353   return str;
1354 }
1355
1356 // replace first (or all) occurrences of some substring with another one
1357 size_t wxString::Replace(const wxString& strOld,
1358                          const wxString& strNew, bool bReplaceAll)
1359 {
1360     // if we tried to replace an empty string we'd enter an infinite loop below
1361     wxCHECK_MSG( !strOld.empty(), 0,
1362                  wxT("wxString::Replace(): invalid parameter") );
1363
1364     wxSTRING_INVALIDATE_CACHE();
1365
1366     size_t uiCount = 0;   // count of replacements made
1367
1368     // optimize the special common case: replacement of one character by
1369     // another one (in UTF-8 case we can only do this for ASCII characters)
1370     //
1371     // benchmarks show that this special version is around 3 times faster
1372     // (depending on the proportion of matching characters and UTF-8/wchar_t
1373     // build)
1374     if ( strOld.m_impl.length() == 1 && strNew.m_impl.length() == 1 )
1375     {
1376         const wxStringCharType chOld = strOld.m_impl[0],
1377                                chNew = strNew.m_impl[0];
1378
1379         // this loop is the simplified version of the one below
1380         for ( size_t pos = 0; ; )
1381         {
1382             pos = m_impl.find(chOld, pos);
1383             if ( pos == npos )
1384                 break;
1385
1386             m_impl[pos++] = chNew;
1387
1388             uiCount++;
1389
1390             if ( !bReplaceAll )
1391                 break;
1392         }
1393     }
1394     else if ( !bReplaceAll)
1395     {
1396         size_t pos = m_impl.find(strOld, 0);
1397         if ( pos != npos )
1398         {
1399             m_impl.replace(pos, strOld.m_impl.length(), strNew.m_impl);
1400             uiCount = 1;
1401         }
1402     }
1403     else // replace all occurrences
1404     {
1405         const size_t uiOldLen = strOld.m_impl.length();
1406         const size_t uiNewLen = strNew.m_impl.length();
1407
1408         // first scan the string to find all positions at which the replacement
1409         // should be made
1410         wxVector<size_t> replacePositions;
1411
1412         size_t pos;
1413         for ( pos = m_impl.find(strOld.m_impl, 0);
1414               pos != npos;
1415               pos = m_impl.find(strOld.m_impl, pos + uiOldLen))
1416         {
1417             replacePositions.push_back(pos);
1418             ++uiCount;
1419         }
1420
1421         if ( !uiCount )
1422             return 0;
1423
1424         // allocate enough memory for the whole new string
1425         wxString tmp;
1426         tmp.m_impl.reserve(m_impl.length() + uiCount*(uiNewLen - uiOldLen));
1427
1428         // copy this string to tmp doing replacements on the fly
1429         size_t replNum = 0;
1430         for ( pos = 0; replNum < uiCount; replNum++ )
1431         {
1432             const size_t nextReplPos = replacePositions[replNum];
1433
1434             if ( pos != nextReplPos )
1435             {
1436                 tmp.m_impl.append(m_impl, pos, nextReplPos - pos);
1437             }
1438
1439             tmp.m_impl.append(strNew.m_impl);
1440             pos = nextReplPos + uiOldLen;
1441         }
1442
1443         if ( pos != m_impl.length() )
1444         {
1445             // append the rest of the string unchanged
1446             tmp.m_impl.append(m_impl, pos, m_impl.length() - pos);
1447         }
1448
1449         swap(tmp);
1450     }
1451
1452     return uiCount;
1453 }
1454
1455 bool wxString::IsAscii() const
1456 {
1457     for ( const_iterator i = begin(); i != end(); ++i )
1458     {
1459         if ( !(*i).IsAscii() )
1460             return false;
1461     }
1462
1463     return true;
1464 }
1465
1466 bool wxString::IsWord() const
1467 {
1468     for ( const_iterator i = begin(); i != end(); ++i )
1469     {
1470         if ( !wxIsalpha(*i) )
1471             return false;
1472     }
1473
1474     return true;
1475 }
1476
1477 bool wxString::IsNumber() const
1478 {
1479     if ( empty() )
1480         return true;
1481
1482     const_iterator i = begin();
1483
1484     if ( *i == wxT('-') || *i == wxT('+') )
1485         ++i;
1486
1487     for ( ; i != end(); ++i )
1488     {
1489         if ( !wxIsdigit(*i) )
1490             return false;
1491     }
1492
1493     return true;
1494 }
1495
1496 wxString wxString::Strip(stripType w) const
1497 {
1498     wxString s = *this;
1499     if ( w & leading ) s.Trim(false);
1500     if ( w & trailing ) s.Trim(true);
1501     return s;
1502 }
1503
1504 // ---------------------------------------------------------------------------
1505 // case conversion
1506 // ---------------------------------------------------------------------------
1507
1508 wxString& wxString::MakeUpper()
1509 {
1510   for ( iterator it = begin(), en = end(); it != en; ++it )
1511     *it = (wxChar)wxToupper(*it);
1512
1513   return *this;
1514 }
1515
1516 wxString& wxString::MakeLower()
1517 {
1518   for ( iterator it = begin(), en = end(); it != en; ++it )
1519     *it = (wxChar)wxTolower(*it);
1520
1521   return *this;
1522 }
1523
1524 wxString& wxString::MakeCapitalized()
1525 {
1526     const iterator en = end();
1527     iterator it = begin();
1528     if ( it != en )
1529     {
1530         *it = (wxChar)wxToupper(*it);
1531         for ( ++it; it != en; ++it )
1532             *it = (wxChar)wxTolower(*it);
1533     }
1534
1535     return *this;
1536 }
1537
1538 // ---------------------------------------------------------------------------
1539 // trimming and padding
1540 // ---------------------------------------------------------------------------
1541
1542 // some compilers (VC++ 6.0 not to name them) return true for a call to
1543 // isspace('\xEA') in the C locale which seems to be broken to me, but we have
1544 // to live with this by checking that the character is a 7 bit one - even if
1545 // this may fail to detect some spaces (I don't know if Unicode doesn't have
1546 // space-like symbols somewhere except in the first 128 chars), it is arguably
1547 // still better than trimming away accented letters
1548 inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1549
1550 // trims spaces (in the sense of isspace) from left or right side
1551 wxString& wxString::Trim(bool bFromRight)
1552 {
1553     // first check if we're going to modify the string at all
1554     if ( !empty() &&
1555          (
1556           (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1557           (!bFromRight && wxSafeIsspace(GetChar(0u)))
1558          )
1559        )
1560     {
1561         if ( bFromRight )
1562         {
1563             // find last non-space character
1564             reverse_iterator psz = rbegin();
1565             while ( (psz != rend()) && wxSafeIsspace(*psz) )
1566                 ++psz;
1567
1568             // truncate at trailing space start
1569             erase(psz.base(), end());
1570         }
1571         else
1572         {
1573             // find first non-space character
1574             iterator psz = begin();
1575             while ( (psz != end()) && wxSafeIsspace(*psz) )
1576                 ++psz;
1577
1578             // fix up data and length
1579             erase(begin(), psz);
1580         }
1581     }
1582
1583     return *this;
1584 }
1585
1586 // adds nCount characters chPad to the string from either side
1587 wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1588 {
1589     wxString s(chPad, nCount);
1590
1591     if ( bFromRight )
1592         *this += s;
1593     else
1594     {
1595         s += *this;
1596         swap(s);
1597     }
1598
1599     return *this;
1600 }
1601
1602 // truncate the string
1603 wxString& wxString::Truncate(size_t uiLen)
1604 {
1605     if ( uiLen < length() )
1606     {
1607         erase(begin() + uiLen, end());
1608     }
1609     //else: nothing to do, string is already short enough
1610
1611     return *this;
1612 }
1613
1614 // ---------------------------------------------------------------------------
1615 // finding (return wxNOT_FOUND if not found and index otherwise)
1616 // ---------------------------------------------------------------------------
1617
1618 // find a character
1619 int wxString::Find(wxUniChar ch, bool bFromEnd) const
1620 {
1621     size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1622
1623     return (idx == npos) ? wxNOT_FOUND : (int)idx;
1624 }
1625
1626 // ----------------------------------------------------------------------------
1627 // conversion to numbers
1628 // ----------------------------------------------------------------------------
1629
1630 // The implementation of all the functions below is exactly the same so factor
1631 // it out. Note that number extraction works correctly on UTF-8 strings, so
1632 // we can use wxStringCharType and wx_str() for maximum efficiency.
1633
// DO_IF_NOT_WINCE(x) expands to x everywhere except under Windows CE where it
// expands to nothing: it is used to wrap the code using errno, as <errno.h> is
// not included at the top of this file for that platform.
#ifndef __WXWINCE__
    #define DO_IF_NOT_WINCE(x) x
#else
    #define DO_IF_NOT_WINCE(x)
#endif

// Common prologue of the conversion functions: checks that the output pointer
// pVal is not NULL (returning false from the enclosing function otherwise),
// resets errno and declares the locals "start" (beginning of the string data)
// and "end" (to be filled in by the strtoxxx() function called next).
#define WX_STRING_TO_X_TYPE_START                                           \
    wxCHECK_MSG( pVal, false, wxT("NULL output pointer") );                  \
    DO_IF_NOT_WINCE( errno = 0; )                                           \
    const wxStringCharType *start = wx_str();                               \
    wxStringCharType *end;

// Common epilogue of the conversion functions; expects the parsed value to be
// in a local variable called "val".
//
// Notice that we return false without modifying the output parameter at all
// if nothing could be parsed (or if a range error occurred). However if the
// beginning of the string was parsed successfully but the entire string was
// not consumed, we do store the value parsed so far in the output parameter
// and still return false.
#define WX_STRING_TO_X_TYPE_END                                             \
    if ( end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )                 \
        return false;                                                       \
    *pVal = val;                                                            \
    return !*end;
1654
// Parse the string as a long in the given base (0, or between 2 and 36
// inclusive) using wxStrtol().
//
// Returns true only if the entire string was parsed; see the comment before
// WX_STRING_TO_X_TYPE_END for what happens to *pVal when false is returned.
bool wxString::ToLong(long *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );

    // NB: the macros below rely on the local variable being called "val"
    WX_STRING_TO_X_TYPE_START
    long val = wxStrtol(start, &end, base);
    WX_STRING_TO_X_TYPE_END
}
1663
// Parse the string as an unsigned long in the given base (0, or between 2 and
// 36 inclusive) using wxStrtoul().
//
// Returns true only if the entire string was parsed; see the comment before
// WX_STRING_TO_X_TYPE_END for what happens to *pVal when false is returned.
bool wxString::ToULong(unsigned long *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );

    // NB: the macros below rely on the local variable being called "val"
    WX_STRING_TO_X_TYPE_START
    unsigned long val = wxStrtoul(start, &end, base);
    WX_STRING_TO_X_TYPE_END
}
1672
// Parse the string as a wxLongLong_t in the given base (0, or between 2 and
// 36 inclusive) using wxStrtoll().
//
// Returns true only if the entire string was parsed; see the comment before
// WX_STRING_TO_X_TYPE_END for what happens to *pVal when false is returned.
bool wxString::ToLongLong(wxLongLong_t *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );

    // NB: the macros below rely on the local variable being called "val"
    WX_STRING_TO_X_TYPE_START
    wxLongLong_t val = wxStrtoll(start, &end, base);
    WX_STRING_TO_X_TYPE_END
}
1681
// Parse the string as a wxULongLong_t in the given base (0, or between 2 and
// 36 inclusive) using wxStrtoull().
//
// Returns true only if the entire string was parsed; see the comment before
// WX_STRING_TO_X_TYPE_END for what happens to *pVal when false is returned.
bool wxString::ToULongLong(wxULongLong_t *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );

    // NB: the macros below rely on the local variable being called "val"
    WX_STRING_TO_X_TYPE_START
    wxULongLong_t val = wxStrtoull(start, &end, base);
    WX_STRING_TO_X_TYPE_END
}
1690
// Parse the string as a double using wxStrtod().
//
// Returns true only if the entire string was parsed; see the comment before
// WX_STRING_TO_X_TYPE_END for what happens to *pVal when false is returned.
bool wxString::ToDouble(double *pVal) const
{
    // NB: the macros below rely on the local variable being called "val"
    WX_STRING_TO_X_TYPE_START
    double val = wxStrtod(start, &end);
    WX_STRING_TO_X_TYPE_END
}
1697
1698 #if wxUSE_XLOCALE
1699
// Same as ToLong() but the parsing is done by one of the wxStrtol_l()
// functions which is passed wxCLocale, i.e. the C locale is used instead of
// the current one.
bool wxString::ToCLong(long *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );

    // NB: the macros below rely on the local variable being called "val"
    WX_STRING_TO_X_TYPE_START
    // the "A" variant is the one used when the string data is narrow (UTF-8
    // or ANSI build) and xlocale support is available
#if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT)
    long val = wxStrtol_lA(start, &end, base, wxCLocale);
#else
    long val = wxStrtol_l(start, &end, base, wxCLocale);
#endif
    WX_STRING_TO_X_TYPE_END
}
1712
// Same as ToULong() but the parsing is done by one of the wxStrtoul_l()
// functions which is passed wxCLocale, i.e. the C locale is used instead of
// the current one.
bool wxString::ToCULong(unsigned long *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );

    // NB: the macros below rely on the local variable being called "val"
    WX_STRING_TO_X_TYPE_START
    // the "A" variant is the one used when the string data is narrow (UTF-8
    // or ANSI build) and xlocale support is available
#if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT)
    unsigned long val = wxStrtoul_lA(start, &end, base, wxCLocale);
#else
    unsigned long val = wxStrtoul_l(start, &end, base, wxCLocale);
#endif
    WX_STRING_TO_X_TYPE_END
}
1725
// Same as ToDouble() but the parsing is done by one of the wxStrtod_l()
// functions which is passed wxCLocale, i.e. the C locale is used instead of
// the current one.
bool wxString::ToCDouble(double *pVal) const
{
    // NB: the macros below rely on the local variable being called "val"
    WX_STRING_TO_X_TYPE_START
    // the "A" variant is the one used when the string data is narrow (UTF-8
    // or ANSI build) and xlocale support is available
#if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT)
    double val = wxStrtod_lA(start, &end, wxCLocale);
#else
    double val = wxStrtod_l(start, &end, wxCLocale);
#endif
    WX_STRING_TO_X_TYPE_END
}
1736
1737 #else // wxUSE_XLOCALE
1738
1739 // Provide implementation of these functions even when wxUSE_XLOCALE is
1740 // disabled, we still need them in wxWidgets internal code.
1741
1742 // For integers we just assume the current locale uses the same number
1743 // representation as the C one as there is nothing else we can do.
1744 bool wxString::ToCLong(long *pVal, int base) const
1745 {
1746     return ToLong(pVal, base);
1747 }
1748
1749 bool wxString::ToCULong(unsigned long *pVal, int base) const
1750 {
1751     return ToULong(pVal, base);
1752 }
1753
1754 // For floating point numbers we have to handle the problem of the decimal
1755 // point which is different in different locales.
1756 bool wxString::ToCDouble(double *pVal) const
1757 {
1758     // Create a copy of this string using the decimal point instead of whatever
1759     // separator the current locale uses.
1760 #if wxUSE_INTL
1761     wxString sep = wxLocale::GetInfo(wxLOCALE_DECIMAL_POINT,
1762                                      wxLOCALE_CAT_NUMBER);
1763     if ( sep == "." )
1764     {
1765         // We can avoid an unnecessary string copy in this case.
1766         return ToDouble(pVal);
1767     }
1768 #else // !wxUSE_INTL
1769     // We don't know what the current separator is so it might even be a point
1770     // already, try to parse the string as a double:
1771     if ( ToDouble(pVal) )
1772     {
1773         // It must have been the point, nothing else to do.
1774         return true;
1775     }
1776
1777     // Try to guess the separator, using the most common alternative value.
1778     wxString sep(",");
1779 #endif // wxUSE_INTL/!wxUSE_INTL
1780     wxString cstr(*this);
1781     cstr.Replace(".", sep);
1782
1783     return cstr.ToDouble(pVal);
1784 }
1785
1786 #endif  // wxUSE_XLOCALE/!wxUSE_XLOCALE
1787
1788 // ---------------------------------------------------------------------------
1789 // formatted output
1790 // ---------------------------------------------------------------------------
1791
1792 #if !wxUSE_UTF8_LOCALE_ONLY
1793 /* static */
1794 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1795 wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
1796 #else
1797 wxString wxString::DoFormatWchar(const wxChar *format, ...)
1798 #endif
1799 {
1800     va_list argptr;
1801     va_start(argptr, format);
1802
1803     wxString s;
1804     s.PrintfV(format, argptr);
1805
1806     va_end(argptr);
1807
1808     return s;
1809 }
1810 #endif // !wxUSE_UTF8_LOCALE_ONLY
1811
#if wxUSE_UNICODE_UTF8
// Return a new string formatted, in printf() style, according to the narrow
// format string and the variable arguments following it.
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
1827
1828 /* static */
1829 wxString wxString::FormatV(const wxString& format, va_list argptr)
1830 {
1831     wxString s;
1832     s.PrintfV(format, argptr);
1833     return s;
1834 }
1835
1836 #if !wxUSE_UTF8_LOCALE_ONLY
1837 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1838 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
1839 #else
1840 int wxString::DoPrintfWchar(const wxChar *format, ...)
1841 #endif
1842 {
1843     va_list argptr;
1844     va_start(argptr, format);
1845
1846 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1847     // get a pointer to the wxString instance; we have to use dynamic_cast<>
1848     // because it's the only cast that works safely for downcasting when
1849     // multiple inheritance is used:
1850     wxString *str = static_cast<wxString*>(this);
1851 #else
1852     wxString *str = this;
1853 #endif
1854
1855     int iLen = str->PrintfV(format, argptr);
1856
1857     va_end(argptr);
1858
1859     return iLen;
1860 }
1861 #endif // !wxUSE_UTF8_LOCALE_ONLY
1862
#if wxUSE_UNICODE_UTF8
// Replace the contents of the string with the result of formatting the
// variable arguments according to the narrow format string; returns whatever
// PrintfV() returns.
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);
    const int len = PrintfV(format, args);
    va_end(args);

    return len;
}
#endif // wxUSE_UNICODE_UTF8
1876
1877 /*
1878     Uses wxVsnprintf and places the result into the this string.
1879
1880     In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1881     it is vswprintf.  Due to a discrepancy between vsnprintf and vswprintf in
1882     the ISO C99 (and thus SUSv3) standard the return value for the case of
1883     an undersized buffer is inconsistent.  For conforming vsnprintf
1884     implementations the function must return the number of characters that
1885     would have been printed had the buffer been large enough.  For conforming
1886     vswprintf implementations the function must return a negative number
1887     and set errno.
1888
1889     What vswprintf sets errno to is undefined but Darwin seems to set it to
1890     EOVERFLOW.  The only expected errno are EILSEQ and EINVAL.  Both of
1891     those are defined in the standard and backed up by several conformance
1892     statements.  Note that ENOMEM mentioned in the manual page does not
1893     apply to swprintf, only wprintf and fwprintf.
1894
1895     Official manual page:
1896     http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1897
1898     Some conformance statements (AIX, Solaris):
1899     http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1900     http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1901
1902     Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1903     EILSEQ and EINVAL are specifically defined to mean the error is other than
1904     an undersized buffer and no other errno are defined we treat those two
1905     as meaning hard errors and everything else gets the old behavior which
1906     is to keep looping and increasing buffer size until the function succeeds.
1907
1908     In practice it's impossible to determine before compilation which behavior
1909     may be used.  The vswprintf function may have vsnprintf-like behavior or
1910     vice-versa.  Behavior detected on one release can theoretically change
1911     with an updated release.  Not to mention that configure testing for it
1912     would require the test to be run on the host system, not the build system
1913     which makes cross compilation difficult. Therefore, we make no assumptions
1914     about behavior and try our best to handle every known case, including the
1915     case where wxVsnprintf returns a negative number and fails to set errno.
1916
1917     There is yet one more non-standard implementation and that is our own.
1918     Fortunately, that can be detected at compile-time.
1919
1920     On top of all that, ISO C99 explicitly defines snprintf to write a null
1921     character to the last position of the specified buffer.  That would be
1922     at the given buffer size minus 1.  It is supposed to do this even if it
1923     turns out that the buffer is sized too small.
1924
1925     Darwin (tested on 10.5) follows the C99 behavior exactly.
1926
1927     Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1928     errno even when it fails.  However, it only seems to ever fail due
1929     to an undersized buffer.
1930 */
1931 #if wxUSE_UNICODE_UTF8
1932 template<typename BufferType>
1933 #else
1934 // we only need one version in non-UTF8 builds and at least two Windows
1935 // compilers have problems with this function template, so use just one
1936 // normal function here
1937 #endif
1938 static int DoStringPrintfV(wxString& str,
1939                            const wxString& format, va_list argptr)
1940 {
1941     int size = 1024;
1942
1943     for ( ;; )
1944     {
1945 #if wxUSE_UNICODE_UTF8
1946         BufferType tmp(str, size + 1);
1947         typename BufferType::CharType *buf = tmp;
1948 #else
1949         wxStringBuffer tmp(str, size + 1);
1950         wxChar *buf = tmp;
1951 #endif
1952
1953         if ( !buf )
1954         {
1955             // out of memory
1956
1957             // in UTF-8 build, leaving uninitialized junk in the buffer
1958             // could result in invalid non-empty UTF-8 string, so just
1959             // reset the string to empty on failure:
1960             buf[0] = '\0';
1961             return -1;
1962         }
1963
1964         // wxVsnprintf() may modify the original arg pointer, so pass it
1965         // only a copy
1966         va_list argptrcopy;
1967         wxVaCopy(argptrcopy, argptr);
1968
1969 #ifndef __WXWINCE__
1970         // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1971         errno = 0;
1972 #endif
1973         int len = wxVsnprintf(buf, size, format, argptrcopy);
1974         va_end(argptrcopy);
1975
1976         // some implementations of vsnprintf() don't NUL terminate
1977         // the string if there is not enough space for it so
1978         // always do it manually
1979         // FIXME: This really seems to be the wrong and would be an off-by-one
1980         // bug except the code above allocates an extra character.
1981         buf[size] = wxT('\0');
1982
1983         // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1984         // total number of characters which would have been written if the
1985         // buffer were large enough (newer standards such as Unix98)
1986         if ( len < 0 )
1987         {
1988             // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1989             //     wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1990             //     is true if *both* of them use our own implementation,
1991             //     otherwise we can't be sure
1992 #if wxUSE_WXVSNPRINTF
1993             // we know that our own implementation of wxVsnprintf() returns -1
1994             // only for a format error - thus there's something wrong with
1995             // the user's format string
1996             buf[0] = '\0';
1997             return -1;
1998 #else // possibly using system version
1999             // assume it only returns error if there is not enough space, but
2000             // as we don't know how much we need, double the current size of
2001             // the buffer
2002 #ifndef __WXWINCE__
2003             if( (errno == EILSEQ) || (errno == EINVAL) )
2004             // If errno was set to one of the two well-known hard errors
2005             // then fail immediately to avoid an infinite loop.
2006                 return -1;
2007             else
2008 #endif // __WXWINCE__
2009             // still not enough, as we don't know how much we need, double the
2010             // current size of the buffer
2011                 size *= 2;
2012 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
2013         }
2014         else if ( len >= size )
2015         {
2016 #if wxUSE_WXVSNPRINTF
2017             // we know that our own implementation of wxVsnprintf() returns
2018             // size+1 when there's not enough space but that's not the size
2019             // of the required buffer!
2020             size *= 2;      // so we just double the current size of the buffer
2021 #else
2022             // some vsnprintf() implementations NUL-terminate the buffer and
2023             // some don't in len == size case, to be safe always add 1
2024             // FIXME: I don't quite understand this comment.  The vsnprintf
2025             // function is specifically defined to return the number of
2026             // characters printed not including the null terminator.
2027             // So OF COURSE you need to add 1 to get the right buffer size.
2028             // The following line is definitely correct, no question.
2029             size = len + 1;
2030 #endif
2031         }
2032         else // ok, there was enough space
2033         {
2034             break;
2035         }
2036     }
2037
2038     // we could have overshot
2039     str.Shrink();
2040
2041     return str.length();
2042 }
2043
2044 int wxString::PrintfV(const wxString& format, va_list argptr)
2045 {
2046 #if wxUSE_UNICODE_UTF8
2047     #if wxUSE_STL_BASED_WXSTRING
2048         typedef wxStringTypeBuffer<char> Utf8Buffer;
2049     #else
2050         typedef wxStringInternalBuffer Utf8Buffer;
2051     #endif
2052 #endif
2053
2054 #if wxUSE_UTF8_LOCALE_ONLY
2055     return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
2056 #else
2057     #if wxUSE_UNICODE_UTF8
2058     if ( wxLocaleIsUtf8 )
2059         return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
2060     else
2061         // wxChar* version
2062         return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
2063     #else
2064         return DoStringPrintfV(*this, format, argptr);
2065     #endif // UTF8/WCHAR
2066 #endif
2067 }
2068
2069 // ----------------------------------------------------------------------------
2070 // misc other operations
2071 // ----------------------------------------------------------------------------
2072
2073 // returns true if the string matches the pattern which may contain '*' and
2074 // '?' metacharacters (as usual, '?' matches any character and '*' any number
2075 // of them)
2076 bool wxString::Matches(const wxString& mask) const
2077 {
2078     // I disable this code as it doesn't seem to be faster (in fact, it seems
2079     // to be much slower) than the old, hand-written code below and using it
2080     // here requires always linking with libregex even if the user code doesn't
2081     // use it
2082 #if 0 // wxUSE_REGEX
2083     // first translate the shell-like mask into a regex
2084     wxString pattern;
2085     pattern.reserve(wxStrlen(pszMask));
2086
2087     pattern += wxT('^');
2088     while ( *pszMask )
2089     {
2090         switch ( *pszMask )
2091         {
2092             case wxT('?'):
2093                 pattern += wxT('.');
2094                 break;
2095
2096             case wxT('*'):
2097                 pattern += wxT(".*");
2098                 break;
2099
2100             case wxT('^'):
2101             case wxT('.'):
2102             case wxT('$'):
2103             case wxT('('):
2104             case wxT(')'):
2105             case wxT('|'):
2106             case wxT('+'):
2107             case wxT('\\'):
2108                 // these characters are special in a RE, quote them
2109                 // (however note that we don't quote '[' and ']' to allow
2110                 // using them for Unix shell like matching)
2111                 pattern += wxT('\\');
2112                 // fall through
2113
2114             default:
2115                 pattern += *pszMask;
2116         }
2117
2118         pszMask++;
2119     }
2120     pattern += wxT('$');
2121
2122     // and now use it
2123     return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
2124 #else // !wxUSE_REGEX
2125   // TODO: this is, of course, awfully inefficient...
2126
2127   // FIXME-UTF8: implement using iterators, remove #if
2128 #if wxUSE_UNICODE_UTF8
2129   const wxScopedWCharBuffer maskBuf = mask.wc_str();
2130   const wxScopedWCharBuffer txtBuf = wc_str();
2131   const wxChar *pszMask = maskBuf.data();
2132   const wxChar *pszTxt = txtBuf.data();
2133 #else
2134   const wxChar *pszMask = mask.wx_str();
2135   // the char currently being checked
2136   const wxChar *pszTxt = wx_str();
2137 #endif
2138
2139   // the last location where '*' matched
2140   const wxChar *pszLastStarInText = NULL;
2141   const wxChar *pszLastStarInMask = NULL;
2142
2143 match:
2144   for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
2145     switch ( *pszMask ) {
2146       case wxT('?'):
2147         if ( *pszTxt == wxT('\0') )
2148           return false;
2149
2150         // pszTxt and pszMask will be incremented in the loop statement
2151
2152         break;
2153
2154       case wxT('*'):
2155         {
2156           // remember where we started to be able to backtrack later
2157           pszLastStarInText = pszTxt;
2158           pszLastStarInMask = pszMask;
2159
2160           // ignore special chars immediately following this one
2161           // (should this be an error?)
2162           while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
2163             pszMask++;
2164
2165           // if there is nothing more, match
2166           if ( *pszMask == wxT('\0') )
2167             return true;
2168
2169           // are there any other metacharacters in the mask?
2170           size_t uiLenMask;
2171           const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));
2172
2173           if ( pEndMask != NULL ) {
2174             // we have to match the string between two metachars
2175             uiLenMask = pEndMask - pszMask;
2176           }
2177           else {
2178             // we have to match the remainder of the string
2179             uiLenMask = wxStrlen(pszMask);
2180           }
2181
2182           wxString strToMatch(pszMask, uiLenMask);
2183           const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
2184           if ( pMatch == NULL )
2185             return false;
2186
2187           // -1 to compensate "++" in the loop
2188           pszTxt = pMatch + uiLenMask - 1;
2189           pszMask += uiLenMask - 1;
2190         }
2191         break;
2192
2193       default:
2194         if ( *pszMask != *pszTxt )
2195           return false;
2196         break;
2197     }
2198   }
2199
2200   // match only if nothing left
2201   if ( *pszTxt == wxT('\0') )
2202     return true;
2203
2204   // if we failed to match, backtrack if we can
2205   if ( pszLastStarInText ) {
2206     pszTxt = pszLastStarInText + 1;
2207     pszMask = pszLastStarInMask;
2208
2209     pszLastStarInText = NULL;
2210
2211     // don't bother resetting pszLastStarInMask, it's unnecessary
2212
2213     goto match;
2214   }
2215
2216   return false;
2217 #endif // wxUSE_REGEX/!wxUSE_REGEX
2218 }
2219
2220 // Count the number of chars
2221 int wxString::Freq(wxUniChar ch) const
2222 {
2223     int count = 0;
2224     for ( const_iterator i = begin(); i != end(); ++i )
2225     {
2226         if ( *i == ch )
2227             count ++;
2228     }
2229     return count;
2230 }
2231