src/common/string.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/string.cpp
   3 // Purpose:     wxString class
   4 // Author:      Vadim Zeitlin, Ryan Norton
   5 // Modified by:
   6 // Created:     29/01/98
   7 // RCS-ID:      $Id$
   8 // Copyright:   (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
   9 //              (c) 2004 Ryan Norton <wxprojects@comcast.net>
  10 // Licence:     wxWindows licence
  11 /////////////////////////////////////////////////////////////////////////////
  12
  13 // ===========================================================================
  14 // headers, declarations, constants
  15 // ===========================================================================
  16
  17 // For compilers that support precompilation, includes "wx.h".
  18 #include "wx/wxprec.h"
  19
  20 #ifdef __BORLANDC__
  21     #pragma hdrstop
  22 #endif
  23
  24 #ifndef WX_PRECOMP
  25     #include "wx/string.h"
  26     #include "wx/wxcrtvararg.h"
  27     #include "wx/log.h"
  28 #endif
  29
  30 #include <ctype.h>
  31
  32 #ifndef __WXWINCE__
  33     #include <errno.h>
  34 #endif
  35
  36 #include <string.h>
  37 #include <stdlib.h>
  38
  39 #include "wx/hashmap.h"
  40 #include "wx/vector.h"
  41 #include "wx/xlocale.h"
  42
  43 #ifdef __WXMSW__
  44     #include "wx/msw/wrapwin.h"
  45 #endif // __WXMSW__
  46
  47 // string handling functions used by wxString:
  48 #if wxUSE_UNICODE_UTF8
  49     #define wxStringMemcpy   memcpy
  50     #define wxStringMemcmp   memcmp
  51     #define wxStringMemchr   memchr
  52     #define wxStringStrlen   strlen
  53 #else
  54     #define wxStringMemcpy   wxTmemcpy
  55     #define wxStringMemcmp   wxTmemcmp
  56     #define wxStringMemchr   wxTmemchr
  57     #define wxStringStrlen   wxStrlen
  58 #endif
  59
  60 // define a function declared in wx/buffer.h here as we don't have buffer.cpp
  61 // and don't want to add it just because of this simple function
  62 namespace wxPrivate
  63 {
  64
  65 // wxXXXBuffer classes can be (implicitly) used during global statics
  66 // initialization so wrap the status UntypedBufferData variable in a function
  67 // to make it safe to access it even before all global statics are initialized
  68 UntypedBufferData *GetUntypedNullData()
  69 {
  70     static UntypedBufferData s_untypedNullData(NULL, 0);
  71
  72     return &s_untypedNullData;
  73 }
  74
  75 } // namespace wxPrivate
  76
  77 // ---------------------------------------------------------------------------
  78 // static class variables definition
  79 // ---------------------------------------------------------------------------
  80
  81 //According to STL _must_ be a -1 size_t
  82 const size_t wxString::npos = (size_t) -1;
  83
  84 #if wxUSE_STRING_POS_CACHE
  85
  86 #ifdef wxHAS_COMPILER_TLS
  87
  88 wxTLS_TYPE(wxString::Cache) wxString::ms_cache;
  89
  90 #else // !wxHAS_COMPILER_TLS
  91
  92 struct wxStrCacheInitializer
  93 {
  94     wxStrCacheInitializer()
  95     {
  96         // calling this function triggers s_cache initialization in it, and
  97         // from now on it becomes safe to call from multiple threads
  98         wxString::GetCache();
  99     }
 100 };
 101
 102 /*
 103 wxString::Cache& wxString::GetCache()
 104 {
 105     static wxTLS_TYPE(Cache) s_cache;
 106
 107     return wxTLS_VALUE(s_cache);
 108 }
 109 */
 110
 111 static wxStrCacheInitializer gs_stringCacheInit;
 112
 113 #endif // wxHAS_COMPILER_TLS/!wxHAS_COMPILER_TLS
 114
 115 // gdb seems to be unable to display thread-local variables correctly, at least
 116 // not my 6.4.98 version under amd64, so provide this debugging helper to do it
 117 #if wxDEBUG_LEVEL >= 2
 118
 119 struct wxStrCacheDumper
 120 {
 121     static void ShowAll()
 122     {
 123         puts("*** wxString cache dump:");
 124         for ( unsigned n = 0; n < wxString::Cache::SIZE; n++ )
 125         {
 126             const wxString::Cache::Element&
 127                 c = wxString::GetCacheBegin()[n];
 128
 129             printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
 130                    n,
 131                    n == wxString::LastUsedCacheElement() ? " [*]" : "",
 132                    c.str,
 133                    (unsigned long)c.pos,
 134                    (unsigned long)c.impl,
 135                    (long)c.len);
 136         }
 137     }
 138 };
 139
 140 void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
 141
 142 #endif // wxDEBUG_LEVEL >= 2
 143
 144 #ifdef wxPROFILE_STRING_CACHE
 145
 146 wxString::CacheStats wxString::ms_cacheStats;
 147
 148 struct wxStrCacheStatsDumper
 149 {
 150     ~wxStrCacheStatsDumper()
 151     {
 152         const wxString::CacheStats& stats = wxString::ms_cacheStats;
 153
 154         if ( stats.postot )
 155         {
 156             puts("*** wxString cache statistics:");
 157             printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
 158                    stats.postot);
 159             printf("\tHits %u (of which %u not used) or %.2f%%\n",
 160                    stats.poshits,
 161                    stats.mishits,
 162                    100.*float(stats.poshits - stats.mishits)/stats.postot);
 163             printf("\tAverage position requested: %.2f\n",
 164                    float(stats.sumpos) / stats.postot);
 165             printf("\tAverage offset after cached hint: %.2f\n",
 166                    float(stats.sumofs) / stats.postot);
 167         }
 168
 169         if ( stats.lentot )
 170         {
 171             printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
 172                    stats.lentot, 100.*float(stats.lenhits)/stats.lentot);
 173         }
 174     }
 175 };
 176
 177 static wxStrCacheStatsDumper s_showCacheStats;
 178
 179 #endif // wxPROFILE_STRING_CACHE
 180
 181 #endif // wxUSE_STRING_POS_CACHE
 182
 183 // ----------------------------------------------------------------------------
 184 // global functions
 185 // ----------------------------------------------------------------------------
 186
 187 #if wxUSE_STD_IOSTREAM
 188
 189 #include <iostream>
 190
 191 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
 192 {
 193 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
 194     const wxScopedCharBuffer buf(str.AsCharBuf());
 195     if ( !buf )
 196         os.clear(wxSTD ios_base::failbit);
 197     else
 198         os << buf.data();
 199
 200     return os;
 201 #else
 202     return os << str.AsInternal();
 203 #endif
 204 }
 205
 206 wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
 207 {
 208     return os << str.c_str();
 209 }
 210
 211 wxSTD ostream& operator<<(wxSTD ostream& os, const wxScopedCharBuffer& str)
 212 {
 213     return os << str.data();
 214 }
 215
 216 #ifndef __BORLANDC__
 217 wxSTD ostream& operator<<(wxSTD ostream& os, const wxScopedWCharBuffer& str)
 218 {
 219     return os << str.data();
 220 }
 221 #endif
 222
 223 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
 224
 225 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxString& str)
 226 {
 227     return wos << str.wc_str();
 228 }
 229
 230 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxCStrData& str)
 231 {
 232     return wos << str.AsWChar();
 233 }
 234
 235 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxScopedWCharBuffer& str)
 236 {
 237     return wos << str.data();
 238 }
 239
 240 #endif  // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
 241
 242 #endif // wxUSE_STD_IOSTREAM
 243
 244 // ===========================================================================
 245 // wxString class core
 246 // ===========================================================================
 247
 248 #if wxUSE_UNICODE_UTF8
 249
 250 void wxString::PosLenToImpl(size_t pos, size_t len,
 251                             size_t *implPos, size_t *implLen) const
 252 {
 253     if ( pos == npos )
 254     {
 255         *implPos = npos;
 256     }
 257     else // have valid start position
 258     {
 259         const const_iterator b = GetIterForNthChar(pos);
 260         *implPos = wxStringImpl::const_iterator(b.impl()) - m_impl.begin();
 261         if ( len == npos )
 262         {
 263             *implLen = npos;
 264         }
 265         else // have valid length too
 266         {
 267             // we need to handle the case of length specifying a substring
 268             // going beyond the end of the string, just as std::string does
 269             const const_iterator e(end());
 270             const_iterator i(b);
 271             while ( len && i <= e )
 272             {
 273                 ++i;
 274                 --len;
 275             }
 276
 277             *implLen = i.impl() - b.impl();
 278         }
 279     }
 280 }
 281
 282 #endif // wxUSE_UNICODE_UTF8
 283
 284 // ----------------------------------------------------------------------------
 285 // wxCStrData converted strings caching
 286 // ----------------------------------------------------------------------------
 287
 288 // FIXME-UTF8: temporarily disabled because it doesn't work with global
 289 //             string objects; re-enable after fixing this bug and benchmarking
 290 //             performance to see if using a hash is a good idea at all
 291 #if 0
 292
 293 // For backward compatibility reasons, it must be possible to assign the value
 294 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
 295 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
 296 // because the memory would be freed immediately, but it has to be valid as long
 297 // as the string is not modified, so that code like this still works:
 298 //
 299 // const wxChar *s = str.c_str();
 300 // while ( s ) { ... }
 301
 302 // FIXME-UTF8: not thread safe!
 303 // FIXME-UTF8: we currently clear the cached conversion only when the string is
 304 //             destroyed, but we should do it when the string is modified, to
 305 //             keep memory usage down
 306 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
 307 //             invalidated the cache on every change, we could keep the previous
 308 //             conversion
 309 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
 310 //             to use mb_str() or wc_str() instead of (const [w]char*)c_str()
 311
 312 template<typename T>
 313 static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
 314 {
 315     typename T::iterator i = hash.find(wxConstCast(s, wxString));
 316     if ( i != hash.end() )
 317     {
 318         free(i->second);
 319         hash.erase(i);
 320     }
 321 }
 322
 323 #if wxUSE_UNICODE
 324 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
 325 //     so we have to use wxString* here and const-cast when used
 326 WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
 327                     wxStringCharConversionCache);
 328 static wxStringCharConversionCache gs_stringsCharCache;
 329
 330 const char* wxCStrData::AsChar() const
 331 {
 332     // remove previously cache value, if any (see FIXMEs above):
 333     DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
 334
 335     // convert the string and keep it:
 336     const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
 337         m_str->mb_str().release();
 338
 339     return s + m_offset;
 340 }
 341 #endif // wxUSE_UNICODE
 342
 343 #if !wxUSE_UNICODE_WCHAR
 344 WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
 345                     wxStringWCharConversionCache);
 346 static wxStringWCharConversionCache gs_stringsWCharCache;
 347
 348 const wchar_t* wxCStrData::AsWChar() const
 349 {
 350     // remove previously cache value, if any (see FIXMEs above):
 351     DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
 352
 353     // convert the string and keep it:
 354     const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
 355         m_str->wc_str().release();
 356
 357     return s + m_offset;
 358 }
 359 #endif // !wxUSE_UNICODE_WCHAR
 360
 361 wxString::~wxString()
 362 {
 363 #if wxUSE_UNICODE
 364     // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
 365     DeleteStringFromConversionCache(gs_stringsCharCache, this);
 366 #endif
 367 #if !wxUSE_UNICODE_WCHAR
 368     DeleteStringFromConversionCache(gs_stringsWCharCache, this);
 369 #endif
 370 }
 371 #endif
 372
 373 // ===========================================================================
 374 // wxString class core
 375 // ===========================================================================
 376
 377 // ---------------------------------------------------------------------------
 378 // construction and conversion
 379 // ---------------------------------------------------------------------------
 380
 381 #if wxUSE_UNICODE_WCHAR
 382 /* static */
 383 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 384                                                const wxMBConv& conv)
 385 {
 386     // anything to do?
 387     if ( !psz || nLength == 0 )
 388         return SubstrBufFromMB(wxWCharBuffer(L""), 0);
 389
 390     if ( nLength == npos )
 391         nLength = wxNO_LEN;
 392
 393     size_t wcLen;
 394     wxScopedWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 395     if ( !wcLen )
 396         return SubstrBufFromMB(wxWCharBuffer(L""), 0);
 397     else
 398         return SubstrBufFromMB(wcBuf, wcLen);
 399 }
 400 #endif // wxUSE_UNICODE_WCHAR
 401
 402 #if wxUSE_UNICODE_UTF8
 403 /* static */
 404 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 405                                                const wxMBConv& conv)
 406 {
 407     // anything to do?
 408     if ( !psz || nLength == 0 )
 409         return SubstrBufFromMB(wxCharBuffer(""), 0);
 410
 411     // if psz is already in UTF-8, we don't have to do the roundtrip to
 412     // wchar_t* and back:
 413     if ( conv.IsUTF8() )
 414     {
 415         // we need to validate the input because UTF8 iterators assume valid
 416         // UTF-8 sequence and psz may be invalid:
 417         if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
 418         {
 419             // we must pass the real string length to SubstrBufFromMB ctor
 420             if ( nLength == npos )
 421                 nLength = psz ? strlen(psz) : 0;
 422             return SubstrBufFromMB(wxScopedCharBuffer::CreateNonOwned(psz, nLength),
 423                                    nLength);
 424         }
 425         // else: do the roundtrip through wchar_t*
 426     }
 427
 428     if ( nLength == npos )
 429         nLength = wxNO_LEN;
 430
 431     // first convert to wide string:
 432     size_t wcLen;
 433     wxScopedWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 434     if ( !wcLen )
 435         return SubstrBufFromMB(wxCharBuffer(""), 0);
 436
 437     // and then to UTF-8:
 438     SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
 439     // widechar -> UTF-8 conversion isn't supposed to ever fail:
 440     wxASSERT_MSG( buf.data, wxT("conversion to UTF-8 failed") );
 441
 442     return buf;
 443 }
 444 #endif // wxUSE_UNICODE_UTF8
 445
 446 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 447 /* static */
 448 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
 449                                                const wxMBConv& conv)
 450 {
 451     // anything to do?
 452     if ( !pwz || nLength == 0 )
 453         return SubstrBufFromWC(wxCharBuffer(""), 0);
 454
 455     if ( nLength == npos )
 456         nLength = wxNO_LEN;
 457
 458     size_t mbLen;
 459     wxScopedCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
 460     if ( !mbLen )
 461         return SubstrBufFromWC(wxCharBuffer(""), 0);
 462     else
 463         return SubstrBufFromWC(mbBuf, mbLen);
 464 }
 465 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 466
 467 // This std::string::c_str()-like method returns a wide char pointer to string
 468 // contents. In wxUSE_UNICODE_WCHAR case it is trivial as it can simply return
 469 // a pointer to the internal representation. Otherwise a conversion is required
 470 // and it returns a temporary buffer.
 471 //
 472 // However for compatibility with c_str() and to avoid breaking existing code
 473 // doing
 474 //
 475 //      for ( const wchar_t *p = s.wc_str(); *p; p++ )
 476 //          ... use *p...
 477 //
 478 // we actually need to ensure that the returned buffer is _not_ temporary and
 479 // so we use wxString::m_convertedToWChar to store the returned data
 480 #if !wxUSE_UNICODE_WCHAR
 481
 482 const wchar_t *wxString::AsWChar(const wxMBConv& conv) const
 483 {
 484     const char * const strMB = m_impl.c_str();
 485     const size_t lenMB = m_impl.length();
 486
 487     // find out the size of the buffer needed
 488     const size_t lenWC = conv.ToWChar(NULL, 0, strMB, lenMB);
 489     if ( lenWC == wxCONV_FAILED )
 490         return NULL;
 491
 492     // keep the same buffer if the string size didn't change: this is not only
 493     // an optimization but also ensure that code which modifies string
 494     // character by character (without changing its length) can continue to use
 495     // the pointer returned by a previous wc_str() call even after changing the
 496     // string
 497
 498     // TODO-UTF8: we could check for ">" instead of "!=" here as this would
 499     //            allow to save on buffer reallocations but at the cost of
 500     //            consuming (even) more memory, we should benchmark this to
 501     //            determine if it's worth doing
 502     if ( !m_convertedToWChar.m_str || lenWC != m_convertedToWChar.m_len )
 503     {
 504         if ( !const_cast<wxString *>(this)->m_convertedToWChar.Extend(lenWC) )
 505             return NULL;
 506     }
 507
 508     // finally do convert
 509     m_convertedToWChar.m_str[lenWC] = L'\0';
 510     if ( conv.ToWChar(m_convertedToWChar.m_str, lenWC,
 511                       strMB, lenMB) == wxCONV_FAILED )
 512         return NULL;
 513
 514     return m_convertedToWChar.m_str;
 515 }
 516
 517 #endif // !wxUSE_UNICODE_WCHAR
 518
 519
 520 // Same thing for mb_str() which returns a normal char pointer to string
 521 // contents: this always requires converting it to the specified encoding in
 522 // non-ANSI build except if we need to convert to UTF-8 and this is what we
 523 // already use internally.
 524 #if wxUSE_UNICODE
 525
 526 const char *wxString::AsChar(const wxMBConv& conv) const
 527 {
 528 #if wxUSE_UNICODE_UTF8
 529     if ( conv.IsUTF8() )
 530         return m_impl.c_str();
 531
 532     const wchar_t * const strWC = AsWChar(wxMBConvStrictUTF8());
 533     const size_t lenWC = m_convertedToWChar.m_len;
 534 #else // wxUSE_UNICODE_WCHAR
 535     const wchar_t * const strWC = m_impl.c_str();
 536     const size_t lenWC = m_impl.length();
 537 #endif // wxUSE_UNICODE_UTF8/wxUSE_UNICODE_WCHAR
 538
 539     const size_t lenMB = conv.FromWChar(NULL, 0, strWC, lenWC);
 540     if ( lenMB == wxCONV_FAILED )
 541         return NULL;
 542
 543     if ( !m_convertedToChar.m_str || lenMB != m_convertedToChar.m_len )
 544     {
 545         if ( !const_cast<wxString *>(this)->m_convertedToChar.Extend(lenMB) )
 546             return NULL;
 547     }
 548
 549     m_convertedToChar.m_str[lenMB] = '\0';
 550     if ( conv.FromWChar(m_convertedToChar.m_str, lenMB,
 551                         strWC, lenWC) == wxCONV_FAILED )
 552         return NULL;
 553
 554     return m_convertedToChar.m_str;
 555 }
 556
 557 #endif // wxUSE_UNICODE
 558
 559 // shrink to minimal size (releasing extra memory)
 560 bool wxString::Shrink()
 561 {
 562   wxString tmp(begin(), end());
 563   swap(tmp);
 564   return tmp.length() == length();
 565 }
 566
 567 // deprecated compatibility code:
 568 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
 569 wxStringCharType *wxString::GetWriteBuf(size_t nLen)
 570 {
 571     return DoGetWriteBuf(nLen);
 572 }
 573
 574 void wxString::UngetWriteBuf()
 575 {
 576     DoUngetWriteBuf();
 577 }
 578
 579 void wxString::UngetWriteBuf(size_t nLen)
 580 {
 581     DoUngetWriteBuf(nLen);
 582 }
 583 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
 584
 585
 586 // ---------------------------------------------------------------------------
 587 // data access
 588 // ---------------------------------------------------------------------------
 589
 590 // all functions are inline in string.h
 591
 592 // ---------------------------------------------------------------------------
 593 // concatenation operators
 594 // ---------------------------------------------------------------------------
 595
 596 /*
 597  * concatenation functions come in 5 flavours:
 598  *  string + string
 599  *  char   + string      and      string + char
 600  *  C str  + string      and      string + C str
 601  */
 602
 603 wxString operator+(const wxString& str1, const wxString& str2)
 604 {
 605 #if !wxUSE_STL_BASED_WXSTRING
 606     wxASSERT( str1.IsValid() );
 607     wxASSERT( str2.IsValid() );
 608 #endif
 609
 610     wxString s = str1;
 611     s += str2;
 612
 613     return s;
 614 }
 615
 616 wxString operator+(const wxString& str, wxUniChar ch)
 617 {
 618 #if !wxUSE_STL_BASED_WXSTRING
 619     wxASSERT( str.IsValid() );
 620 #endif
 621
 622     wxString s = str;
 623     s += ch;
 624
 625     return s;
 626 }
 627
 628 wxString operator+(wxUniChar ch, const wxString& str)
 629 {
 630 #if !wxUSE_STL_BASED_WXSTRING
 631     wxASSERT( str.IsValid() );
 632 #endif
 633
 634     wxString s = ch;
 635     s += str;
 636
 637     return s;
 638 }
 639
 640 wxString operator+(const wxString& str, const char *psz)
 641 {
 642 #if !wxUSE_STL_BASED_WXSTRING
 643     wxASSERT( str.IsValid() );
 644 #endif
 645
 646     wxString s;
 647     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 648         wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
 649     }
 650     s += str;
 651     s += psz;
 652
 653     return s;
 654 }
 655
 656 wxString operator+(const wxString& str, const wchar_t *pwz)
 657 {
 658 #if !wxUSE_STL_BASED_WXSTRING
 659     wxASSERT( str.IsValid() );
 660 #endif
 661
 662     wxString s;
 663     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 664         wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
 665     }
 666     s += str;
 667     s += pwz;
 668
 669     return s;
 670 }
 671
 672 wxString operator+(const char *psz, const wxString& str)
 673 {
 674 #if !wxUSE_STL_BASED_WXSTRING
 675     wxASSERT( str.IsValid() );
 676 #endif
 677
 678     wxString s;
 679     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 680         wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
 681     }
 682     s = psz;
 683     s += str;
 684
 685     return s;
 686 }
 687
 688 wxString operator+(const wchar_t *pwz, const wxString& str)
 689 {
 690 #if !wxUSE_STL_BASED_WXSTRING
 691     wxASSERT( str.IsValid() );
 692 #endif
 693
 694     wxString s;
 695     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 696         wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
 697     }
 698     s = pwz;
 699     s += str;
 700
 701     return s;
 702 }
 703
 704 // ---------------------------------------------------------------------------
 705 // string comparison
 706 // ---------------------------------------------------------------------------
 707
 708 bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
 709 {
 710     return (length() == 1) && (compareWithCase ? GetChar(0u) == c
 711                                : wxToupper(GetChar(0u)) == wxToupper(c));
 712 }
 713
 714 #ifdef HAVE_STD_STRING_COMPARE
 715
 716 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
 717 //     UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
 718 //     sort strings in characters code point order by sorting the byte sequence
 719 //     in byte values order (i.e. what strcmp() and memcmp() do).
 720
 721 int wxString::compare(const wxString& str) const
 722 {
 723     return m_impl.compare(str.m_impl);
 724 }
 725
 726 int wxString::compare(size_t nStart, size_t nLen,
 727                       const wxString& str) const
 728 {
 729     size_t pos, len;
 730     PosLenToImpl(nStart, nLen, &pos, &len);
 731     return m_impl.compare(pos, len, str.m_impl);
 732 }
 733
 734 int wxString::compare(size_t nStart, size_t nLen,
 735                       const wxString& str,
 736                       size_t nStart2, size_t nLen2) const
 737 {
 738     size_t pos, len;
 739     PosLenToImpl(nStart, nLen, &pos, &len);
 740
 741     size_t pos2, len2;
 742     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 743
 744     return m_impl.compare(pos, len, str.m_impl, pos2, len2);
 745 }
 746
 747 int wxString::compare(const char* sz) const
 748 {
 749     return m_impl.compare(ImplStr(sz));
 750 }
 751
 752 int wxString::compare(const wchar_t* sz) const
 753 {
 754     return m_impl.compare(ImplStr(sz));
 755 }
 756
 757 int wxString::compare(size_t nStart, size_t nLen,
 758                       const char* sz, size_t nCount) const
 759 {
 760     size_t pos, len;
 761     PosLenToImpl(nStart, nLen, &pos, &len);
 762
 763     SubstrBufFromMB str(ImplStr(sz, nCount));
 764
 765     return m_impl.compare(pos, len, str.data, str.len);
 766 }
 767
 768 int wxString::compare(size_t nStart, size_t nLen,
 769                       const wchar_t* sz, size_t nCount) const
 770 {
 771     size_t pos, len;
 772     PosLenToImpl(nStart, nLen, &pos, &len);
 773
 774     SubstrBufFromWC str(ImplStr(sz, nCount));
 775
 776     return m_impl.compare(pos, len, str.data, str.len);
 777 }
 778
 779 #else // !HAVE_STD_STRING_COMPARE
 780
 781 static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
 782                           const wxStringCharType* s2, size_t l2)
 783 {
 784     if( l1 == l2 )
 785         return wxStringMemcmp(s1, s2, l1);
 786     else if( l1 < l2 )
 787     {
 788         int ret = wxStringMemcmp(s1, s2, l1);
 789         return ret == 0 ? -1 : ret;
 790     }
 791     else
 792     {
 793         int ret = wxStringMemcmp(s1, s2, l2);
 794         return ret == 0 ? +1 : ret;
 795     }
 796 }
 797
 798 int wxString::compare(const wxString& str) const
 799 {
 800     return ::wxDoCmp(m_impl.data(), m_impl.length(),
 801                      str.m_impl.data(), str.m_impl.length());
 802 }
 803
 804 int wxString::compare(size_t nStart, size_t nLen,
 805                       const wxString& str) const
 806 {
 807     wxASSERT(nStart <= length());
 808     size_type strLen = length() - nStart;
 809     nLen = strLen < nLen ? strLen : nLen;
 810
 811     size_t pos, len;
 812     PosLenToImpl(nStart, nLen, &pos, &len);
 813
 814     return ::wxDoCmp(m_impl.data() + pos,  len,
 815                      str.m_impl.data(), str.m_impl.length());
 816 }
 817
 818 int wxString::compare(size_t nStart, size_t nLen,
 819                       const wxString& str,
 820                       size_t nStart2, size_t nLen2) const
 821 {
 822     wxASSERT(nStart <= length());
 823     wxASSERT(nStart2 <= str.length());
 824     size_type strLen  =     length() - nStart,
 825               strLen2 = str.length() - nStart2;
 826     nLen  = strLen  < nLen  ? strLen  : nLen;
 827     nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
 828
 829     size_t pos, len;
 830     PosLenToImpl(nStart, nLen, &pos, &len);
 831     size_t pos2, len2;
 832     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 833
 834     return ::wxDoCmp(m_impl.data() + pos, len,
 835                      str.m_impl.data() + pos2, len2);
 836 }
 837
 838 int wxString::compare(const char* sz) const
 839 {
 840     SubstrBufFromMB str(ImplStr(sz, npos));
 841     if ( str.len == npos )
 842         str.len = wxStringStrlen(str.data);
 843     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 844 }
 845
 846 int wxString::compare(const wchar_t* sz) const
 847 {
 848     SubstrBufFromWC str(ImplStr(sz, npos));
 849     if ( str.len == npos )
 850         str.len = wxStringStrlen(str.data);
 851     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 852 }
 853
 854 int wxString::compare(size_t nStart, size_t nLen,
 855                       const char* sz, size_t nCount) const
 856 {
 857     wxASSERT(nStart <= length());
 858     size_type strLen = length() - nStart;
 859     nLen = strLen < nLen ? strLen : nLen;
 860
 861     size_t pos, len;
 862     PosLenToImpl(nStart, nLen, &pos, &len);
 863
 864     SubstrBufFromMB str(ImplStr(sz, nCount));
 865     if ( str.len == npos )
 866         str.len = wxStringStrlen(str.data);
 867
 868     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 869 }
 870
 871 int wxString::compare(size_t nStart, size_t nLen,
 872                       const wchar_t* sz, size_t nCount) const
 873 {
 874     wxASSERT(nStart <= length());
 875     size_type strLen = length() - nStart;
 876     nLen = strLen < nLen ? strLen : nLen;
 877
 878     size_t pos, len;
 879     PosLenToImpl(nStart, nLen, &pos, &len);
 880
 881     SubstrBufFromWC str(ImplStr(sz, nCount));
 882     if ( str.len == npos )
 883         str.len = wxStringStrlen(str.data);
 884
 885     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 886 }
 887
 888 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
 889
 890
 891 // ---------------------------------------------------------------------------
 892 // find_{first,last}_[not]_of functions
 893 // ---------------------------------------------------------------------------
 894
 895 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
 896
// NB: All these functions are implemented with the argument being wxChar*,
//     i.e. a wide character string in any Unicode build, even though the
//     native string representation is char* in the UTF-8 build. This is
//     because memchr() can't be used to determine if a character is in a set
//     encoded as UTF-8.
 901
 902 size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
 903 {
 904     return find_first_of(sz, nStart, wxStrlen(sz));
 905 }
 906
 907 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
 908 {
 909     return find_first_not_of(sz, nStart, wxStrlen(sz));
 910 }
 911
 912 size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
 913 {
 914     wxASSERT_MSG( nStart <= length(),  wxT("invalid index") );
 915
 916     size_t idx = nStart;
 917     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 918     {
 919         if ( wxTmemchr(sz, *i, n) )
 920             return idx;
 921     }
 922
 923     return npos;
 924 }
 925
 926 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
 927 {
 928     wxASSERT_MSG( nStart <= length(),  wxT("invalid index") );
 929
 930     size_t idx = nStart;
 931     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 932     {
 933         if ( !wxTmemchr(sz, *i, n) )
 934             return idx;
 935     }
 936
 937     return npos;
 938 }
 939
 940
 941 size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
 942 {
 943     return find_last_of(sz, nStart, wxStrlen(sz));
 944 }
 945
 946 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
 947 {
 948     return find_last_not_of(sz, nStart, wxStrlen(sz));
 949 }
 950
 951 size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
 952 {
 953     size_t len = length();
 954
 955     if ( nStart == npos )
 956     {
 957         nStart = len - 1;
 958     }
 959     else
 960     {
 961         wxASSERT_MSG( nStart <= len, wxT("invalid index") );
 962     }
 963
 964     size_t idx = nStart;
 965     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 966           i != rend(); --idx, ++i )
 967     {
 968         if ( wxTmemchr(sz, *i, n) )
 969             return idx;
 970     }
 971
 972     return npos;
 973 }
 974
 975 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
 976 {
 977     size_t len = length();
 978
 979     if ( nStart == npos )
 980     {
 981         nStart = len - 1;
 982     }
 983     else
 984     {
 985         wxASSERT_MSG( nStart <= len, wxT("invalid index") );
 986     }
 987
 988     size_t idx = nStart;
 989     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 990           i != rend(); --idx, ++i )
 991     {
 992         if ( !wxTmemchr(sz, *i, n) )
 993             return idx;
 994     }
 995
 996     return npos;
 997 }
 998
 999 size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
1000 {
1001     wxASSERT_MSG( nStart <= length(),  wxT("invalid index") );
1002
1003     size_t idx = nStart;
1004     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
1005     {
1006         if ( *i != ch )
1007             return idx;
1008     }
1009
1010     return npos;
1011 }
1012
1013 size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
1014 {
1015     size_t len = length();
1016
1017     if ( nStart == npos )
1018     {
1019         nStart = len - 1;
1020     }
1021     else
1022     {
1023         wxASSERT_MSG( nStart <= len, wxT("invalid index") );
1024     }
1025
1026     size_t idx = nStart;
1027     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1028           i != rend(); --idx, ++i )
1029     {
1030         if ( *i != ch )
1031             return idx;
1032     }
1033
1034     return npos;
1035 }
1036
// the functions above were implemented for wchar_t* arguments in Unicode
// build and char* in ANSI build; below are implementations for the other
// version:
//
// wxOtherCharType is the character type which is NOT wxChar in this build and
// STRCONV(...) converts a string of that type using wxConvLibc, casting the
// result to "const wxChar*" so that the wxChar overloads above can be called.
// Both macros are local to this section and are #undef'd at its end.
#if wxUSE_UNICODE
    #define wxOtherCharType char
    #define STRCONV         (const wxChar*)wxConvLibc.cMB2WC
#else
    #define wxOtherCharType wchar_t
    #define STRCONV         (const wxChar*)wxConvLibc.cWC2MB
#endif

// NOTE(review): the pointer produced by STRCONV() is presumably backed by a
// temporary buffer which only lives until the end of the enclosing full
// expression, so it must be consumed within the same statement, as is done
// below -- confirm against the wxMBConv declarations before restructuring.
//
// NOTE(review): the counted overloads forward the same n both as the length
// of the input to convert and as the length of the converted set; this looks
// like it assumes that the conversion preserves the number of characters --
// confirm.

size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_first_of(STRCONV(sz), nStart); }

size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
                               size_t n) const
    { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_last_of(STRCONV(sz), nStart); }
size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
                              size_t n) const
    { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_first_not_of(STRCONV(sz), nStart); }
size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
                                   size_t n) const
    { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_last_not_of(STRCONV(sz), nStart); }
size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
                                  size_t n) const
    { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }

#undef wxOtherCharType
#undef STRCONV
1072
1073 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1074
1075 // ===========================================================================
1076 // other common string functions
1077 // ===========================================================================
1078
1079 int wxString::CmpNoCase(const wxString& s) const
1080 {
1081 #if defined(__WXMSW__) && !wxUSE_UNICODE_UTF8
1082     // Prefer to use CompareString() if available as it's more efficient than
1083     // doing it manually or even using wxStricmp() (see #10375)
1084     //
1085     // Also note that not using NORM_STRINGSORT may result in not having a
1086     // strict weak ordering (e.g. s1 < s2 and s2 < s3 but s3 < s1) and so break
1087     // algorithms such as std::sort that rely on it. It's also more consistent
1088     // with the fall back version below.
1089     switch ( ::CompareString(LOCALE_USER_DEFAULT,
1090                              NORM_IGNORECASE | SORT_STRINGSORT,
1091                              m_impl.c_str(), m_impl.length(),
1092                              s.m_impl.c_str(), s.m_impl.length()) )
1093     {
1094         case CSTR_LESS_THAN:
1095             return -1;
1096
1097         case CSTR_EQUAL:
1098             return 0;
1099
1100         case CSTR_GREATER_THAN:
1101             return 1;
1102
1103         default:
1104             wxFAIL_MSG( "unexpected CompareString() return value" );
1105             // fall through
1106
1107         case 0:
1108             wxLogLastError("CompareString");
1109             // use generic code below
1110     }
1111 #endif // __WXMSW__ && !wxUSE_UNICODE_UTF8
1112
1113     // do the comparison manually: notice that we can't use wxStricmp() as it
1114     // doesn't handle embedded NULs
1115
1116     // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
1117     const_iterator i1 = begin();
1118     const_iterator end1 = end();
1119     const_iterator i2 = s.begin();
1120     const_iterator end2 = s.end();
1121
1122     for ( ; i1 != end1 && i2 != end2; ++i1, ++i2 )
1123     {
1124         wxUniChar lower1 = (wxChar)wxTolower(*i1);
1125         wxUniChar lower2 = (wxChar)wxTolower(*i2);
1126         if ( lower1 != lower2 )
1127             return lower1 < lower2 ? -1 : 1;
1128     }
1129
1130     size_t len1 = length();
1131     size_t len2 = s.length();
1132
1133     if ( len1 < len2 )
1134         return -1;
1135     else if ( len1 > len2 )
1136         return 1;
1137     return 0;
1138 }
1139
1140
1141 #if wxUSE_UNICODE
1142
1143 #ifdef __MWERKS__
1144 #ifndef __SCHAR_MAX__
1145 #define __SCHAR_MAX__ 127
1146 #endif
1147 #endif
1148
1149 wxString wxString::FromAscii(const char *ascii, size_t len)
1150 {
1151     if (!ascii || len == 0)
1152        return wxEmptyString;
1153
1154     wxString res;
1155
1156     {
1157         wxStringInternalBuffer buf(res, len);
1158         wxStringCharType *dest = buf;
1159
1160         for ( ; len > 0; --len )
1161         {
1162             unsigned char c = (unsigned char)*ascii++;
1163             wxASSERT_MSG( c < 0x80,
1164                           wxT("Non-ASCII value passed to FromAscii().") );
1165
1166             *dest++ = (wchar_t)c;
1167         }
1168     }
1169
1170     return res;
1171 }
1172
1173 wxString wxString::FromAscii(const char *ascii)
1174 {
1175     return FromAscii(ascii, wxStrlen(ascii));
1176 }
1177
1178 wxString wxString::FromAscii(char ascii)
1179 {
1180     // What do we do with '\0' ?
1181
1182     unsigned char c = (unsigned char)ascii;
1183
1184     wxASSERT_MSG( c < 0x80, wxT("Non-ASCII value passed to FromAscii().") );
1185
1186     // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1187     return wxString(wxUniChar((wchar_t)c));
1188 }
1189
1190 const wxScopedCharBuffer wxString::ToAscii() const
1191 {
1192     // this will allocate enough space for the terminating NUL too
1193     wxCharBuffer buffer(length());
1194     char *dest = buffer.data();
1195
1196     for ( const_iterator i = begin(); i != end(); ++i )
1197     {
1198         wxUniChar c(*i);
1199         // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1200         *dest++ = c.IsAscii() ? (char)c : '_';
1201
1202         // the output string can't have embedded NULs anyhow, so we can safely
1203         // stop at first of them even if we do have any
1204         if ( !c )
1205             break;
1206     }
1207
1208     return buffer;
1209 }
1210
1211 #endif // wxUSE_UNICODE
1212
1213 // extract string of length nCount starting at nFirst
1214 wxString wxString::Mid(size_t nFirst, size_t nCount) const
1215 {
1216     size_t nLen = length();
1217
1218     // default value of nCount is npos and means "till the end"
1219     if ( nCount == npos )
1220     {
1221         nCount = nLen - nFirst;
1222     }
1223
1224     // out-of-bounds requests return sensible things
1225     if ( nFirst + nCount > nLen )
1226     {
1227         nCount = nLen - nFirst;
1228     }
1229
1230     if ( nFirst > nLen )
1231     {
1232         // AllocCopy() will return empty string
1233         return wxEmptyString;
1234     }
1235
1236     wxString dest(*this, nFirst, nCount);
1237     if ( dest.length() != nCount )
1238     {
1239         wxFAIL_MSG( wxT("out of memory in wxString::Mid") );
1240     }
1241
1242     return dest;
1243 }
1244
1245 // check that the string starts with prefix and return the rest of the string
1246 // in the provided pointer if it is not NULL, otherwise return false
1247 bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
1248 {
1249     if ( compare(0, prefix.length(), prefix) != 0 )
1250         return false;
1251
1252     if ( rest )
1253     {
1254         // put the rest of the string into provided pointer
1255         rest->assign(*this, prefix.length(), npos);
1256     }
1257
1258     return true;
1259 }
1260
1261
1262 // check that the string ends with suffix and return the rest of it in the
1263 // provided pointer if it is not NULL, otherwise return false
1264 bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
1265 {
1266     int start = length() - suffix.length();
1267
1268     if ( start < 0 || compare(start, npos, suffix) != 0 )
1269         return false;
1270
1271     if ( rest )
1272     {
1273         // put the rest of the string into provided pointer
1274         rest->assign(*this, 0, start);
1275     }
1276
1277     return true;
1278 }
1279
1280
1281 // extract nCount last (rightmost) characters
1282 wxString wxString::Right(size_t nCount) const
1283 {
1284   if ( nCount > length() )
1285     nCount = length();
1286
1287   wxString dest(*this, length() - nCount, nCount);
1288   if ( dest.length() != nCount ) {
1289     wxFAIL_MSG( wxT("out of memory in wxString::Right") );
1290   }
1291   return dest;
1292 }
1293
1294 // get all characters after the last occurrence of ch
1295 // (returns the whole string if ch not found)
1296 wxString wxString::AfterLast(wxUniChar ch) const
1297 {
1298   wxString str;
1299   int iPos = Find(ch, true);
1300   if ( iPos == wxNOT_FOUND )
1301     str = *this;
1302   else
1303     str.assign(*this, iPos + 1, npos);
1304
1305   return str;
1306 }
1307
1308 // extract nCount first (leftmost) characters
1309 wxString wxString::Left(size_t nCount) const
1310 {
1311   if ( nCount > length() )
1312     nCount = length();
1313
1314   wxString dest(*this, 0, nCount);
1315   if ( dest.length() != nCount ) {
1316     wxFAIL_MSG( wxT("out of memory in wxString::Left") );
1317   }
1318   return dest;
1319 }
1320
1321 // get all characters before the first occurrence of ch
1322 // (returns the whole string if ch not found)
1323 wxString wxString::BeforeFirst(wxUniChar ch) const
1324 {
1325   int iPos = Find(ch);
1326   if ( iPos == wxNOT_FOUND )
1327       iPos = length();
1328   return wxString(*this, 0, iPos);
1329 }
1330
1331 /// get all characters before the last occurrence of ch
1332 /// (returns empty string if ch not found)
1333 wxString wxString::BeforeLast(wxUniChar ch) const
1334 {
1335   wxString str;
1336   int iPos = Find(ch, true);
1337   if ( iPos != wxNOT_FOUND && iPos != 0 )
1338     str = wxString(c_str(), iPos);
1339
1340   return str;
1341 }
1342
1343 /// get all characters after the first occurrence of ch
1344 /// (returns empty string if ch not found)
1345 wxString wxString::AfterFirst(wxUniChar ch) const
1346 {
1347   wxString str;
1348   int iPos = Find(ch);
1349   if ( iPos != wxNOT_FOUND )
1350       str.assign(*this, iPos + 1, npos);
1351
1352   return str;
1353 }
1354
1355 // replace first (or all) occurrences of some substring with another one
1356 size_t wxString::Replace(const wxString& strOld,
1357                          const wxString& strNew, bool bReplaceAll)
1358 {
1359     // if we tried to replace an empty string we'd enter an infinite loop below
1360     wxCHECK_MSG( !strOld.empty(), 0,
1361                  wxT("wxString::Replace(): invalid parameter") );
1362
1363     wxSTRING_INVALIDATE_CACHE();
1364
1365     size_t uiCount = 0;   // count of replacements made
1366
1367     // optimize the special common case: replacement of one character by
1368     // another one (in UTF-8 case we can only do this for ASCII characters)
1369     //
1370     // benchmarks show that this special version is around 3 times faster
1371     // (depending on the proportion of matching characters and UTF-8/wchar_t
1372     // build)
1373     if ( strOld.m_impl.length() == 1 && strNew.m_impl.length() == 1 )
1374     {
1375         const wxStringCharType chOld = strOld.m_impl[0],
1376                                chNew = strNew.m_impl[0];
1377
1378         // this loop is the simplified version of the one below
1379         for ( size_t pos = 0; ; )
1380         {
1381             pos = m_impl.find(chOld, pos);
1382             if ( pos == npos )
1383                 break;
1384
1385             m_impl[pos++] = chNew;
1386
1387             uiCount++;
1388
1389             if ( !bReplaceAll )
1390                 break;
1391         }
1392     }
1393     else if ( !bReplaceAll)
1394     {
1395         size_t pos = m_impl.find(strOld, 0);
1396         if ( pos != npos )
1397         {
1398             m_impl.replace(pos, strOld.m_impl.length(), strNew.m_impl);
1399             uiCount = 1;
1400         }
1401     }
1402     else // replace all occurrences
1403     {
1404         const size_t uiOldLen = strOld.m_impl.length();
1405         const size_t uiNewLen = strNew.m_impl.length();
1406
1407         // first scan the string to find all positions at which the replacement
1408         // should be made
1409         wxVector<size_t> replacePositions;
1410
1411         size_t pos;
1412         for ( pos = m_impl.find(strOld.m_impl, 0);
1413               pos != npos;
1414               pos = m_impl.find(strOld.m_impl, pos + uiOldLen))
1415         {
1416             replacePositions.push_back(pos);
1417             ++uiCount;
1418         }
1419
1420         if ( !uiCount )
1421             return 0;
1422
1423         // allocate enough memory for the whole new string
1424         wxString tmp;
1425         tmp.m_impl.reserve(m_impl.length() + uiCount*(uiNewLen - uiOldLen));
1426
1427         // copy this string to tmp doing replacements on the fly
1428         size_t replNum = 0;
1429         for ( pos = 0; replNum < uiCount; replNum++ )
1430         {
1431             const size_t nextReplPos = replacePositions[replNum];
1432
1433             if ( pos != nextReplPos )
1434             {
1435                 tmp.m_impl.append(m_impl, pos, nextReplPos - pos);
1436             }
1437
1438             tmp.m_impl.append(strNew.m_impl);
1439             pos = nextReplPos + uiOldLen;
1440         }
1441
1442         if ( pos != m_impl.length() )
1443         {
1444             // append the rest of the string unchanged
1445             tmp.m_impl.append(m_impl, pos, m_impl.length() - pos);
1446         }
1447
1448         swap(tmp);
1449     }
1450
1451     return uiCount;
1452 }
1453
1454 bool wxString::IsAscii() const
1455 {
1456     for ( const_iterator i = begin(); i != end(); ++i )
1457     {
1458         if ( !(*i).IsAscii() )
1459             return false;
1460     }
1461
1462     return true;
1463 }
1464
1465 bool wxString::IsWord() const
1466 {
1467     for ( const_iterator i = begin(); i != end(); ++i )
1468     {
1469         if ( !wxIsalpha(*i) )
1470             return false;
1471     }
1472
1473     return true;
1474 }
1475
1476 bool wxString::IsNumber() const
1477 {
1478     if ( empty() )
1479         return true;
1480
1481     const_iterator i = begin();
1482
1483     if ( *i == wxT('-') || *i == wxT('+') )
1484         ++i;
1485
1486     for ( ; i != end(); ++i )
1487     {
1488         if ( !wxIsdigit(*i) )
1489             return false;
1490     }
1491
1492     return true;
1493 }
1494
1495 wxString wxString::Strip(stripType w) const
1496 {
1497     wxString s = *this;
1498     if ( w & leading ) s.Trim(false);
1499     if ( w & trailing ) s.Trim(true);
1500     return s;
1501 }
1502
1503 // ---------------------------------------------------------------------------
1504 // case conversion
1505 // ---------------------------------------------------------------------------
1506
1507 wxString& wxString::MakeUpper()
1508 {
1509   for ( iterator it = begin(), en = end(); it != en; ++it )
1510     *it = (wxChar)wxToupper(*it);
1511
1512   return *this;
1513 }
1514
1515 wxString& wxString::MakeLower()
1516 {
1517   for ( iterator it = begin(), en = end(); it != en; ++it )
1518     *it = (wxChar)wxTolower(*it);
1519
1520   return *this;
1521 }
1522
1523 wxString& wxString::MakeCapitalized()
1524 {
1525     const iterator en = end();
1526     iterator it = begin();
1527     if ( it != en )
1528     {
1529         *it = (wxChar)wxToupper(*it);
1530         for ( ++it; it != en; ++it )
1531             *it = (wxChar)wxTolower(*it);
1532     }
1533
1534     return *this;
1535 }
1536
1537 // ---------------------------------------------------------------------------
1538 // trimming and padding
1539 // ---------------------------------------------------------------------------
1540
1541 // some compilers (VC++ 6.0 not to name them) return true for a call to
1542 // isspace('\xEA') in the C locale which seems to be broken to me, but we have
1543 // to live with this by checking that the character is a 7 bit one - even if
1544 // this may fail to detect some spaces (I don't know if Unicode doesn't have
1545 // space-like symbols somewhere except in the first 128 chars), it is arguably
1546 // still better than trimming away accented letters
1547 inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1548
1549 // trims spaces (in the sense of isspace) from left or right side
1550 wxString& wxString::Trim(bool bFromRight)
1551 {
1552     // first check if we're going to modify the string at all
1553     if ( !empty() &&
1554          (
1555           (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1556           (!bFromRight && wxSafeIsspace(GetChar(0u)))
1557          )
1558        )
1559     {
1560         if ( bFromRight )
1561         {
1562             // find last non-space character
1563             reverse_iterator psz = rbegin();
1564             while ( (psz != rend()) && wxSafeIsspace(*psz) )
1565                 ++psz;
1566
1567             // truncate at trailing space start
1568             erase(psz.base(), end());
1569         }
1570         else
1571         {
1572             // find first non-space character
1573             iterator psz = begin();
1574             while ( (psz != end()) && wxSafeIsspace(*psz) )
1575                 ++psz;
1576
1577             // fix up data and length
1578             erase(begin(), psz);
1579         }
1580     }
1581
1582     return *this;
1583 }
1584
1585 // adds nCount characters chPad to the string from either side
1586 wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1587 {
1588     wxString s(chPad, nCount);
1589
1590     if ( bFromRight )
1591         *this += s;
1592     else
1593     {
1594         s += *this;
1595         swap(s);
1596     }
1597
1598     return *this;
1599 }
1600
1601 // truncate the string
1602 wxString& wxString::Truncate(size_t uiLen)
1603 {
1604     if ( uiLen < length() )
1605     {
1606         erase(begin() + uiLen, end());
1607     }
1608     //else: nothing to do, string is already short enough
1609
1610     return *this;
1611 }
1612
1613 // ---------------------------------------------------------------------------
1614 // finding (return wxNOT_FOUND if not found and index otherwise)
1615 // ---------------------------------------------------------------------------
1616
1617 // find a character
1618 int wxString::Find(wxUniChar ch, bool bFromEnd) const
1619 {
1620     size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1621
1622     return (idx == npos) ? wxNOT_FOUND : (int)idx;
1623 }
1624
1625 // ----------------------------------------------------------------------------
1626 // conversion to numbers
1627 // ----------------------------------------------------------------------------
1628
// The implementation of all the functions below is exactly the same so factor
// it out. Note that number extraction works correctly on UTF-8 strings, so
// we can use wxStringCharType and wx_str() for maximum efficiency.

// DO_IF_NOT_WINCE(x) expands to x, except when __WXWINCE__ is defined, in
// which case it expands to nothing: it is used to wrap all the uses of errno
// below.
#ifndef __WXWINCE__
    #define DO_IF_NOT_WINCE(x) x
#else
    #define DO_IF_NOT_WINCE(x)
#endif

// Common prologue of the conversion functions: must be used in a function
// with a pointer parameter named pVal. It returns false from this function if
// pVal is NULL, resets errno (except under WinCE) and declares the variables
// "start", pointing to the string data, and "end", which the function is
// expected to pass to the parsing function as its end pointer.
#define WX_STRING_TO_X_TYPE_START                                           \
    wxCHECK_MSG( pVal, false, wxT("NULL output pointer") );                  \
    DO_IF_NOT_WINCE( errno = 0; )                                           \
    const wxStringCharType *start = wx_str();                               \
    wxStringCharType *end;

// notice that we return false without modifying the output parameter at all if
// nothing could be parsed but we do modify it and return false then if we did
// parse something successfully but not the entire string
//
// Common epilogue of the conversion functions: expects the result of the
// conversion to be in a local variable named "val". A range error (errno set
// to ERANGE, not checked under WinCE) is handled in the same way as not
// parsing anything at all.
#define WX_STRING_TO_X_TYPE_END                                             \
    if ( end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )                 \
        return false;                                                       \
    *pVal = val;                                                            \
    return !*end;
1653
// Converts the string to a long using wxStrtol().
//
// base must be either 0 or between 2 and 36 (this is only checked by an
// assertion). Returns true and stores the result in *pVal only if the entire
// string was consumed; see WX_STRING_TO_X_TYPE_END for the handling of the
// failure cases.
bool wxString::ToLong(long *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );

    // NB: the macros use the local variables "start", "end" and "val"
    WX_STRING_TO_X_TYPE_START
    long val = wxStrtol(start, &end, base);
    WX_STRING_TO_X_TYPE_END
}
1662
// Converts the string to an unsigned long using wxStrtoul().
//
// base must be either 0 or between 2 and 36 (this is only checked by an
// assertion). Returns true and stores the result in *pVal only if the entire
// string was consumed; see WX_STRING_TO_X_TYPE_END for the handling of the
// failure cases.
bool wxString::ToULong(unsigned long *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );

    // NB: the macros use the local variables "start", "end" and "val"
    WX_STRING_TO_X_TYPE_START
    unsigned long val = wxStrtoul(start, &end, base);
    WX_STRING_TO_X_TYPE_END
}
1671
// Converts the string to a wxLongLong_t using wxStrtoll().
//
// base must be either 0 or between 2 and 36 (this is only checked by an
// assertion). Returns true and stores the result in *pVal only if the entire
// string was consumed; see WX_STRING_TO_X_TYPE_END for the handling of the
// failure cases.
bool wxString::ToLongLong(wxLongLong_t *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );

    // NB: the macros use the local variables "start", "end" and "val"
    WX_STRING_TO_X_TYPE_START
    wxLongLong_t val = wxStrtoll(start, &end, base);
    WX_STRING_TO_X_TYPE_END
}
1680
// Converts the string to a wxULongLong_t using wxStrtoull().
//
// base must be either 0 or between 2 and 36 (this is only checked by an
// assertion). Returns true and stores the result in *pVal only if the entire
// string was consumed; see WX_STRING_TO_X_TYPE_END for the handling of the
// failure cases.
bool wxString::ToULongLong(wxULongLong_t *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );

    // NB: the macros use the local variables "start", "end" and "val"
    WX_STRING_TO_X_TYPE_START
    wxULongLong_t val = wxStrtoull(start, &end, base);
    WX_STRING_TO_X_TYPE_END
}
1689
// Converts the string to a double using wxStrtod().
//
// Returns true and stores the result in *pVal only if the entire string was
// consumed; see WX_STRING_TO_X_TYPE_END for the handling of the failure
// cases.
bool wxString::ToDouble(double *pVal) const
{
    // NB: the macros use the local variables "start", "end" and "val"
    WX_STRING_TO_X_TYPE_START
    double val = wxStrtod(start, &end);
    WX_STRING_TO_X_TYPE_END
}
1696
1697 #if wxUSE_XLOCALE
1698
// Same as ToLong() but passes wxCLocale to the conversion function.
//
// base must be either 0 or between 2 and 36 (this is only checked by an
// assertion). Returns true and stores the result in *pVal only if the entire
// string was consumed; see WX_STRING_TO_X_TYPE_END for the handling of the
// failure cases.
bool wxString::ToCLong(long *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );

    // NB: the macros use the local variables "start", "end" and "val"
    WX_STRING_TO_X_TYPE_START
    // the "A" variant is used in the UTF-8 and ANSI builds when
    // wxHAS_XLOCALE_SUPPORT is defined
#if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT)
    long val = wxStrtol_lA(start, &end, base, wxCLocale);
#else
    long val = wxStrtol_l(start, &end, base, wxCLocale);
#endif
    WX_STRING_TO_X_TYPE_END
}
1711
// Same as ToULong() but passes wxCLocale to the conversion function.
//
// base must be either 0 or between 2 and 36 (this is only checked by an
// assertion). Returns true and stores the result in *pVal only if the entire
// string was consumed; see WX_STRING_TO_X_TYPE_END for the handling of the
// failure cases.
bool wxString::ToCULong(unsigned long *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );

    // NB: the macros use the local variables "start", "end" and "val"
    WX_STRING_TO_X_TYPE_START
    // the "A" variant is used in the UTF-8 and ANSI builds when
    // wxHAS_XLOCALE_SUPPORT is defined
#if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT)
    unsigned long val = wxStrtoul_lA(start, &end, base, wxCLocale);
#else
    unsigned long val = wxStrtoul_l(start, &end, base, wxCLocale);
#endif
    WX_STRING_TO_X_TYPE_END
}
1724
// Same as ToDouble() but passes wxCLocale to the conversion function.
//
// Returns true and stores the result in *pVal only if the entire string was
// consumed; see WX_STRING_TO_X_TYPE_END for the handling of the failure
// cases.
bool wxString::ToCDouble(double *pVal) const
{
    // NB: the macros use the local variables "start", "end" and "val"
    WX_STRING_TO_X_TYPE_START
    // the "A" variant is used in the UTF-8 and ANSI builds when
    // wxHAS_XLOCALE_SUPPORT is defined
#if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT)
    double val = wxStrtod_lA(start, &end, wxCLocale);
#else
    double val = wxStrtod_l(start, &end, wxCLocale);
#endif
    WX_STRING_TO_X_TYPE_END
}
1735
1736 #endif  // wxUSE_XLOCALE
1737
1738 // ---------------------------------------------------------------------------
1739 // formatted output
1740 // ---------------------------------------------------------------------------
1741
1742 #if !wxUSE_UTF8_LOCALE_ONLY
1743 /* static */
1744 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1745 wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
1746 #else
1747 wxString wxString::DoFormatWchar(const wxChar *format, ...)
1748 #endif
1749 {
1750     va_list argptr;
1751     va_start(argptr, format);
1752
1753     wxString s;
1754     s.PrintfV(format, argptr);
1755
1756     va_end(argptr);
1757
1758     return s;
1759 }
1760 #endif // !wxUSE_UTF8_LOCALE_ONLY
1761
#if wxUSE_UNICODE_UTF8
// Returns a new string formatted according to the char format string and the
// variable arguments following it.
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
1777
1778 /* static */
1779 wxString wxString::FormatV(const wxString& format, va_list argptr)
1780 {
1781     wxString s;
1782     s.PrintfV(format, argptr);
1783     return s;
1784 }
1785
#if !wxUSE_UTF8_LOCALE_ONLY
// Formats the variable arguments according to the wxChar format string into
// this string by calling PrintfV() and returns the value it returned.
#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
#else
int wxString::DoPrintfWchar(const wxChar *format, ...)
#endif
{
    va_list argptr;
    va_start(argptr, format);

#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
    // get a pointer to the wxString instance by downcasting "this" from the
    // mixin base class.
    //
    // NOTE(review): this comment used to say that dynamic_cast<> had to be
    // used here as the only cast which is safe for downcasting with multiple
    // inheritance, but the code actually uses static_cast<> -- confirm which
    // one is intended.
    wxString *str = static_cast<wxString*>(this);
#else
    wxString *str = this;
#endif

    int iLen = str->PrintfV(format, argptr);

    va_end(argptr);

    return iLen;
}
#endif // !wxUSE_UTF8_LOCALE_ONLY
1812
#if wxUSE_UNICODE_UTF8
// Formats the variable arguments according to the char format string into
// this string by calling PrintfV() and returns the value it returned.
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);
    const int written = PrintfV(format, args);
    va_end(args);

    return written;
}
#endif // wxUSE_UNICODE_UTF8
1826
1827 /*
1828     Uses wxVsnprintf and places the result into the this string.
1829
1830     In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1831     it is vswprintf.  Due to a discrepancy between vsnprintf and vswprintf in
1832     the ISO C99 (and thus SUSv3) standard the return value for the case of
1833     an undersized buffer is inconsistent.  For conforming vsnprintf
1834     implementations the function must return the number of characters that
1835     would have been printed had the buffer been large enough.  For conforming
1836     vswprintf implementations the function must return a negative number
1837     and set errno.
1838
1839     What vswprintf sets errno to is undefined but Darwin seems to set it to
1840     EOVERFLOW.  The only expected errno are EILSEQ and EINVAL.  Both of
1841     those are defined in the standard and backed up by several conformance
1842     statements.  Note that ENOMEM mentioned in the manual page does not
1843     apply to swprintf, only wprintf and fwprintf.
1844
1845     Official manual page:
1846     http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1847
1848     Some conformance statements (AIX, Solaris):
1849     http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1850     http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1851
1852     Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1853     EILSEQ and EINVAL are specifically defined to mean the error is other than
1854     an undersized buffer and no other errno are defined we treat those two
1855     as meaning hard errors and everything else gets the old behavior which
1856     is to keep looping and increasing buffer size until the function succeeds.
1857
1858     In practice it's impossible to determine before compilation which behavior
1859     may be used.  The vswprintf function may have vsnprintf-like behavior or
1860     vice-versa.  Behavior detected on one release can theoretically change
1861     with an updated release.  Not to mention that configure testing for it
1862     would require the test to be run on the host system, not the build system
1863     which makes cross compilation difficult. Therefore, we make no assumptions
1864     about behavior and try our best to handle every known case, including the
1865     case where wxVsnprintf returns a negative number and fails to set errno.
1866
1867     There is yet one more non-standard implementation and that is our own.
1868     Fortunately, that can be detected at compile-time.
1869
    On top of all that, ISO C99 explicitly defines snprintf to write a null
    character to the last position of the specified buffer.  That would be
    at the given buffer size minus 1.  It is supposed to do this even if it
    turns out that the buffer is sized too small.
1874
1875     Darwin (tested on 10.5) follows the C99 behavior exactly.
1876
1877     Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1878     errno even when it fails.  However, it only seems to ever fail due
1879     to an undersized buffer.
1880 */
1881 #if wxUSE_UNICODE_UTF8
1882 template<typename BufferType>
1883 #else
1884 // we only need one version in non-UTF8 builds and at least two Windows
1885 // compilers have problems with this function template, so use just one
1886 // normal function here
1887 #endif
1888 static int DoStringPrintfV(wxString& str,
1889                            const wxString& format, va_list argptr)
1890 {
1891     int size = 1024;
1892
1893     for ( ;; )
1894     {
1895 #if wxUSE_UNICODE_UTF8
1896         BufferType tmp(str, size + 1);
1897         typename BufferType::CharType *buf = tmp;
1898 #else
1899         wxStringBuffer tmp(str, size + 1);
1900         wxChar *buf = tmp;
1901 #endif
1902
1903         if ( !buf )
1904         {
1905             // out of memory
1906
1907             // in UTF-8 build, leaving uninitialized junk in the buffer
1908             // could result in invalid non-empty UTF-8 string, so just
1909             // reset the string to empty on failure:
1910             buf[0] = '\0';
1911             return -1;
1912         }
1913
1914         // wxVsnprintf() may modify the original arg pointer, so pass it
1915         // only a copy
1916         va_list argptrcopy;
1917         wxVaCopy(argptrcopy, argptr);
1918
1919 #ifndef __WXWINCE__
1920         // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1921         errno = 0;
1922 #endif
1923         int len = wxVsnprintf(buf, size, format, argptrcopy);
1924         va_end(argptrcopy);
1925
1926         // some implementations of vsnprintf() don't NUL terminate
1927         // the string if there is not enough space for it so
1928         // always do it manually
1929         // FIXME: This really seems to be the wrong and would be an off-by-one
1930         // bug except the code above allocates an extra character.
1931         buf[size] = wxT('\0');
1932
1933         // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1934         // total number of characters which would have been written if the
1935         // buffer were large enough (newer standards such as Unix98)
1936         if ( len < 0 )
1937         {
1938             // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1939             //     wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1940             //     is true if *both* of them use our own implementation,
1941             //     otherwise we can't be sure
1942 #if wxUSE_WXVSNPRINTF
1943             // we know that our own implementation of wxVsnprintf() returns -1
1944             // only for a format error - thus there's something wrong with
1945             // the user's format string
1946             buf[0] = '\0';
1947             return -1;
1948 #else // possibly using system version
1949             // assume it only returns error if there is not enough space, but
1950             // as we don't know how much we need, double the current size of
1951             // the buffer
1952 #ifndef __WXWINCE__
1953             if( (errno == EILSEQ) || (errno == EINVAL) )
1954             // If errno was set to one of the two well-known hard errors
1955             // then fail immediately to avoid an infinite loop.
1956                 return -1;
1957             else
1958 #endif // __WXWINCE__
1959             // still not enough, as we don't know how much we need, double the
1960             // current size of the buffer
1961                 size *= 2;
1962 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1963         }
1964         else if ( len >= size )
1965         {
1966 #if wxUSE_WXVSNPRINTF
1967             // we know that our own implementation of wxVsnprintf() returns
1968             // size+1 when there's not enough space but that's not the size
1969             // of the required buffer!
1970             size *= 2;      // so we just double the current size of the buffer
1971 #else
1972             // some vsnprintf() implementations NUL-terminate the buffer and
1973             // some don't in len == size case, to be safe always add 1
1974             // FIXME: I don't quite understand this comment.  The vsnprintf
1975             // function is specifically defined to return the number of
1976             // characters printed not including the null terminator.
1977             // So OF COURSE you need to add 1 to get the right buffer size.
1978             // The following line is definitely correct, no question.
1979             size = len + 1;
1980 #endif
1981         }
1982         else // ok, there was enough space
1983         {
1984             break;
1985         }
1986     }
1987
1988     // we could have overshot
1989     str.Shrink();
1990
1991     return str.length();
1992 }
1993
1994 int wxString::PrintfV(const wxString& format, va_list argptr)
1995 {
1996 #if wxUSE_UNICODE_UTF8
1997     #if wxUSE_STL_BASED_WXSTRING
1998         typedef wxStringTypeBuffer<char> Utf8Buffer;
1999     #else
2000         typedef wxStringInternalBuffer Utf8Buffer;
2001     #endif
2002 #endif
2003
2004 #if wxUSE_UTF8_LOCALE_ONLY
2005     return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
2006 #else
2007     #if wxUSE_UNICODE_UTF8
2008     if ( wxLocaleIsUtf8 )
2009         return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
2010     else
2011         // wxChar* version
2012         return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
2013     #else
2014         return DoStringPrintfV(*this, format, argptr);
2015     #endif // UTF8/WCHAR
2016 #endif
2017 }
2018
2019 // ----------------------------------------------------------------------------
2020 // misc other operations
2021 // ----------------------------------------------------------------------------
2022
2023 // returns true if the string matches the pattern which may contain '*' and
2024 // '?' metacharacters (as usual, '?' matches any character and '*' any number
2025 // of them)
2026 bool wxString::Matches(const wxString& mask) const
2027 {
2028     // I disable this code as it doesn't seem to be faster (in fact, it seems
2029     // to be much slower) than the old, hand-written code below and using it
2030     // here requires always linking with libregex even if the user code doesn't
2031     // use it
2032 #if 0 // wxUSE_REGEX
2033     // first translate the shell-like mask into a regex
2034     wxString pattern;
2035     pattern.reserve(wxStrlen(pszMask));
2036
2037     pattern += wxT('^');
2038     while ( *pszMask )
2039     {
2040         switch ( *pszMask )
2041         {
2042             case wxT('?'):
2043                 pattern += wxT('.');
2044                 break;
2045
2046             case wxT('*'):
2047                 pattern += wxT(".*");
2048                 break;
2049
2050             case wxT('^'):
2051             case wxT('.'):
2052             case wxT('$'):
2053             case wxT('('):
2054             case wxT(')'):
2055             case wxT('|'):
2056             case wxT('+'):
2057             case wxT('\\'):
2058                 // these characters are special in a RE, quote them
2059                 // (however note that we don't quote '[' and ']' to allow
2060                 // using them for Unix shell like matching)
2061                 pattern += wxT('\\');
2062                 // fall through
2063
2064             default:
2065                 pattern += *pszMask;
2066         }
2067
2068         pszMask++;
2069     }
2070     pattern += wxT('$');
2071
2072     // and now use it
2073     return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
2074 #else // !wxUSE_REGEX
2075   // TODO: this is, of course, awfully inefficient...
2076
2077   // FIXME-UTF8: implement using iterators, remove #if
2078 #if wxUSE_UNICODE_UTF8
2079   const wxScopedWCharBuffer maskBuf = mask.wc_str();
2080   const wxScopedWCharBuffer txtBuf = wc_str();
2081   const wxChar *pszMask = maskBuf.data();
2082   const wxChar *pszTxt = txtBuf.data();
2083 #else
2084   const wxChar *pszMask = mask.wx_str();
2085   // the char currently being checked
2086   const wxChar *pszTxt = wx_str();
2087 #endif
2088
2089   // the last location where '*' matched
2090   const wxChar *pszLastStarInText = NULL;
2091   const wxChar *pszLastStarInMask = NULL;
2092
2093 match:
2094   for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
2095     switch ( *pszMask ) {
2096       case wxT('?'):
2097         if ( *pszTxt == wxT('\0') )
2098           return false;
2099
2100         // pszTxt and pszMask will be incremented in the loop statement
2101
2102         break;
2103
2104       case wxT('*'):
2105         {
2106           // remember where we started to be able to backtrack later
2107           pszLastStarInText = pszTxt;
2108           pszLastStarInMask = pszMask;
2109
2110           // ignore special chars immediately following this one
2111           // (should this be an error?)
2112           while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
2113             pszMask++;
2114
2115           // if there is nothing more, match
2116           if ( *pszMask == wxT('\0') )
2117             return true;
2118
2119           // are there any other metacharacters in the mask?
2120           size_t uiLenMask;
2121           const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));
2122
2123           if ( pEndMask != NULL ) {
2124             // we have to match the string between two metachars
2125             uiLenMask = pEndMask - pszMask;
2126           }
2127           else {
2128             // we have to match the remainder of the string
2129             uiLenMask = wxStrlen(pszMask);
2130           }
2131
2132           wxString strToMatch(pszMask, uiLenMask);
2133           const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
2134           if ( pMatch == NULL )
2135             return false;
2136
2137           // -1 to compensate "++" in the loop
2138           pszTxt = pMatch + uiLenMask - 1;
2139           pszMask += uiLenMask - 1;
2140         }
2141         break;
2142
2143       default:
2144         if ( *pszMask != *pszTxt )
2145           return false;
2146         break;
2147     }
2148   }
2149
2150   // match only if nothing left
2151   if ( *pszTxt == wxT('\0') )
2152     return true;
2153
2154   // if we failed to match, backtrack if we can
2155   if ( pszLastStarInText ) {
2156     pszTxt = pszLastStarInText + 1;
2157     pszMask = pszLastStarInMask;
2158
2159     pszLastStarInText = NULL;
2160
2161     // don't bother resetting pszLastStarInMask, it's unnecessary
2162
2163     goto match;
2164   }
2165
2166   return false;
2167 #endif // wxUSE_REGEX/!wxUSE_REGEX
2168 }
2169
2170 // Count the number of chars
2171 int wxString::Freq(wxUniChar ch) const
2172 {
2173     int count = 0;
2174     for ( const_iterator i = begin(); i != end(); ++i )
2175     {
2176         if ( *i == ch )
2177             count ++;
2178     }
2179     return count;
2180 }
2181