src/common/string.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/string.cpp
   3 // Purpose:     wxString class
   4 // Author:      Vadim Zeitlin, Ryan Norton
   5 // Modified by:
   6 // Created:     29/01/98
   7 // RCS-ID:      $Id$
   8 // Copyright:   (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
   9 //              (c) 2004 Ryan Norton <wxprojects@comcast.net>
  10 // Licence:     wxWindows licence
  11 /////////////////////////////////////////////////////////////////////////////
  12
  13 // ===========================================================================
  14 // headers, declarations, constants
  15 // ===========================================================================
  16
  17 // For compilers that support precompilation, includes "wx.h".
  18 #include "wx/wxprec.h"
  19
  20 #ifdef __BORLANDC__
  21     #pragma hdrstop
  22 #endif
  23
  24 #ifndef WX_PRECOMP
  25     #include "wx/string.h"
  26     #include "wx/wxcrtvararg.h"
  27 #endif
  28
  29 #include <ctype.h>
  30
  31 #ifndef __WXWINCE__
  32     #include <errno.h>
  33 #endif
  34
  35 #include <string.h>
  36 #include <stdlib.h>
  37
  38 #include "wx/hashmap.h"
  39 #include "wx/vector.h"
  40 #include "wx/xlocale.h"
  41
  42 #ifdef __WXMSW__
  43     #include "wx/msw/wrapwin.h"
  44 #endif // __WXMSW__
  45
// String handling functions used by wxString.
//
// These aliases select the primitives applied to the internal string
// representation: the standard C byte-oriented functions are used when
// wxUSE_UNICODE_UTF8 is set and their wx (wxTmem*/wxStrlen) counterparts are
// used otherwise.
#if wxUSE_UNICODE_UTF8
    #define wxStringMemcpy   memcpy
    #define wxStringMemcmp   memcmp
    #define wxStringMemchr   memchr
    #define wxStringStrlen   strlen
#else
    #define wxStringMemcpy   wxTmemcpy
    #define wxStringMemcmp   wxTmemcmp
    #define wxStringMemchr   wxTmemchr
    #define wxStringStrlen   wxStrlen
#endif
  58
  59 // ----------------------------------------------------------------------------
  60 // global variables
  61 // ----------------------------------------------------------------------------
  62
namespace wxPrivate
{

// File-local UntypedBufferData instance constructed from (NULL, 0);
// NOTE(review): presumably this stands for the data of a null/empty buffer,
// confirm against the UntypedBufferData declaration.
static UntypedBufferData s_untypedNullData(NULL, 0);

// Constant pointer to the instance above.
UntypedBufferData * const untypedNullDataPtr = &s_untypedNullData;

} // namespace wxPrivate
  71
  72 // ---------------------------------------------------------------------------
  73 // static class variables definition
  74 // ---------------------------------------------------------------------------
  75
  76 //According to STL _must_ be a -1 size_t
  77 const size_t wxString::npos = (size_t) -1;
  78
  79 #if wxUSE_STRING_POS_CACHE
  80
  81 #ifdef wxHAS_COMPILER_TLS
  82
// Definition of the static wxString::ms_cache member, declared using
// wxTLS_TYPE(); only compiled when wxHAS_COMPILER_TLS is defined.
wxTLS_TYPE(wxString::Cache) wxString::ms_cache;
  84
  85 #else // !wxHAS_COMPILER_TLS
  86
  87 struct wxStrCacheInitializer
  88 {
  89     wxStrCacheInitializer()
  90     {
  91         // calling this function triggers s_cache initialization in it, and
  92         // from now on it becomes safe to call from multiple threads
  93         wxString::GetCache();
  94     }
  95 };
  96
  97 /*
  98 wxString::Cache& wxString::GetCache()
  99 {
 100     static wxTLS_TYPE(Cache) s_cache;
 101
 102     return wxTLS_VALUE(s_cache);
 103 }
 104 */
 105
 106 static wxStrCacheInitializer gs_stringCacheInit;
 107
 108 #endif // wxHAS_COMPILER_TLS/!wxHAS_COMPILER_TLS
 109
 110 // gdb seems to be unable to display thread-local variables correctly, at least
 111 // not my 6.4.98 version under amd64, so provide this debugging helper to do it
 112 #if wxDEBUG_LEVEL >= 2
 113
 114 struct wxStrCacheDumper
 115 {
 116     static void ShowAll()
 117     {
 118         puts("*** wxString cache dump:");
 119         for ( unsigned n = 0; n < wxString::Cache::SIZE; n++ )
 120         {
 121             const wxString::Cache::Element&
 122                 c = wxString::GetCacheBegin()[n];
 123
 124             printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
 125                    n,
 126                    n == wxString::LastUsedCacheElement() ? " [*]" : "",
 127                    c.str,
 128                    (unsigned long)c.pos,
 129                    (unsigned long)c.impl,
 130                    (long)c.len);
 131         }
 132     }
 133 };
 134
 135 void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
 136
 137 #endif // wxDEBUG_LEVEL >= 2
 138
 139 #ifdef wxPROFILE_STRING_CACHE
 140
 141 wxString::CacheStats wxString::ms_cacheStats;
 142
 143 struct wxStrCacheStatsDumper
 144 {
 145     ~wxStrCacheStatsDumper()
 146     {
 147         const wxString::CacheStats& stats = wxString::ms_cacheStats;
 148
 149         if ( stats.postot )
 150         {
 151             puts("*** wxString cache statistics:");
 152             printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
 153                    stats.postot);
 154             printf("\tHits %u (of which %u not used) or %.2f%%\n",
 155                    stats.poshits,
 156                    stats.mishits,
 157                    100.*float(stats.poshits - stats.mishits)/stats.postot);
 158             printf("\tAverage position requested: %.2f\n",
 159                    float(stats.sumpos) / stats.postot);
 160             printf("\tAverage offset after cached hint: %.2f\n",
 161                    float(stats.sumofs) / stats.postot);
 162         }
 163
 164         if ( stats.lentot )
 165         {
 166             printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
 167                    stats.lentot, 100.*float(stats.lenhits)/stats.lentot);
 168         }
 169     }
 170 };
 171
 172 static wxStrCacheStatsDumper s_showCacheStats;
 173
 174 #endif // wxPROFILE_STRING_CACHE
 175
 176 #endif // wxUSE_STRING_POS_CACHE
 177
 178 // ----------------------------------------------------------------------------
 179 // global functions
 180 // ----------------------------------------------------------------------------
 181
 182 #if wxUSE_STD_IOSTREAM
 183
 184 #include <iostream>
 185
 186 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
 187 {
 188 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
 189     const wxScopedCharBuffer buf(str.AsCharBuf());
 190     if ( !buf )
 191         os.clear(wxSTD ios_base::failbit);
 192     else
 193         os << buf.data();
 194
 195     return os;
 196 #else
 197     return os << str.AsInternal();
 198 #endif
 199 }
 200
 201 wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
 202 {
 203     return os << str.c_str();
 204 }
 205
 206 wxSTD ostream& operator<<(wxSTD ostream& os, const wxScopedCharBuffer& str)
 207 {
 208     return os << str.data();
 209 }
 210
 211 #ifndef __BORLANDC__
 212 wxSTD ostream& operator<<(wxSTD ostream& os, const wxScopedWCharBuffer& str)
 213 {
 214     return os << str.data();
 215 }
 216 #endif
 217
 218 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
 219
 220 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxString& str)
 221 {
 222     return wos << str.wc_str();
 223 }
 224
 225 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxCStrData& str)
 226 {
 227     return wos << str.AsWChar();
 228 }
 229
 230 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxScopedWCharBuffer& str)
 231 {
 232     return wos << str.data();
 233 }
 234
 235 #endif  // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
 236
 237 #endif // wxUSE_STD_IOSTREAM
 238
 239 // ===========================================================================
 240 // wxString class core
 241 // ===========================================================================
 242
 243 #if wxUSE_UNICODE_UTF8
 244
 245 void wxString::PosLenToImpl(size_t pos, size_t len,
 246                             size_t *implPos, size_t *implLen) const
 247 {
 248     if ( pos == npos )
 249     {
 250         *implPos = npos;
 251     }
 252     else // have valid start position
 253     {
 254         const const_iterator b = GetIterForNthChar(pos);
 255         *implPos = wxStringImpl::const_iterator(b.impl()) - m_impl.begin();
 256         if ( len == npos )
 257         {
 258             *implLen = npos;
 259         }
 260         else // have valid length too
 261         {
 262             // we need to handle the case of length specifying a substring
 263             // going beyond the end of the string, just as std::string does
 264             const const_iterator e(end());
 265             const_iterator i(b);
 266             while ( len && i <= e )
 267             {
 268                 ++i;
 269                 --len;
 270             }
 271
 272             *implLen = i.impl() - b.impl();
 273         }
 274     }
 275 }
 276
 277 #endif // wxUSE_UNICODE_UTF8
 278
 279 // ----------------------------------------------------------------------------
 280 // wxCStrData converted strings caching
 281 // ----------------------------------------------------------------------------
 282
 283 // FIXME-UTF8: temporarily disabled because it doesn't work with global
 284 //             string objects; re-enable after fixing this bug and benchmarking
 285 //             performance to see if using a hash is a good idea at all
 286 #if 0
 287
 288 // For backward compatibility reasons, it must be possible to assign the value
 289 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
 290 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
 291 // because the memory would be freed immediately, but it has to be valid as long
 292 // as the string is not modified, so that code like this still works:
 293 //
 294 // const wxChar *s = str.c_str();
 295 // while ( s ) { ... }
 296
 297 // FIXME-UTF8: not thread safe!
 298 // FIXME-UTF8: we currently clear the cached conversion only when the string is
 299 //             destroyed, but we should do it when the string is modified, to
 300 //             keep memory usage down
 301 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
 302 //             invalidated the cache on every change, we could keep the previous
 303 //             conversion
 304 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
 305 //             to use mb_str() or wc_str() instead of (const [w]char*)c_str()
 306
 307 template<typename T>
 308 static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
 309 {
 310     typename T::iterator i = hash.find(wxConstCast(s, wxString));
 311     if ( i != hash.end() )
 312     {
 313         free(i->second);
 314         hash.erase(i);
 315     }
 316 }
 317
 318 #if wxUSE_UNICODE
 319 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
 320 //     so we have to use wxString* here and const-cast when used
 321 WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
 322                     wxStringCharConversionCache);
 323 static wxStringCharConversionCache gs_stringsCharCache;
 324
 325 const char* wxCStrData::AsChar() const
 326 {
 327     // remove previously cache value, if any (see FIXMEs above):
 328     DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
 329
 330     // convert the string and keep it:
 331     const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
 332         m_str->mb_str().release();
 333
 334     return s + m_offset;
 335 }
 336 #endif // wxUSE_UNICODE
 337
 338 #if !wxUSE_UNICODE_WCHAR
 339 WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
 340                     wxStringWCharConversionCache);
 341 static wxStringWCharConversionCache gs_stringsWCharCache;
 342
 343 const wchar_t* wxCStrData::AsWChar() const
 344 {
 345     // remove previously cache value, if any (see FIXMEs above):
 346     DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
 347
 348     // convert the string and keep it:
 349     const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
 350         m_str->wc_str().release();
 351
 352     return s + m_offset;
 353 }
 354 #endif // !wxUSE_UNICODE_WCHAR
 355
 356 wxString::~wxString()
 357 {
 358 #if wxUSE_UNICODE
 359     // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
 360     DeleteStringFromConversionCache(gs_stringsCharCache, this);
 361 #endif
 362 #if !wxUSE_UNICODE_WCHAR
 363     DeleteStringFromConversionCache(gs_stringsWCharCache, this);
 364 #endif
 365 }
 366 #endif
 367
 368 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 369 const char* wxCStrData::AsChar() const
 370 {
 371 #if wxUSE_UNICODE_UTF8
 372     if ( wxLocaleIsUtf8 )
 373         return AsInternal();
 374 #endif
 375     // under non-UTF8 locales, we have to convert the internal UTF-8
 376     // representation using wxConvLibc and cache the result
 377
 378     wxString *str = wxConstCast(m_str, wxString);
 379
 380     // convert the string:
 381     //
 382     // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
 383     //             have it) but it's unfortunately not obvious to implement
 384     //             because we don't know how big buffer do we need for the
 385     //             given string length (in case of multibyte encodings, e.g.
 386     //             ISO-2022-JP or UTF-8 when internal representation is wchar_t)
 387     //
 388     //             One idea would be to store more than just m_convertedToChar
 389     //             in wxString: then we could record the length of the string
 390     //             which was converted the last time and try to reuse the same
 391     //             buffer if the current length is not greater than it (this
 392     //             could still fail because string could have been modified in
 393     //             place but it would work most of the time, so we'd do it and
 394     //             only allocate the new buffer if in-place conversion returned
 395     //             an error). We could also store a bit saying if the string
 396     //             was modified since the last conversion (and update it in all
 397     //             operation modifying the string, of course) to avoid unneeded
 398     //             consequential conversions. But both of these ideas require
 399     //             adding more fields to wxString and require profiling results
 400     //             to be sure that we really gain enough from them to justify
 401     //             doing it.
 402     wxScopedCharBuffer buf(str->mb_str());
 403
 404     // if it failed, return empty string and not NULL to avoid crashes in code
 405     // written with either wxWidgets 2 wxString or std::string behaviour in
 406     // mind: neither of them ever returns NULL and so we shouldn't neither
 407     if ( !buf )
 408         return "";
 409
 410     if ( str->m_convertedToChar &&
 411          strlen(buf) == strlen(str->m_convertedToChar) )
 412     {
 413         // keep the same buffer for as long as possible, so that several calls
 414         // to c_str() in a row still work:
 415         strcpy(str->m_convertedToChar, buf);
 416     }
 417     else
 418     {
 419         str->m_convertedToChar = buf.release();
 420     }
 421
 422     // and keep it:
 423     return str->m_convertedToChar + m_offset;
 424 }
 425 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 426
 427 #if !wxUSE_UNICODE_WCHAR
 428 const wchar_t* wxCStrData::AsWChar() const
 429 {
 430     wxString *str = wxConstCast(m_str, wxString);
 431
 432     // convert the string:
 433     wxScopedWCharBuffer buf(str->wc_str());
 434
 435     // notice that here, unlike above in AsChar(), conversion can't fail as our
 436     // internal UTF-8 is always well-formed -- or the string was corrupted and
 437     // all bets are off anyhow
 438
 439     // FIXME-UTF8: do the conversion in-place in the existing buffer
 440     if ( str->m_convertedToWChar &&
 441          wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
 442     {
 443         // keep the same buffer for as long as possible, so that several calls
 444         // to c_str() in a row still work:
 445         memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
 446     }
 447     else
 448     {
 449         str->m_convertedToWChar = buf.release();
 450     }
 451
 452     // and keep it:
 453     return str->m_convertedToWChar + m_offset;
 454 }
 455 #endif // !wxUSE_UNICODE_WCHAR
 456
 457 // ===========================================================================
 458 // wxString class core
 459 // ===========================================================================
 460
 461 // ---------------------------------------------------------------------------
 462 // construction and conversion
 463 // ---------------------------------------------------------------------------
 464
 465 #if wxUSE_UNICODE_WCHAR
 466 /* static */
 467 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 468                                                const wxMBConv& conv)
 469 {
 470     // anything to do?
 471     if ( !psz || nLength == 0 )
 472         return SubstrBufFromMB(wxWCharBuffer(L""), 0);
 473
 474     if ( nLength == npos )
 475         nLength = wxNO_LEN;
 476
 477     size_t wcLen;
 478     wxScopedWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 479     if ( !wcLen )
 480         return SubstrBufFromMB(wxWCharBuffer(L""), 0);
 481     else
 482         return SubstrBufFromMB(wcBuf, wcLen);
 483 }
 484 #endif // wxUSE_UNICODE_WCHAR
 485
 486 #if wxUSE_UNICODE_UTF8
 487 /* static */
 488 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 489                                                const wxMBConv& conv)
 490 {
 491     // anything to do?
 492     if ( !psz || nLength == 0 )
 493         return SubstrBufFromMB(wxCharBuffer(""), 0);
 494
 495     // if psz is already in UTF-8, we don't have to do the roundtrip to
 496     // wchar_t* and back:
 497     if ( conv.IsUTF8() )
 498     {
 499         // we need to validate the input because UTF8 iterators assume valid
 500         // UTF-8 sequence and psz may be invalid:
 501         if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
 502         {
 503             // we must pass the real string length to SubstrBufFromMB ctor
 504             if ( nLength == npos )
 505                 nLength = psz ? strlen(psz) : 0;
 506             return SubstrBufFromMB(wxScopedCharBuffer::CreateNonOwned(psz, nLength),
 507                                    nLength);
 508         }
 509         // else: do the roundtrip through wchar_t*
 510     }
 511
 512     if ( nLength == npos )
 513         nLength = wxNO_LEN;
 514
 515     // first convert to wide string:
 516     size_t wcLen;
 517     wxScopedWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 518     if ( !wcLen )
 519         return SubstrBufFromMB(wxCharBuffer(""), 0);
 520
 521     // and then to UTF-8:
 522     SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
 523     // widechar -> UTF-8 conversion isn't supposed to ever fail:
 524     wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
 525
 526     return buf;
 527 }
 528 #endif // wxUSE_UNICODE_UTF8
 529
 530 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 531 /* static */
 532 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
 533                                                const wxMBConv& conv)
 534 {
 535     // anything to do?
 536     if ( !pwz || nLength == 0 )
 537         return SubstrBufFromWC(wxCharBuffer(""), 0);
 538
 539     if ( nLength == npos )
 540         nLength = wxNO_LEN;
 541
 542     size_t mbLen;
 543     wxScopedCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
 544     if ( !mbLen )
 545         return SubstrBufFromWC(wxCharBuffer(""), 0);
 546     else
 547         return SubstrBufFromWC(mbBuf, mbLen);
 548 }
 549 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 550
 551
 552 #if wxUSE_UNICODE_WCHAR
 553
 554 //Convert wxString in Unicode mode to a multi-byte string
 555 const wxScopedCharBuffer wxString::mb_str(const wxMBConv& conv) const
 556 {
 557     // NB: Length passed to cWC2MB() doesn't include terminating NUL, it's
 558     //     added by it automatically. If we passed length()+1 here, it would
 559     //     create a buffer with 2 trailing NULs of length one greater than
 560     //     expected.
 561     return conv.cWC2MB(wx_str(), length(), NULL);
 562 }
 563
 564 #elif wxUSE_UNICODE_UTF8
 565
 566 const wxScopedWCharBuffer wxString::wc_str() const
 567 {
 568     // NB: Length passed to cMB2WC() doesn't include terminating NUL, it's
 569     //     added by it automatically. If we passed length()+1 here, it would
 570     //     create a buffer with 2 trailing NULs of length one greater than
 571     //     expected.
 572     return wxMBConvStrictUTF8().cMB2WC
 573                                 (
 574                                     m_impl.c_str(),
 575                                     m_impl.length(),
 576                                     NULL
 577                                 );
 578 }
 579
 580 const wxScopedCharBuffer wxString::mb_str(const wxMBConv& conv) const
 581 {
 582     if ( conv.IsUTF8() )
 583         return wxScopedCharBuffer::CreateNonOwned(m_impl.c_str(), m_impl.length());
 584
 585     wxScopedWCharBuffer wcBuf(wc_str());
 586     if ( !wcBuf.length() )
 587         return wxCharBuffer("");
 588
 589     return conv.cWC2MB(wcBuf.data(), wcBuf.length(), NULL);
 590 }
 591
 592 #else // ANSI
 593
 594 //Converts this string to a wide character string if unicode
 595 //mode is not enabled and wxUSE_WCHAR_T is enabled
 596 const wxScopedWCharBuffer wxString::wc_str(const wxMBConv& conv) const
 597 {
 598     // NB: Length passed to cMB2WC() doesn't include terminating NUL, it's
 599     //     added by it automatically. If we passed length()+1 here, it would
 600     //     create a buffer with 2 trailing NULs of length one greater than
 601     //     expected.
 602     return conv.cMB2WC(wx_str(), length(), NULL);
 603 }
 604
 605 #endif // Unicode/ANSI
 606
 607 // shrink to minimal size (releasing extra memory)
 608 bool wxString::Shrink()
 609 {
 610   wxString tmp(begin(), end());
 611   swap(tmp);
 612   return tmp.length() == length();
 613 }
 614
 615 // deprecated compatibility code:
 616 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
 617 wxStringCharType *wxString::GetWriteBuf(size_t nLen)
 618 {
 619     return DoGetWriteBuf(nLen);
 620 }
 621
 622 void wxString::UngetWriteBuf()
 623 {
 624     DoUngetWriteBuf();
 625 }
 626
 627 void wxString::UngetWriteBuf(size_t nLen)
 628 {
 629     DoUngetWriteBuf(nLen);
 630 }
 631 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
 632
 633
 634 // ---------------------------------------------------------------------------
 635 // data access
 636 // ---------------------------------------------------------------------------
 637
 638 // all functions are inline in string.h
 639
 640 // ---------------------------------------------------------------------------
 641 // concatenation operators
 642 // ---------------------------------------------------------------------------
 643
 644 /*
 645  * concatenation functions come in 5 flavours:
 646  *  string + string
 647  *  char   + string      and      string + char
 648  *  C str  + string      and      string + C str
 649  */
 650
 651 wxString operator+(const wxString& str1, const wxString& str2)
 652 {
 653 #if !wxUSE_STL_BASED_WXSTRING
 654     wxASSERT( str1.IsValid() );
 655     wxASSERT( str2.IsValid() );
 656 #endif
 657
 658     wxString s = str1;
 659     s += str2;
 660
 661     return s;
 662 }
 663
 664 wxString operator+(const wxString& str, wxUniChar ch)
 665 {
 666 #if !wxUSE_STL_BASED_WXSTRING
 667     wxASSERT( str.IsValid() );
 668 #endif
 669
 670     wxString s = str;
 671     s += ch;
 672
 673     return s;
 674 }
 675
 676 wxString operator+(wxUniChar ch, const wxString& str)
 677 {
 678 #if !wxUSE_STL_BASED_WXSTRING
 679     wxASSERT( str.IsValid() );
 680 #endif
 681
 682     wxString s = ch;
 683     s += str;
 684
 685     return s;
 686 }
 687
 688 wxString operator+(const wxString& str, const char *psz)
 689 {
 690 #if !wxUSE_STL_BASED_WXSTRING
 691     wxASSERT( str.IsValid() );
 692 #endif
 693
 694     wxString s;
 695     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 696         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 697     }
 698     s += str;
 699     s += psz;
 700
 701     return s;
 702 }
 703
 704 wxString operator+(const wxString& str, const wchar_t *pwz)
 705 {
 706 #if !wxUSE_STL_BASED_WXSTRING
 707     wxASSERT( str.IsValid() );
 708 #endif
 709
 710     wxString s;
 711     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 712         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 713     }
 714     s += str;
 715     s += pwz;
 716
 717     return s;
 718 }
 719
 720 wxString operator+(const char *psz, const wxString& str)
 721 {
 722 #if !wxUSE_STL_BASED_WXSTRING
 723     wxASSERT( str.IsValid() );
 724 #endif
 725
 726     wxString s;
 727     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 728         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 729     }
 730     s = psz;
 731     s += str;
 732
 733     return s;
 734 }
 735
 736 wxString operator+(const wchar_t *pwz, const wxString& str)
 737 {
 738 #if !wxUSE_STL_BASED_WXSTRING
 739     wxASSERT( str.IsValid() );
 740 #endif
 741
 742     wxString s;
 743     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 744         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 745     }
 746     s = pwz;
 747     s += str;
 748
 749     return s;
 750 }
 751
 752 // ---------------------------------------------------------------------------
 753 // string comparison
 754 // ---------------------------------------------------------------------------
 755
 756 bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
 757 {
 758     return (length() == 1) && (compareWithCase ? GetChar(0u) == c
 759                                : wxToupper(GetChar(0u)) == wxToupper(c));
 760 }
 761
 762 #ifdef HAVE_STD_STRING_COMPARE
 763
 764 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
 765 //     UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
 766 //     sort strings in characters code point order by sorting the byte sequence
 767 //     in byte values order (i.e. what strcmp() and memcmp() do).
 768
 769 int wxString::compare(const wxString& str) const
 770 {
 771     return m_impl.compare(str.m_impl);
 772 }
 773
 774 int wxString::compare(size_t nStart, size_t nLen,
 775                       const wxString& str) const
 776 {
 777     size_t pos, len;
 778     PosLenToImpl(nStart, nLen, &pos, &len);
 779     return m_impl.compare(pos, len, str.m_impl);
 780 }
 781
 782 int wxString::compare(size_t nStart, size_t nLen,
 783                       const wxString& str,
 784                       size_t nStart2, size_t nLen2) const
 785 {
 786     size_t pos, len;
 787     PosLenToImpl(nStart, nLen, &pos, &len);
 788
 789     size_t pos2, len2;
 790     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 791
 792     return m_impl.compare(pos, len, str.m_impl, pos2, len2);
 793 }
 794
 795 int wxString::compare(const char* sz) const
 796 {
 797     return m_impl.compare(ImplStr(sz));
 798 }
 799
 800 int wxString::compare(const wchar_t* sz) const
 801 {
 802     return m_impl.compare(ImplStr(sz));
 803 }
 804
 805 int wxString::compare(size_t nStart, size_t nLen,
 806                       const char* sz, size_t nCount) const
 807 {
 808     size_t pos, len;
 809     PosLenToImpl(nStart, nLen, &pos, &len);
 810
 811     SubstrBufFromMB str(ImplStr(sz, nCount));
 812
 813     return m_impl.compare(pos, len, str.data, str.len);
 814 }
 815
 816 int wxString::compare(size_t nStart, size_t nLen,
 817                       const wchar_t* sz, size_t nCount) const
 818 {
 819     size_t pos, len;
 820     PosLenToImpl(nStart, nLen, &pos, &len);
 821
 822     SubstrBufFromWC str(ImplStr(sz, nCount));
 823
 824     return m_impl.compare(pos, len, str.data, str.len);
 825 }
 826
 827 #else // !HAVE_STD_STRING_COMPARE
 828
 829 static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
 830                           const wxStringCharType* s2, size_t l2)
 831 {
 832     if( l1 == l2 )
 833         return wxStringMemcmp(s1, s2, l1);
 834     else if( l1 < l2 )
 835     {
 836         int ret = wxStringMemcmp(s1, s2, l1);
 837         return ret == 0 ? -1 : ret;
 838     }
 839     else
 840     {
 841         int ret = wxStringMemcmp(s1, s2, l2);
 842         return ret == 0 ? +1 : ret;
 843     }
 844 }
 845
 846 int wxString::compare(const wxString& str) const
 847 {
 848     return ::wxDoCmp(m_impl.data(), m_impl.length(),
 849                      str.m_impl.data(), str.m_impl.length());
 850 }
 851
 852 int wxString::compare(size_t nStart, size_t nLen,
 853                       const wxString& str) const
 854 {
 855     wxASSERT(nStart <= length());
 856     size_type strLen = length() - nStart;
 857     nLen = strLen < nLen ? strLen : nLen;
 858
 859     size_t pos, len;
 860     PosLenToImpl(nStart, nLen, &pos, &len);
 861
 862     return ::wxDoCmp(m_impl.data() + pos,  len,
 863                      str.m_impl.data(), str.m_impl.length());
 864 }
 865
 866 int wxString::compare(size_t nStart, size_t nLen,
 867                       const wxString& str,
 868                       size_t nStart2, size_t nLen2) const
 869 {
 870     wxASSERT(nStart <= length());
 871     wxASSERT(nStart2 <= str.length());
 872     size_type strLen  =     length() - nStart,
 873               strLen2 = str.length() - nStart2;
 874     nLen  = strLen  < nLen  ? strLen  : nLen;
 875     nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
 876
 877     size_t pos, len;
 878     PosLenToImpl(nStart, nLen, &pos, &len);
 879     size_t pos2, len2;
 880     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 881
 882     return ::wxDoCmp(m_impl.data() + pos, len,
 883                      str.m_impl.data() + pos2, len2);
 884 }
 885
 886 int wxString::compare(const char* sz) const
 887 {
 888     SubstrBufFromMB str(ImplStr(sz, npos));
 889     if ( str.len == npos )
 890         str.len = wxStringStrlen(str.data);
 891     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 892 }
 893
 894 int wxString::compare(const wchar_t* sz) const
 895 {
 896     SubstrBufFromWC str(ImplStr(sz, npos));
 897     if ( str.len == npos )
 898         str.len = wxStringStrlen(str.data);
 899     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 900 }
 901
 902 int wxString::compare(size_t nStart, size_t nLen,
 903                       const char* sz, size_t nCount) const
 904 {
 905     wxASSERT(nStart <= length());
 906     size_type strLen = length() - nStart;
 907     nLen = strLen < nLen ? strLen : nLen;
 908
 909     size_t pos, len;
 910     PosLenToImpl(nStart, nLen, &pos, &len);
 911
 912     SubstrBufFromMB str(ImplStr(sz, nCount));
 913     if ( str.len == npos )
 914         str.len = wxStringStrlen(str.data);
 915
 916     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 917 }
 918
 919 int wxString::compare(size_t nStart, size_t nLen,
 920                       const wchar_t* sz, size_t nCount) const
 921 {
 922     wxASSERT(nStart <= length());
 923     size_type strLen = length() - nStart;
 924     nLen = strLen < nLen ? strLen : nLen;
 925
 926     size_t pos, len;
 927     PosLenToImpl(nStart, nLen, &pos, &len);
 928
 929     SubstrBufFromWC str(ImplStr(sz, nCount));
 930     if ( str.len == npos )
 931         str.len = wxStringStrlen(str.data);
 932
 933     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 934 }
 935
 936 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
 937
 938
 939 // ---------------------------------------------------------------------------
 940 // find_{first,last}_[not]_of functions
 941 // ---------------------------------------------------------------------------
 942
 943 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
 944
 945 // NB: All these functions are implemented  with the argument being wxChar*,
 946 //     i.e. widechar string in any Unicode build, even though native string
 947 //     representation is char* in the UTF-8 build. This is because we couldn't
 948 //     use memchr() to determine if a character is in a set encoded as UTF-8.
 949
 950 size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
 951 {
 952     return find_first_of(sz, nStart, wxStrlen(sz));
 953 }
 954
 955 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
 956 {
 957     return find_first_not_of(sz, nStart, wxStrlen(sz));
 958 }
 959
 960 size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
 961 {
 962     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 963
 964     size_t idx = nStart;
 965     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 966     {
 967         if ( wxTmemchr(sz, *i, n) )
 968             return idx;
 969     }
 970
 971     return npos;
 972 }
 973
 974 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
 975 {
 976     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 977
 978     size_t idx = nStart;
 979     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 980     {
 981         if ( !wxTmemchr(sz, *i, n) )
 982             return idx;
 983     }
 984
 985     return npos;
 986 }
 987
 988
 989 size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
 990 {
 991     return find_last_of(sz, nStart, wxStrlen(sz));
 992 }
 993
 994 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
 995 {
 996     return find_last_not_of(sz, nStart, wxStrlen(sz));
 997 }
 998
 999 size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
1000 {
1001     size_t len = length();
1002
1003     if ( nStart == npos )
1004     {
1005         nStart = len - 1;
1006     }
1007     else
1008     {
1009         wxASSERT_MSG( nStart <= len, _T("invalid index") );
1010     }
1011
1012     size_t idx = nStart;
1013     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1014           i != rend(); --idx, ++i )
1015     {
1016         if ( wxTmemchr(sz, *i, n) )
1017             return idx;
1018     }
1019
1020     return npos;
1021 }
1022
1023 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
1024 {
1025     size_t len = length();
1026
1027     if ( nStart == npos )
1028     {
1029         nStart = len - 1;
1030     }
1031     else
1032     {
1033         wxASSERT_MSG( nStart <= len, _T("invalid index") );
1034     }
1035
1036     size_t idx = nStart;
1037     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1038           i != rend(); --idx, ++i )
1039     {
1040         if ( !wxTmemchr(sz, *i, n) )
1041             return idx;
1042     }
1043
1044     return npos;
1045 }
1046
1047 size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
1048 {
1049     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
1050
1051     size_t idx = nStart;
1052     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
1053     {
1054         if ( *i != ch )
1055             return idx;
1056     }
1057
1058     return npos;
1059 }
1060
1061 size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
1062 {
1063     size_t len = length();
1064
1065     if ( nStart == npos )
1066     {
1067         nStart = len - 1;
1068     }
1069     else
1070     {
1071         wxASSERT_MSG( nStart <= len, _T("invalid index") );
1072     }
1073
1074     size_t idx = nStart;
1075     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1076           i != rend(); --idx, ++i )
1077     {
1078         if ( *i != ch )
1079             return idx;
1080     }
1081
1082     return npos;
1083 }
1084
// the functions above were implemented for wchar_t* arguments in Unicode
// build and char* in ANSI build; below are implementations for the other
// version:
//
// wxOtherCharType is the character type not handled above and STRCONV
// converts a string of this type using wxConvLibc, casting the result to
// const wxChar*. Both macros are local to this section and are undefined
// again at its end.
//
// NB: the result of STRCONV() is only used inside the expression in which it
//     appears in all the functions below.
#if wxUSE_UNICODE
    #define wxOtherCharType char
    #define STRCONV         (const wxChar*)wxConvLibc.cMB2WC
#else
    #define wxOtherCharType wchar_t
    #define STRCONV         (const wxChar*)wxConvLibc.cWC2MB
#endif

// each of the functions below just converts its argument and forwards to the
// corresponding wxChar-based overload
//
// NOTE(review): the overloads taking n pass the same n both as the length of
// the input to convert and as the length of the converted set -- confirm
// that this is intended when the conversion changes the number of elements
size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_first_of(STRCONV(sz), nStart); }

size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
                               size_t n) const
    { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_last_of(STRCONV(sz), nStart); }
size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
                              size_t n) const
    { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_first_not_of(STRCONV(sz), nStart); }
size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
                                   size_t n) const
    { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_last_not_of(STRCONV(sz), nStart); }
size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
                                  size_t n) const
    { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }

#undef wxOtherCharType
#undef STRCONV
1120
1121 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1122
1123 // ===========================================================================
1124 // other common string functions
1125 // ===========================================================================
1126
1127 int wxString::CmpNoCase(const wxString& s) const
1128 {
1129 #if defined(__WXMSW__) && !wxUSE_UNICODE_UTF8
1130     // prefer to use CompareString() if available as it's more efficient than
1131     // doing it manual or even using wxStricmp() (see #10375)
1132     switch ( ::CompareString(LOCALE_USER_DEFAULT, NORM_IGNORECASE,
1133                              m_impl.c_str(), m_impl.length(),
1134                              s.m_impl.c_str(), s.m_impl.length()) )
1135     {
1136         case CSTR_LESS_THAN:
1137             return -1;
1138
1139         case CSTR_EQUAL:
1140             return 0;
1141
1142         case CSTR_GREATER_THAN:
1143             return 1;
1144
1145         default:
1146             wxFAIL_MSG( "unexpected CompareString() return value" );
1147             // fall through
1148
1149         case 0:
1150             wxLogLastError("CompareString");
1151             // use generic code below
1152     }
1153 #endif // __WXMSW__ && !wxUSE_UNICODE_UTF8
1154
1155     // do the comparison manually: notice that we can't use wxStricmp() as it
1156     // doesn't handle embedded NULs
1157
1158     // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
1159     const_iterator i1 = begin();
1160     const_iterator end1 = end();
1161     const_iterator i2 = s.begin();
1162     const_iterator end2 = s.end();
1163
1164     for ( ; i1 != end1 && i2 != end2; ++i1, ++i2 )
1165     {
1166         wxUniChar lower1 = (wxChar)wxTolower(*i1);
1167         wxUniChar lower2 = (wxChar)wxTolower(*i2);
1168         if ( lower1 != lower2 )
1169             return lower1 < lower2 ? -1 : 1;
1170     }
1171
1172     size_t len1 = length();
1173     size_t len2 = s.length();
1174
1175     if ( len1 < len2 )
1176         return -1;
1177     else if ( len1 > len2 )
1178         return 1;
1179     return 0;
1180 }
1181
1182
1183 #if wxUSE_UNICODE
1184
1185 #ifdef __MWERKS__
1186 #ifndef __SCHAR_MAX__
1187 #define __SCHAR_MAX__ 127
1188 #endif
1189 #endif
1190
1191 wxString wxString::FromAscii(const char *ascii, size_t len)
1192 {
1193     if (!ascii || len == 0)
1194        return wxEmptyString;
1195
1196     wxString res;
1197
1198     {
1199         wxStringInternalBuffer buf(res, len);
1200         wxStringCharType *dest = buf;
1201
1202         for ( ; len > 0; --len )
1203         {
1204             unsigned char c = (unsigned char)*ascii++;
1205             wxASSERT_MSG( c < 0x80,
1206                           _T("Non-ASCII value passed to FromAscii().") );
1207
1208             *dest++ = (wchar_t)c;
1209         }
1210     }
1211
1212     return res;
1213 }
1214
1215 wxString wxString::FromAscii(const char *ascii)
1216 {
1217     return FromAscii(ascii, wxStrlen(ascii));
1218 }
1219
1220 wxString wxString::FromAscii(char ascii)
1221 {
1222     // What do we do with '\0' ?
1223
1224     unsigned char c = (unsigned char)ascii;
1225
1226     wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") );
1227
1228     // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1229     return wxString(wxUniChar((wchar_t)c));
1230 }
1231
1232 const wxScopedCharBuffer wxString::ToAscii() const
1233 {
1234     // this will allocate enough space for the terminating NUL too
1235     wxCharBuffer buffer(length());
1236     char *dest = buffer.data();
1237
1238     for ( const_iterator i = begin(); i != end(); ++i )
1239     {
1240         wxUniChar c(*i);
1241         // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1242         *dest++ = c.IsAscii() ? (char)c : '_';
1243
1244         // the output string can't have embedded NULs anyhow, so we can safely
1245         // stop at first of them even if we do have any
1246         if ( !c )
1247             break;
1248     }
1249
1250     return buffer;
1251 }
1252
1253 #endif // wxUSE_UNICODE
1254
1255 // extract string of length nCount starting at nFirst
1256 wxString wxString::Mid(size_t nFirst, size_t nCount) const
1257 {
1258     size_t nLen = length();
1259
1260     // default value of nCount is npos and means "till the end"
1261     if ( nCount == npos )
1262     {
1263         nCount = nLen - nFirst;
1264     }
1265
1266     // out-of-bounds requests return sensible things
1267     if ( nFirst + nCount > nLen )
1268     {
1269         nCount = nLen - nFirst;
1270     }
1271
1272     if ( nFirst > nLen )
1273     {
1274         // AllocCopy() will return empty string
1275         return wxEmptyString;
1276     }
1277
1278     wxString dest(*this, nFirst, nCount);
1279     if ( dest.length() != nCount )
1280     {
1281         wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1282     }
1283
1284     return dest;
1285 }
1286
1287 // check that the string starts with prefix and return the rest of the string
1288 // in the provided pointer if it is not NULL, otherwise return false
1289 bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
1290 {
1291     if ( compare(0, prefix.length(), prefix) != 0 )
1292         return false;
1293
1294     if ( rest )
1295     {
1296         // put the rest of the string into provided pointer
1297         rest->assign(*this, prefix.length(), npos);
1298     }
1299
1300     return true;
1301 }
1302
1303
1304 // check that the string ends with suffix and return the rest of it in the
1305 // provided pointer if it is not NULL, otherwise return false
1306 bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
1307 {
1308     int start = length() - suffix.length();
1309
1310     if ( start < 0 || compare(start, npos, suffix) != 0 )
1311         return false;
1312
1313     if ( rest )
1314     {
1315         // put the rest of the string into provided pointer
1316         rest->assign(*this, 0, start);
1317     }
1318
1319     return true;
1320 }
1321
1322
1323 // extract nCount last (rightmost) characters
1324 wxString wxString::Right(size_t nCount) const
1325 {
1326   if ( nCount > length() )
1327     nCount = length();
1328
1329   wxString dest(*this, length() - nCount, nCount);
1330   if ( dest.length() != nCount ) {
1331     wxFAIL_MSG( _T("out of memory in wxString::Right") );
1332   }
1333   return dest;
1334 }
1335
1336 // get all characters after the last occurrence of ch
1337 // (returns the whole string if ch not found)
1338 wxString wxString::AfterLast(wxUniChar ch) const
1339 {
1340   wxString str;
1341   int iPos = Find(ch, true);
1342   if ( iPos == wxNOT_FOUND )
1343     str = *this;
1344   else
1345     str.assign(*this, iPos + 1, npos);
1346
1347   return str;
1348 }
1349
1350 // extract nCount first (leftmost) characters
1351 wxString wxString::Left(size_t nCount) const
1352 {
1353   if ( nCount > length() )
1354     nCount = length();
1355
1356   wxString dest(*this, 0, nCount);
1357   if ( dest.length() != nCount ) {
1358     wxFAIL_MSG( _T("out of memory in wxString::Left") );
1359   }
1360   return dest;
1361 }
1362
1363 // get all characters before the first occurrence of ch
1364 // (returns the whole string if ch not found)
1365 wxString wxString::BeforeFirst(wxUniChar ch) const
1366 {
1367   int iPos = Find(ch);
1368   if ( iPos == wxNOT_FOUND )
1369       iPos = length();
1370   return wxString(*this, 0, iPos);
1371 }
1372
1373 /// get all characters before the last occurrence of ch
1374 /// (returns empty string if ch not found)
1375 wxString wxString::BeforeLast(wxUniChar ch) const
1376 {
1377   wxString str;
1378   int iPos = Find(ch, true);
1379   if ( iPos != wxNOT_FOUND && iPos != 0 )
1380     str = wxString(c_str(), iPos);
1381
1382   return str;
1383 }
1384
1385 /// get all characters after the first occurrence of ch
1386 /// (returns empty string if ch not found)
1387 wxString wxString::AfterFirst(wxUniChar ch) const
1388 {
1389   wxString str;
1390   int iPos = Find(ch);
1391   if ( iPos != wxNOT_FOUND )
1392       str.assign(*this, iPos + 1, npos);
1393
1394   return str;
1395 }
1396
1397 // replace first (or all) occurrences of some substring with another one
1398 size_t wxString::Replace(const wxString& strOld,
1399                          const wxString& strNew, bool bReplaceAll)
1400 {
1401     // if we tried to replace an empty string we'd enter an infinite loop below
1402     wxCHECK_MSG( !strOld.empty(), 0,
1403                  _T("wxString::Replace(): invalid parameter") );
1404
1405     wxSTRING_INVALIDATE_CACHE();
1406
1407     size_t uiCount = 0;   // count of replacements made
1408
1409     // optimize the special common case: replacement of one character by
1410     // another one (in UTF-8 case we can only do this for ASCII characters)
1411     //
1412     // benchmarks show that this special version is around 3 times faster
1413     // (depending on the proportion of matching characters and UTF-8/wchar_t
1414     // build)
1415     if ( strOld.m_impl.length() == 1 && strNew.m_impl.length() == 1 )
1416     {
1417         const wxStringCharType chOld = strOld.m_impl[0],
1418                                chNew = strNew.m_impl[0];
1419
1420         // this loop is the simplified version of the one below
1421         for ( size_t pos = 0; ; )
1422         {
1423             pos = m_impl.find(chOld, pos);
1424             if ( pos == npos )
1425                 break;
1426
1427             m_impl[pos++] = chNew;
1428
1429             uiCount++;
1430
1431             if ( !bReplaceAll )
1432                 break;
1433         }
1434     }
1435     else if ( !bReplaceAll)
1436     {
1437         size_t pos = m_impl.find(strOld, 0);
1438         if ( pos != npos )
1439         {
1440             m_impl.replace(pos, strOld.m_impl.length(), strNew.m_impl);
1441             uiCount = 1;
1442         }
1443     }
1444     else // replace all occurrences
1445     {
1446         const size_t uiOldLen = strOld.m_impl.length();
1447         const size_t uiNewLen = strNew.m_impl.length();
1448
1449         // first scan the string to find all positions at which the replacement
1450         // should be made
1451         wxVector<size_t> replacePositions;
1452
1453         size_t pos;
1454         for ( pos = m_impl.find(strOld.m_impl, 0);
1455               pos != npos;
1456               pos = m_impl.find(strOld.m_impl, pos + uiOldLen))
1457         {
1458             replacePositions.push_back(pos);
1459             ++uiCount;
1460         }
1461
1462         if ( !uiCount )
1463             return 0;
1464
1465         // allocate enough memory for the whole new string
1466         wxString tmp;
1467         tmp.m_impl.reserve(m_impl.length() + uiCount*(uiNewLen - uiOldLen));
1468
1469         // copy this string to tmp doing replacements on the fly
1470         size_t replNum = 0;
1471         for ( pos = 0; replNum < uiCount; replNum++ )
1472         {
1473             const size_t nextReplPos = replacePositions[replNum];
1474
1475             if ( pos != nextReplPos )
1476             {
1477                 tmp.m_impl.append(m_impl, pos, nextReplPos - pos);
1478             }
1479
1480             tmp.m_impl.append(strNew.m_impl);
1481             pos = nextReplPos + uiOldLen;
1482         }
1483
1484         if ( pos != m_impl.length() )
1485         {
1486             // append the rest of the string unchanged
1487             tmp.m_impl.append(m_impl, pos, m_impl.length() - pos);
1488         }
1489
1490         swap(tmp);
1491     }
1492
1493     return uiCount;
1494 }
1495
1496 bool wxString::IsAscii() const
1497 {
1498     for ( const_iterator i = begin(); i != end(); ++i )
1499     {
1500         if ( !(*i).IsAscii() )
1501             return false;
1502     }
1503
1504     return true;
1505 }
1506
1507 bool wxString::IsWord() const
1508 {
1509     for ( const_iterator i = begin(); i != end(); ++i )
1510     {
1511         if ( !wxIsalpha(*i) )
1512             return false;
1513     }
1514
1515     return true;
1516 }
1517
1518 bool wxString::IsNumber() const
1519 {
1520     if ( empty() )
1521         return true;
1522
1523     const_iterator i = begin();
1524
1525     if ( *i == _T('-') || *i == _T('+') )
1526         ++i;
1527
1528     for ( ; i != end(); ++i )
1529     {
1530         if ( !wxIsdigit(*i) )
1531             return false;
1532     }
1533
1534     return true;
1535 }
1536
1537 wxString wxString::Strip(stripType w) const
1538 {
1539     wxString s = *this;
1540     if ( w & leading ) s.Trim(false);
1541     if ( w & trailing ) s.Trim(true);
1542     return s;
1543 }
1544
1545 // ---------------------------------------------------------------------------
1546 // case conversion
1547 // ---------------------------------------------------------------------------
1548
1549 wxString& wxString::MakeUpper()
1550 {
1551   for ( iterator it = begin(), en = end(); it != en; ++it )
1552     *it = (wxChar)wxToupper(*it);
1553
1554   return *this;
1555 }
1556
1557 wxString& wxString::MakeLower()
1558 {
1559   for ( iterator it = begin(), en = end(); it != en; ++it )
1560     *it = (wxChar)wxTolower(*it);
1561
1562   return *this;
1563 }
1564
1565 wxString& wxString::MakeCapitalized()
1566 {
1567     const iterator en = end();
1568     iterator it = begin();
1569     if ( it != en )
1570     {
1571         *it = (wxChar)wxToupper(*it);
1572         for ( ++it; it != en; ++it )
1573             *it = (wxChar)wxTolower(*it);
1574     }
1575
1576     return *this;
1577 }
1578
1579 // ---------------------------------------------------------------------------
1580 // trimming and padding
1581 // ---------------------------------------------------------------------------
1582
1583 // some compilers (VC++ 6.0 not to name them) return true for a call to
1584 // isspace('\xEA') in the C locale which seems to be broken to me, but we have
1585 // to live with this by checking that the character is a 7 bit one - even if
1586 // this may fail to detect some spaces (I don't know if Unicode doesn't have
1587 // space-like symbols somewhere except in the first 128 chars), it is arguably
1588 // still better than trimming away accented letters
1589 inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1590
1591 // trims spaces (in the sense of isspace) from left or right side
1592 wxString& wxString::Trim(bool bFromRight)
1593 {
1594     // first check if we're going to modify the string at all
1595     if ( !empty() &&
1596          (
1597           (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1598           (!bFromRight && wxSafeIsspace(GetChar(0u)))
1599          )
1600        )
1601     {
1602         if ( bFromRight )
1603         {
1604             // find last non-space character
1605             reverse_iterator psz = rbegin();
1606             while ( (psz != rend()) && wxSafeIsspace(*psz) )
1607                 ++psz;
1608
1609             // truncate at trailing space start
1610             erase(psz.base(), end());
1611         }
1612         else
1613         {
1614             // find first non-space character
1615             iterator psz = begin();
1616             while ( (psz != end()) && wxSafeIsspace(*psz) )
1617                 ++psz;
1618
1619             // fix up data and length
1620             erase(begin(), psz);
1621         }
1622     }
1623
1624     return *this;
1625 }
1626
1627 // adds nCount characters chPad to the string from either side
1628 wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1629 {
1630     wxString s(chPad, nCount);
1631
1632     if ( bFromRight )
1633         *this += s;
1634     else
1635     {
1636         s += *this;
1637         swap(s);
1638     }
1639
1640     return *this;
1641 }
1642
1643 // truncate the string
1644 wxString& wxString::Truncate(size_t uiLen)
1645 {
1646     if ( uiLen < length() )
1647     {
1648         erase(begin() + uiLen, end());
1649     }
1650     //else: nothing to do, string is already short enough
1651
1652     return *this;
1653 }
1654
1655 // ---------------------------------------------------------------------------
1656 // finding (return wxNOT_FOUND if not found and index otherwise)
1657 // ---------------------------------------------------------------------------
1658
1659 // find a character
1660 int wxString::Find(wxUniChar ch, bool bFromEnd) const
1661 {
1662     size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1663
1664     return (idx == npos) ? wxNOT_FOUND : (int)idx;
1665 }
1666
1667 // ----------------------------------------------------------------------------
1668 // conversion to numbers
1669 // ----------------------------------------------------------------------------
1670
// The implementation of all the functions below is exactly the same so factor
// it out. Note that number extraction works correctly on UTF-8 strings, so
// we can use wxStringCharType and wx_str() for maximum efficiency.
//
// The macros are meant to be used together inside a function having a
// "pVal" output pointer parameter: WX_STRING_TO_X_TYPE_START comes first,
// then a declaration of a variable named "val" initialized by a conversion
// function called with "start" and "&end", and finally
// WX_STRING_TO_X_TYPE_END which returns from the function.

// expands to its argument except under WinCE where it expands to nothing
// (errno.h is not included for WinCE at the top of this file)
#ifndef __WXWINCE__
    #define DO_IF_NOT_WINCE(x) x
#else
    #define DO_IF_NOT_WINCE(x)
#endif

// checks that pVal is not NULL, resets errno (except under WinCE) and
// declares the "start" and "end" pointers used for the conversion
#define WX_STRING_TO_X_TYPE_START                                           \
    wxCHECK_MSG( pVal, false, _T("NULL output pointer") );                  \
    DO_IF_NOT_WINCE( errno = 0; )                                           \
    const wxStringCharType *start = wx_str();                               \
    wxStringCharType *end;

// stores "val" in *pVal and returns true if the conversion succeeded or
// returns false, leaving *pVal unchanged, otherwise
#define WX_STRING_TO_X_TYPE_END                                             \
    /* return true only if scan was stopped by the terminating NUL and */   \
    /* if the string was not empty to start with and no under/overflow */   \
    /* occurred: */                                                         \
    if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )         \
        return false;                                                       \
    *pVal = val;                                                            \
    return true;
1695
// Convert the string to a long using wxStrtol() with the given base, which
// must be either 0 or in 2..36 range (this is asserted).
//
// Returns true and stores the result in *pVal only if the entire non-empty
// string was consumed by the conversion and, except under WinCE, errno was
// not set to ERANGE; returns false otherwise, including for NULL pVal.
bool wxString::ToLong(long *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );

    // NB: the START macro declares "start" and "end" used in the call below
    //     and the END macro contains the return statements
    WX_STRING_TO_X_TYPE_START
    long val = wxStrtol(start, &end, base);
    WX_STRING_TO_X_TYPE_END
}
1704
// Convert the string to an unsigned long using wxStrtoul() with the given
// base, which must be either 0 or in 2..36 range (this is asserted).
//
// Returns true and stores the result in *pVal only if the entire non-empty
// string was consumed by the conversion and, except under WinCE, errno was
// not set to ERANGE; returns false otherwise, including for NULL pVal.
bool wxString::ToULong(unsigned long *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );

    // NB: the START macro declares "start" and "end" used in the call below
    //     and the END macro contains the return statements
    WX_STRING_TO_X_TYPE_START
    unsigned long val = wxStrtoul(start, &end, base);
    WX_STRING_TO_X_TYPE_END
}
1713
// Convert the string to a wxLongLong_t using wxStrtoll() with the given
// base, which must be either 0 or in 2..36 range (this is asserted).
//
// Returns true and stores the result in *pVal only if the entire non-empty
// string was consumed by the conversion and, except under WinCE, errno was
// not set to ERANGE; returns false otherwise, including for NULL pVal.
bool wxString::ToLongLong(wxLongLong_t *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );

    // NB: the START macro declares "start" and "end" used in the call below
    //     and the END macro contains the return statements
    WX_STRING_TO_X_TYPE_START
    wxLongLong_t val = wxStrtoll(start, &end, base);
    WX_STRING_TO_X_TYPE_END
}
1722
// Convert the string to a wxULongLong_t using wxStrtoull() with the given
// base, which must be either 0 or in 2..36 range (this is asserted).
//
// Returns true and stores the result in *pVal only if the entire non-empty
// string was consumed by the conversion and, except under WinCE, errno was
// not set to ERANGE; returns false otherwise, including for NULL pVal.
bool wxString::ToULongLong(wxULongLong_t *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );

    // NB: the START macro declares "start" and "end" used in the call below
    //     and the END macro contains the return statements
    WX_STRING_TO_X_TYPE_START
    wxULongLong_t val = wxStrtoull(start, &end, base);
    WX_STRING_TO_X_TYPE_END
}
1731
// Convert the string to a double using wxStrtod().
//
// Returns true and stores the result in *pVal only if the entire non-empty
// string was consumed by the conversion and, except under WinCE, errno was
// not set to ERANGE; returns false otherwise, including for NULL pVal.
bool wxString::ToDouble(double *pVal) const
{
    // NB: the START macro declares "start" and "end" used in the call below
    //     and the END macro contains the return statements
    WX_STRING_TO_X_TYPE_START
    double val = wxStrtod(start, &end);
    WX_STRING_TO_X_TYPE_END
}
1738
1739 #if wxUSE_XLOCALE
1740
// Same as ToLong() but the conversion function is passed wxCLocale instead
// of relying on the current locale; base must be either 0 or in 2..36 range
// (this is asserted).
//
// Returns true and stores the result in *pVal only if the entire non-empty
// string was consumed by the conversion and, except under WinCE, errno was
// not set to ERANGE; returns false otherwise, including for NULL pVal.
bool wxString::ToCLong(long *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );

    // NB: the START macro declares "start" and "end" used in the calls below
    //     and the END macro contains the return statements
    WX_STRING_TO_X_TYPE_START
    // the "A" variant is selected in UTF-8 and ANSI builds
#if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
    long val = wxStrtol_lA(start, &end, base, wxCLocale);
#else
    long val = wxStrtol_l(start, &end, base, wxCLocale);
#endif
    WX_STRING_TO_X_TYPE_END
}
1753
// Same as ToULong() but the conversion function is passed wxCLocale instead
// of relying on the current locale; base must be either 0 or in 2..36 range
// (this is asserted).
//
// Returns true and stores the result in *pVal only if the entire non-empty
// string was consumed by the conversion and, except under WinCE, errno was
// not set to ERANGE; returns false otherwise, including for NULL pVal.
bool wxString::ToCULong(unsigned long *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );

    // NB: the START macro declares "start" and "end" used in the calls below
    //     and the END macro contains the return statements
    WX_STRING_TO_X_TYPE_START
    // the "A" variant is selected in UTF-8 and ANSI builds
#if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
    unsigned long val = wxStrtoul_lA(start, &end, base, wxCLocale);
#else
    unsigned long val = wxStrtoul_l(start, &end, base, wxCLocale);
#endif
    WX_STRING_TO_X_TYPE_END
}
1766
// Same as ToDouble() but the conversion function is passed wxCLocale instead
// of relying on the current locale.
//
// Returns true and stores the result in *pVal only if the entire non-empty
// string was consumed by the conversion and, except under WinCE, errno was
// not set to ERANGE; returns false otherwise, including for NULL pVal.
bool wxString::ToCDouble(double *pVal) const
{
    // NB: the START macro declares "start" and "end" used in the calls below
    //     and the END macro contains the return statements
    WX_STRING_TO_X_TYPE_START
    // the "A" variant is selected in UTF-8 and ANSI builds
#if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
    double val = wxStrtod_lA(start, &end, wxCLocale);
#else
    double val = wxStrtod_l(start, &end, wxCLocale);
#endif
    WX_STRING_TO_X_TYPE_END
}
1777
1778 #endif  // wxUSE_XLOCALE
1779
1780 // ---------------------------------------------------------------------------
1781 // formatted output
1782 // ---------------------------------------------------------------------------
1783
#if !wxUSE_UTF8_LOCALE_ONLY
// Return a new string containing the variable arguments formatted according
// to the wxChar format string; the formatting itself is done by PrintfV().
//
// The function is defined as a member of wxStringPrintfMixinBase instead of
// wxString itself if wxNEEDS_WXSTRING_PRINTF_MIXIN is defined.
/* static */
#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
#else
wxString wxString::DoFormatWchar(const wxChar *format, ...)
#endif
{
    va_list argptr;
    va_start(argptr, format);

    wxString s;
    s.PrintfV(format, argptr);

    va_end(argptr);

    return s;
}
#endif // !wxUSE_UTF8_LOCALE_ONLY
1803
#if wxUSE_UNICODE_UTF8
// Return a new string containing the variable arguments formatted according
// to the char format string; the formatting itself is done by PrintfV().
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
1819
1820 /* static */
1821 wxString wxString::FormatV(const wxString& format, va_list argptr)
1822 {
1823     wxString s;
1824     s.PrintfV(format, argptr);
1825     return s;
1826 }
1827
#if !wxUSE_UTF8_LOCALE_ONLY
// Format the variable arguments according to the wxChar format string into
// this string using PrintfV() and return the value returned by it.
//
// The function is defined as a member of wxStringPrintfMixinBase instead of
// wxString itself if wxNEEDS_WXSTRING_PRINTF_MIXIN is defined.
#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
#else
int wxString::DoPrintfWchar(const wxChar *format, ...)
#endif
{
    va_list argptr;
    va_start(argptr, format);

#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
    // get a pointer to the wxString instance by downcasting "this" from the
    // mixin base class
    //
    // NOTE(review): this comment used to say that dynamic_cast<> had to be
    // used here as the only safe cast with multiple inheritance, but the
    // code uses static_cast<> -- confirm which one is intended
    wxString *str = static_cast<wxString*>(this);
#else
    wxString *str = this;
#endif

    int iLen = str->PrintfV(format, argptr);

    va_end(argptr);

    return iLen;
}
#endif // !wxUSE_UTF8_LOCALE_ONLY
1854
#if wxUSE_UNICODE_UTF8
// Format the variable arguments according to the char format string into
// this string using PrintfV() and return the value returned by it.
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);
    const int rc = PrintfV(format, args);
    va_end(args);

    return rc;
}
#endif // wxUSE_UNICODE_UTF8
1868
1869 /*
1870     Uses wxVsnprintf and places the result into the this string.
1871
1872     In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1873     it is vswprintf.  Due to a discrepancy between vsnprintf and vswprintf in
1874     the ISO C99 (and thus SUSv3) standard the return value for the case of
1875     an undersized buffer is inconsistent.  For conforming vsnprintf
1876     implementations the function must return the number of characters that
1877     would have been printed had the buffer been large enough.  For conforming
1878     vswprintf implementations the function must return a negative number
1879     and set errno.
1880
1881     What vswprintf sets errno to is undefined but Darwin seems to set it to
1882     EOVERFLOW.  The only expected errno are EILSEQ and EINVAL.  Both of
1883     those are defined in the standard and backed up by several conformance
1884     statements.  Note that ENOMEM mentioned in the manual page does not
1885     apply to swprintf, only wprintf and fwprintf.
1886
1887     Official manual page:
1888     http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1889
1890     Some conformance statements (AIX, Solaris):
1891     http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1892     http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1893
1894     Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1895     EILSEQ and EINVAL are specifically defined to mean the error is other than
1896     an undersized buffer and no other errno are defined we treat those two
1897     as meaning hard errors and everything else gets the old behavior which
1898     is to keep looping and increasing buffer size until the function succeeds.
1899
1900     In practice it's impossible to determine before compilation which behavior
1901     may be used.  The vswprintf function may have vsnprintf-like behavior or
1902     vice-versa.  Behavior detected on one release can theoretically change
1903     with an updated release.  Not to mention that configure testing for it
1904     would require the test to be run on the host system, not the build system
1905     which makes cross compilation difficult. Therefore, we make no assumptions
1906     about behavior and try our best to handle every known case, including the
1907     case where wxVsnprintf returns a negative number and fails to set errno.
1908
1909     There is yet one more non-standard implementation and that is our own.
1910     Fortunately, that can be detected at compile-time.
1911
1912     On top of all that, ISO C99 explicitly defines snprintf to write a null
    character to the last position of the specified buffer.  That would be
    at the given buffer size minus 1.  It is supposed to do this even if it
1915     turns out that the buffer is sized too small.
1916
1917     Darwin (tested on 10.5) follows the C99 behavior exactly.
1918
1919     Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1920     errno even when it fails.  However, it only seems to ever fail due
1921     to an undersized buffer.
1922 */
1923 #if wxUSE_UNICODE_UTF8
1924 template<typename BufferType>
1925 #else
1926 // we only need one version in non-UTF8 builds and at least two Windows
1927 // compilers have problems with this function template, so use just one
1928 // normal function here
1929 #endif
1930 static int DoStringPrintfV(wxString& str,
1931                            const wxString& format, va_list argptr)
1932 {
1933     int size = 1024;
1934
1935     for ( ;; )
1936     {
1937 #if wxUSE_UNICODE_UTF8
1938         BufferType tmp(str, size + 1);
1939         typename BufferType::CharType *buf = tmp;
1940 #else
1941         wxStringBuffer tmp(str, size + 1);
1942         wxChar *buf = tmp;
1943 #endif
1944
1945         if ( !buf )
1946         {
1947             // out of memory
1948
1949             // in UTF-8 build, leaving uninitialized junk in the buffer
1950             // could result in invalid non-empty UTF-8 string, so just
1951             // reset the string to empty on failure:
1952             buf[0] = '\0';
1953             return -1;
1954         }
1955
1956         // wxVsnprintf() may modify the original arg pointer, so pass it
1957         // only a copy
1958         va_list argptrcopy;
1959         wxVaCopy(argptrcopy, argptr);
1960
1961 #ifndef __WXWINCE__
1962         // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1963         errno = 0;
1964 #endif
1965         int len = wxVsnprintf(buf, size, format, argptrcopy);
1966         va_end(argptrcopy);
1967
1968         // some implementations of vsnprintf() don't NUL terminate
1969         // the string if there is not enough space for it so
1970         // always do it manually
1971         // FIXME: This really seems to be the wrong and would be an off-by-one
1972         // bug except the code above allocates an extra character.
1973         buf[size] = _T('\0');
1974
1975         // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1976         // total number of characters which would have been written if the
1977         // buffer were large enough (newer standards such as Unix98)
1978         if ( len < 0 )
1979         {
1980             // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1981             //     wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1982             //     is true if *both* of them use our own implementation,
1983             //     otherwise we can't be sure
1984 #if wxUSE_WXVSNPRINTF
1985             // we know that our own implementation of wxVsnprintf() returns -1
1986             // only for a format error - thus there's something wrong with
1987             // the user's format string
1988             buf[0] = '\0';
1989             return -1;
1990 #else // possibly using system version
1991             // assume it only returns error if there is not enough space, but
1992             // as we don't know how much we need, double the current size of
1993             // the buffer
1994 #ifndef __WXWINCE__
1995             if( (errno == EILSEQ) || (errno == EINVAL) )
1996             // If errno was set to one of the two well-known hard errors
1997             // then fail immediately to avoid an infinite loop.
1998                 return -1;
1999             else
2000 #endif // __WXWINCE__
2001             // still not enough, as we don't know how much we need, double the
2002             // current size of the buffer
2003                 size *= 2;
2004 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
2005         }
2006         else if ( len >= size )
2007         {
2008 #if wxUSE_WXVSNPRINTF
2009             // we know that our own implementation of wxVsnprintf() returns
2010             // size+1 when there's not enough space but that's not the size
2011             // of the required buffer!
2012             size *= 2;      // so we just double the current size of the buffer
2013 #else
2014             // some vsnprintf() implementations NUL-terminate the buffer and
2015             // some don't in len == size case, to be safe always add 1
2016             // FIXME: I don't quite understand this comment.  The vsnprintf
2017             // function is specifically defined to return the number of
2018             // characters printed not including the null terminator.
2019             // So OF COURSE you need to add 1 to get the right buffer size.
2020             // The following line is definitely correct, no question.
2021             size = len + 1;
2022 #endif
2023         }
2024         else // ok, there was enough space
2025         {
2026             break;
2027         }
2028     }
2029
2030     // we could have overshot
2031     str.Shrink();
2032
2033     return str.length();
2034 }
2035
2036 int wxString::PrintfV(const wxString& format, va_list argptr)
2037 {
2038 #if wxUSE_UNICODE_UTF8
2039     #if wxUSE_STL_BASED_WXSTRING
2040         typedef wxStringTypeBuffer<char> Utf8Buffer;
2041     #else
2042         typedef wxStringInternalBuffer Utf8Buffer;
2043     #endif
2044 #endif
2045
2046 #if wxUSE_UTF8_LOCALE_ONLY
2047     return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
2048 #else
2049     #if wxUSE_UNICODE_UTF8
2050     if ( wxLocaleIsUtf8 )
2051         return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
2052     else
2053         // wxChar* version
2054         return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
2055     #else
2056         return DoStringPrintfV(*this, format, argptr);
2057     #endif // UTF8/WCHAR
2058 #endif
2059 }
2060
2061 // ----------------------------------------------------------------------------
2062 // misc other operations
2063 // ----------------------------------------------------------------------------
2064
2065 // returns true if the string matches the pattern which may contain '*' and
2066 // '?' metacharacters (as usual, '?' matches any character and '*' any number
2067 // of them)
2068 bool wxString::Matches(const wxString& mask) const
2069 {
2070     // I disable this code as it doesn't seem to be faster (in fact, it seems
2071     // to be much slower) than the old, hand-written code below and using it
2072     // here requires always linking with libregex even if the user code doesn't
2073     // use it
2074 #if 0 // wxUSE_REGEX
2075     // first translate the shell-like mask into a regex
2076     wxString pattern;
2077     pattern.reserve(wxStrlen(pszMask));
2078
2079     pattern += _T('^');
2080     while ( *pszMask )
2081     {
2082         switch ( *pszMask )
2083         {
2084             case _T('?'):
2085                 pattern += _T('.');
2086                 break;
2087
2088             case _T('*'):
2089                 pattern += _T(".*");
2090                 break;
2091
2092             case _T('^'):
2093             case _T('.'):
2094             case _T('$'):
2095             case _T('('):
2096             case _T(')'):
2097             case _T('|'):
2098             case _T('+'):
2099             case _T('\\'):
2100                 // these characters are special in a RE, quote them
2101                 // (however note that we don't quote '[' and ']' to allow
2102                 // using them for Unix shell like matching)
2103                 pattern += _T('\\');
2104                 // fall through
2105
2106             default:
2107                 pattern += *pszMask;
2108         }
2109
2110         pszMask++;
2111     }
2112     pattern += _T('$');
2113
2114     // and now use it
2115     return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
2116 #else // !wxUSE_REGEX
2117   // TODO: this is, of course, awfully inefficient...
2118
2119   // FIXME-UTF8: implement using iterators, remove #if
2120 #if wxUSE_UNICODE_UTF8
2121   const wxScopedWCharBuffer maskBuf = mask.wc_str();
2122   const wxScopedWCharBuffer txtBuf = wc_str();
2123   const wxChar *pszMask = maskBuf.data();
2124   const wxChar *pszTxt = txtBuf.data();
2125 #else
2126   const wxChar *pszMask = mask.wx_str();
2127   // the char currently being checked
2128   const wxChar *pszTxt = wx_str();
2129 #endif
2130
2131   // the last location where '*' matched
2132   const wxChar *pszLastStarInText = NULL;
2133   const wxChar *pszLastStarInMask = NULL;
2134
2135 match:
2136   for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
2137     switch ( *pszMask ) {
2138       case wxT('?'):
2139         if ( *pszTxt == wxT('\0') )
2140           return false;
2141
2142         // pszTxt and pszMask will be incremented in the loop statement
2143
2144         break;
2145
2146       case wxT('*'):
2147         {
2148           // remember where we started to be able to backtrack later
2149           pszLastStarInText = pszTxt;
2150           pszLastStarInMask = pszMask;
2151
2152           // ignore special chars immediately following this one
2153           // (should this be an error?)
2154           while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
2155             pszMask++;
2156
2157           // if there is nothing more, match
2158           if ( *pszMask == wxT('\0') )
2159             return true;
2160
2161           // are there any other metacharacters in the mask?
2162           size_t uiLenMask;
2163           const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));
2164
2165           if ( pEndMask != NULL ) {
2166             // we have to match the string between two metachars
2167             uiLenMask = pEndMask - pszMask;
2168           }
2169           else {
2170             // we have to match the remainder of the string
2171             uiLenMask = wxStrlen(pszMask);
2172           }
2173
2174           wxString strToMatch(pszMask, uiLenMask);
2175           const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
2176           if ( pMatch == NULL )
2177             return false;
2178
2179           // -1 to compensate "++" in the loop
2180           pszTxt = pMatch + uiLenMask - 1;
2181           pszMask += uiLenMask - 1;
2182         }
2183         break;
2184
2185       default:
2186         if ( *pszMask != *pszTxt )
2187           return false;
2188         break;
2189     }
2190   }
2191
2192   // match only if nothing left
2193   if ( *pszTxt == wxT('\0') )
2194     return true;
2195
2196   // if we failed to match, backtrack if we can
2197   if ( pszLastStarInText ) {
2198     pszTxt = pszLastStarInText + 1;
2199     pszMask = pszLastStarInMask;
2200
2201     pszLastStarInText = NULL;
2202
2203     // don't bother resetting pszLastStarInMask, it's unnecessary
2204
2205     goto match;
2206   }
2207
2208   return false;
2209 #endif // wxUSE_REGEX/!wxUSE_REGEX
2210 }
2211
2212 // Count the number of chars
2213 int wxString::Freq(wxUniChar ch) const
2214 {
2215     int count = 0;
2216     for ( const_iterator i = begin(); i != end(); ++i )
2217     {
2218         if ( *i == ch )
2219             count ++;
2220     }
2221     return count;
2222 }
2223