src/common/string.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/string.cpp
   3 // Purpose:     wxString class
   4 // Author:      Vadim Zeitlin, Ryan Norton
   5 // Modified by:
   6 // Created:     29/01/98
   7 // RCS-ID:      $Id$
   8 // Copyright:   (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
   9 //              (c) 2004 Ryan Norton <wxprojects@comcast.net>
  10 // Licence:     wxWindows licence
  11 /////////////////////////////////////////////////////////////////////////////
  12
  13 // ===========================================================================
  14 // headers, declarations, constants
  15 // ===========================================================================
  16
  17 // For compilers that support precompilation, includes "wx.h".
  18 #include "wx/wxprec.h"
  19
  20 #ifdef __BORLANDC__
  21     #pragma hdrstop
  22 #endif
  23
  24 #ifndef WX_PRECOMP
  25     #include "wx/string.h"
  26     #include "wx/wxcrtvararg.h"
  27     #include "wx/log.h"
  28 #endif
  29
  30 #include <ctype.h>
  31
  32 #ifndef __WXWINCE__
  33     #include <errno.h>
  34 #endif
  35
  36 #include <string.h>
  37 #include <stdlib.h>
  38
  39 #include "wx/hashmap.h"
  40 #include "wx/vector.h"
  41 #include "wx/xlocale.h"
  42
  43 #ifdef __WXMSW__
  44     #include "wx/msw/wrapwin.h"
  45 #endif // __WXMSW__
  46
  47 // string handling functions used by wxString:
  48 #if wxUSE_UNICODE_UTF8
  49     #define wxStringMemcpy   memcpy
  50     #define wxStringMemcmp   memcmp
  51     #define wxStringMemchr   memchr
  52     #define wxStringStrlen   strlen
  53 #else
  54     #define wxStringMemcpy   wxTmemcpy
  55     #define wxStringMemcmp   wxTmemcmp
  56     #define wxStringMemchr   wxTmemchr
  57     #define wxStringStrlen   wxStrlen
  58 #endif
  59
  60 // ----------------------------------------------------------------------------
  61 // global variables
  62 // ----------------------------------------------------------------------------
  63
  64 namespace wxPrivate
  65 {
  66
  67 static UntypedBufferData s_untypedNullData(NULL, 0);
  68
  69 UntypedBufferData * const untypedNullDataPtr = &s_untypedNullData;
  70
  71 } // namespace wxPrivate
  72
  73 // ---------------------------------------------------------------------------
  74 // static class variables definition
  75 // ---------------------------------------------------------------------------
  76
  77 //According to STL _must_ be a -1 size_t
  78 const size_t wxString::npos = (size_t) -1;
  79
  80 #if wxUSE_STRING_POS_CACHE
  81
  82 #ifdef wxHAS_COMPILER_TLS
  83
  84 wxTLS_TYPE(wxString::Cache) wxString::ms_cache;
  85
  86 #else // !wxHAS_COMPILER_TLS
  87
  88 struct wxStrCacheInitializer
  89 {
  90     wxStrCacheInitializer()
  91     {
  92         // calling this function triggers s_cache initialization in it, and
  93         // from now on it becomes safe to call from multiple threads
  94         wxString::GetCache();
  95     }
  96 };
  97
  98 /*
  99 wxString::Cache& wxString::GetCache()
 100 {
 101     static wxTLS_TYPE(Cache) s_cache;
 102
 103     return wxTLS_VALUE(s_cache);
 104 }
 105 */
 106
 107 static wxStrCacheInitializer gs_stringCacheInit;
 108
 109 #endif // wxHAS_COMPILER_TLS/!wxHAS_COMPILER_TLS
 110
 111 // gdb seems to be unable to display thread-local variables correctly, at least
 112 // not my 6.4.98 version under amd64, so provide this debugging helper to do it
 113 #if wxDEBUG_LEVEL >= 2
 114
 115 struct wxStrCacheDumper
 116 {
 117     static void ShowAll()
 118     {
 119         puts("*** wxString cache dump:");
 120         for ( unsigned n = 0; n < wxString::Cache::SIZE; n++ )
 121         {
 122             const wxString::Cache::Element&
 123                 c = wxString::GetCacheBegin()[n];
 124
 125             printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
 126                    n,
 127                    n == wxString::LastUsedCacheElement() ? " [*]" : "",
 128                    c.str,
 129                    (unsigned long)c.pos,
 130                    (unsigned long)c.impl,
 131                    (long)c.len);
 132         }
 133     }
 134 };
 135
 136 void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
 137
 138 #endif // wxDEBUG_LEVEL >= 2
 139
 140 #ifdef wxPROFILE_STRING_CACHE
 141
 142 wxString::CacheStats wxString::ms_cacheStats;
 143
 144 struct wxStrCacheStatsDumper
 145 {
 146     ~wxStrCacheStatsDumper()
 147     {
 148         const wxString::CacheStats& stats = wxString::ms_cacheStats;
 149
 150         if ( stats.postot )
 151         {
 152             puts("*** wxString cache statistics:");
 153             printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
 154                    stats.postot);
 155             printf("\tHits %u (of which %u not used) or %.2f%%\n",
 156                    stats.poshits,
 157                    stats.mishits,
 158                    100.*float(stats.poshits - stats.mishits)/stats.postot);
 159             printf("\tAverage position requested: %.2f\n",
 160                    float(stats.sumpos) / stats.postot);
 161             printf("\tAverage offset after cached hint: %.2f\n",
 162                    float(stats.sumofs) / stats.postot);
 163         }
 164
 165         if ( stats.lentot )
 166         {
 167             printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
 168                    stats.lentot, 100.*float(stats.lenhits)/stats.lentot);
 169         }
 170     }
 171 };
 172
 173 static wxStrCacheStatsDumper s_showCacheStats;
 174
 175 #endif // wxPROFILE_STRING_CACHE
 176
 177 #endif // wxUSE_STRING_POS_CACHE
 178
 179 // ----------------------------------------------------------------------------
 180 // global functions
 181 // ----------------------------------------------------------------------------
 182
 183 #if wxUSE_STD_IOSTREAM
 184
 185 #include <iostream>
 186
 187 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
 188 {
 189 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
 190     const wxScopedCharBuffer buf(str.AsCharBuf());
 191     if ( !buf )
 192         os.clear(wxSTD ios_base::failbit);
 193     else
 194         os << buf.data();
 195
 196     return os;
 197 #else
 198     return os << str.AsInternal();
 199 #endif
 200 }
 201
 202 wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
 203 {
 204     return os << str.c_str();
 205 }
 206
 207 wxSTD ostream& operator<<(wxSTD ostream& os, const wxScopedCharBuffer& str)
 208 {
 209     return os << str.data();
 210 }
 211
 212 #ifndef __BORLANDC__
 213 wxSTD ostream& operator<<(wxSTD ostream& os, const wxScopedWCharBuffer& str)
 214 {
 215     return os << str.data();
 216 }
 217 #endif
 218
 219 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
 220
 221 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxString& str)
 222 {
 223     return wos << str.wc_str();
 224 }
 225
 226 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxCStrData& str)
 227 {
 228     return wos << str.AsWChar();
 229 }
 230
 231 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxScopedWCharBuffer& str)
 232 {
 233     return wos << str.data();
 234 }
 235
 236 #endif  // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
 237
 238 #endif // wxUSE_STD_IOSTREAM
 239
 240 // ===========================================================================
 241 // wxString class core
 242 // ===========================================================================
 243
 244 #if wxUSE_UNICODE_UTF8
 245
 246 void wxString::PosLenToImpl(size_t pos, size_t len,
 247                             size_t *implPos, size_t *implLen) const
 248 {
 249     if ( pos == npos )
 250     {
 251         *implPos = npos;
 252     }
 253     else // have valid start position
 254     {
 255         const const_iterator b = GetIterForNthChar(pos);
 256         *implPos = wxStringImpl::const_iterator(b.impl()) - m_impl.begin();
 257         if ( len == npos )
 258         {
 259             *implLen = npos;
 260         }
 261         else // have valid length too
 262         {
 263             // we need to handle the case of length specifying a substring
 264             // going beyond the end of the string, just as std::string does
 265             const const_iterator e(end());
 266             const_iterator i(b);
 267             while ( len && i <= e )
 268             {
 269                 ++i;
 270                 --len;
 271             }
 272
 273             *implLen = i.impl() - b.impl();
 274         }
 275     }
 276 }
 277
 278 #endif // wxUSE_UNICODE_UTF8
 279
 280 // ----------------------------------------------------------------------------
 281 // wxCStrData converted strings caching
 282 // ----------------------------------------------------------------------------
 283
 284 // FIXME-UTF8: temporarily disabled because it doesn't work with global
 285 //             string objects; re-enable after fixing this bug and benchmarking
 286 //             performance to see if using a hash is a good idea at all
 287 #if 0
 288
 289 // For backward compatibility reasons, it must be possible to assign the value
 290 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
 291 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
 292 // because the memory would be freed immediately, but it has to be valid as long
 293 // as the string is not modified, so that code like this still works:
 294 //
 295 // const wxChar *s = str.c_str();
 296 // while ( s ) { ... }
 297
 298 // FIXME-UTF8: not thread safe!
 299 // FIXME-UTF8: we currently clear the cached conversion only when the string is
 300 //             destroyed, but we should do it when the string is modified, to
 301 //             keep memory usage down
 302 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
 303 //             invalidated the cache on every change, we could keep the previous
 304 //             conversion
 305 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
 306 //             to use mb_str() or wc_str() instead of (const [w]char*)c_str()
 307
 308 template<typename T>
 309 static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
 310 {
 311     typename T::iterator i = hash.find(wxConstCast(s, wxString));
 312     if ( i != hash.end() )
 313     {
 314         free(i->second);
 315         hash.erase(i);
 316     }
 317 }
 318
 319 #if wxUSE_UNICODE
 320 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
 321 //     so we have to use wxString* here and const-cast when used
 322 WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
 323                     wxStringCharConversionCache);
 324 static wxStringCharConversionCache gs_stringsCharCache;
 325
 326 const char* wxCStrData::AsChar() const
 327 {
 328     // remove previously cache value, if any (see FIXMEs above):
 329     DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
 330
 331     // convert the string and keep it:
 332     const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
 333         m_str->mb_str().release();
 334
 335     return s + m_offset;
 336 }
 337 #endif // wxUSE_UNICODE
 338
 339 #if !wxUSE_UNICODE_WCHAR
 340 WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
 341                     wxStringWCharConversionCache);
 342 static wxStringWCharConversionCache gs_stringsWCharCache;
 343
 344 const wchar_t* wxCStrData::AsWChar() const
 345 {
 346     // remove previously cache value, if any (see FIXMEs above):
 347     DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
 348
 349     // convert the string and keep it:
 350     const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
 351         m_str->wc_str().release();
 352
 353     return s + m_offset;
 354 }
 355 #endif // !wxUSE_UNICODE_WCHAR
 356
 357 wxString::~wxString()
 358 {
 359 #if wxUSE_UNICODE
 360     // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
 361     DeleteStringFromConversionCache(gs_stringsCharCache, this);
 362 #endif
 363 #if !wxUSE_UNICODE_WCHAR
 364     DeleteStringFromConversionCache(gs_stringsWCharCache, this);
 365 #endif
 366 }
 367 #endif
 368
 369 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 370 const char* wxCStrData::AsChar() const
 371 {
 372 #if wxUSE_UNICODE_UTF8
 373     if ( wxLocaleIsUtf8 )
 374         return AsInternal();
 375 #endif
 376     // under non-UTF8 locales, we have to convert the internal UTF-8
 377     // representation using wxConvLibc and cache the result
 378
 379     wxString *str = wxConstCast(m_str, wxString);
 380
 381     // convert the string:
 382     //
 383     // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
 384     //             have it) but it's unfortunately not obvious to implement
 385     //             because we don't know how big buffer do we need for the
 386     //             given string length (in case of multibyte encodings, e.g.
 387     //             ISO-2022-JP or UTF-8 when internal representation is wchar_t)
 388     //
 389     //             One idea would be to store more than just m_convertedToChar
 390     //             in wxString: then we could record the length of the string
 391     //             which was converted the last time and try to reuse the same
 392     //             buffer if the current length is not greater than it (this
 393     //             could still fail because string could have been modified in
 394     //             place but it would work most of the time, so we'd do it and
 395     //             only allocate the new buffer if in-place conversion returned
 396     //             an error). We could also store a bit saying if the string
 397     //             was modified since the last conversion (and update it in all
 398     //             operation modifying the string, of course) to avoid unneeded
 399     //             consequential conversions. But both of these ideas require
 400     //             adding more fields to wxString and require profiling results
 401     //             to be sure that we really gain enough from them to justify
 402     //             doing it.
 403     wxScopedCharBuffer buf(str->mb_str());
 404
 405     // if it failed, return empty string and not NULL to avoid crashes in code
 406     // written with either wxWidgets 2 wxString or std::string behaviour in
 407     // mind: neither of them ever returns NULL and so we shouldn't neither
 408     if ( !buf )
 409         return "";
 410
 411     if ( str->m_convertedToChar &&
 412          strlen(buf) == strlen(str->m_convertedToChar) )
 413     {
 414         // keep the same buffer for as long as possible, so that several calls
 415         // to c_str() in a row still work:
 416         strcpy(str->m_convertedToChar, buf);
 417     }
 418     else
 419     {
 420         str->m_convertedToChar = buf.release();
 421     }
 422
 423     // and keep it:
 424     return str->m_convertedToChar + m_offset;
 425 }
 426 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 427
 428 #if !wxUSE_UNICODE_WCHAR
 429 const wchar_t* wxCStrData::AsWChar() const
 430 {
 431     wxString *str = wxConstCast(m_str, wxString);
 432
 433     // convert the string:
 434     wxScopedWCharBuffer buf(str->wc_str());
 435
 436     // notice that here, unlike above in AsChar(), conversion can't fail as our
 437     // internal UTF-8 is always well-formed -- or the string was corrupted and
 438     // all bets are off anyhow
 439
 440     // FIXME-UTF8: do the conversion in-place in the existing buffer
 441     if ( str->m_convertedToWChar &&
 442          wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
 443     {
 444         // keep the same buffer for as long as possible, so that several calls
 445         // to c_str() in a row still work:
 446         memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
 447     }
 448     else
 449     {
 450         str->m_convertedToWChar = buf.release();
 451     }
 452
 453     // and keep it:
 454     return str->m_convertedToWChar + m_offset;
 455 }
 456 #endif // !wxUSE_UNICODE_WCHAR
 457
 458 // ===========================================================================
 459 // wxString class core
 460 // ===========================================================================
 461
 462 // ---------------------------------------------------------------------------
 463 // construction and conversion
 464 // ---------------------------------------------------------------------------
 465
 466 #if wxUSE_UNICODE_WCHAR
 467 /* static */
 468 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 469                                                const wxMBConv& conv)
 470 {
 471     // anything to do?
 472     if ( !psz || nLength == 0 )
 473         return SubstrBufFromMB(wxWCharBuffer(L""), 0);
 474
 475     if ( nLength == npos )
 476         nLength = wxNO_LEN;
 477
 478     size_t wcLen;
 479     wxScopedWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 480     if ( !wcLen )
 481         return SubstrBufFromMB(wxWCharBuffer(L""), 0);
 482     else
 483         return SubstrBufFromMB(wcBuf, wcLen);
 484 }
 485 #endif // wxUSE_UNICODE_WCHAR
 486
 487 #if wxUSE_UNICODE_UTF8
 488 /* static */
 489 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 490                                                const wxMBConv& conv)
 491 {
 492     // anything to do?
 493     if ( !psz || nLength == 0 )
 494         return SubstrBufFromMB(wxCharBuffer(""), 0);
 495
 496     // if psz is already in UTF-8, we don't have to do the roundtrip to
 497     // wchar_t* and back:
 498     if ( conv.IsUTF8() )
 499     {
 500         // we need to validate the input because UTF8 iterators assume valid
 501         // UTF-8 sequence and psz may be invalid:
 502         if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
 503         {
 504             // we must pass the real string length to SubstrBufFromMB ctor
 505             if ( nLength == npos )
 506                 nLength = psz ? strlen(psz) : 0;
 507             return SubstrBufFromMB(wxScopedCharBuffer::CreateNonOwned(psz, nLength),
 508                                    nLength);
 509         }
 510         // else: do the roundtrip through wchar_t*
 511     }
 512
 513     if ( nLength == npos )
 514         nLength = wxNO_LEN;
 515
 516     // first convert to wide string:
 517     size_t wcLen;
 518     wxScopedWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 519     if ( !wcLen )
 520         return SubstrBufFromMB(wxCharBuffer(""), 0);
 521
 522     // and then to UTF-8:
 523     SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
 524     // widechar -> UTF-8 conversion isn't supposed to ever fail:
 525     wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
 526
 527     return buf;
 528 }
 529 #endif // wxUSE_UNICODE_UTF8
 530
 531 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 532 /* static */
 533 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
 534                                                const wxMBConv& conv)
 535 {
 536     // anything to do?
 537     if ( !pwz || nLength == 0 )
 538         return SubstrBufFromWC(wxCharBuffer(""), 0);
 539
 540     if ( nLength == npos )
 541         nLength = wxNO_LEN;
 542
 543     size_t mbLen;
 544     wxScopedCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
 545     if ( !mbLen )
 546         return SubstrBufFromWC(wxCharBuffer(""), 0);
 547     else
 548         return SubstrBufFromWC(mbBuf, mbLen);
 549 }
 550 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 551
 552
 553 #if wxUSE_UNICODE_WCHAR
 554
 555 //Convert wxString in Unicode mode to a multi-byte string
 556 const wxScopedCharBuffer wxString::mb_str(const wxMBConv& conv) const
 557 {
 558     // NB: Length passed to cWC2MB() doesn't include terminating NUL, it's
 559     //     added by it automatically. If we passed length()+1 here, it would
 560     //     create a buffer with 2 trailing NULs of length one greater than
 561     //     expected.
 562     return conv.cWC2MB(wx_str(), length(), NULL);
 563 }
 564
 565 #elif wxUSE_UNICODE_UTF8
 566
 567 const wxScopedWCharBuffer wxString::wc_str() const
 568 {
 569     // NB: Length passed to cMB2WC() doesn't include terminating NUL, it's
 570     //     added by it automatically. If we passed length()+1 here, it would
 571     //     create a buffer with 2 trailing NULs of length one greater than
 572     //     expected.
 573     return wxMBConvStrictUTF8().cMB2WC
 574                                 (
 575                                     m_impl.c_str(),
 576                                     m_impl.length(),
 577                                     NULL
 578                                 );
 579 }
 580
 581 const wxScopedCharBuffer wxString::mb_str(const wxMBConv& conv) const
 582 {
 583     if ( conv.IsUTF8() )
 584         return wxScopedCharBuffer::CreateNonOwned(m_impl.c_str(), m_impl.length());
 585
 586     wxScopedWCharBuffer wcBuf(wc_str());
 587     if ( !wcBuf.length() )
 588         return wxCharBuffer("");
 589
 590     return conv.cWC2MB(wcBuf.data(), wcBuf.length(), NULL);
 591 }
 592
 593 #else // ANSI
 594
 595 //Converts this string to a wide character string if unicode
 596 //mode is not enabled and wxUSE_WCHAR_T is enabled
 597 const wxScopedWCharBuffer wxString::wc_str(const wxMBConv& conv) const
 598 {
 599     // NB: Length passed to cMB2WC() doesn't include terminating NUL, it's
 600     //     added by it automatically. If we passed length()+1 here, it would
 601     //     create a buffer with 2 trailing NULs of length one greater than
 602     //     expected.
 603     return conv.cMB2WC(wx_str(), length(), NULL);
 604 }
 605
 606 #endif // Unicode/ANSI
 607
 608 // shrink to minimal size (releasing extra memory)
 609 bool wxString::Shrink()
 610 {
 611   wxString tmp(begin(), end());
 612   swap(tmp);
 613   return tmp.length() == length();
 614 }
 615
 616 // deprecated compatibility code:
 617 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
 618 wxStringCharType *wxString::GetWriteBuf(size_t nLen)
 619 {
 620     return DoGetWriteBuf(nLen);
 621 }
 622
 623 void wxString::UngetWriteBuf()
 624 {
 625     DoUngetWriteBuf();
 626 }
 627
 628 void wxString::UngetWriteBuf(size_t nLen)
 629 {
 630     DoUngetWriteBuf(nLen);
 631 }
 632 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
 633
 634
 635 // ---------------------------------------------------------------------------
 636 // data access
 637 // ---------------------------------------------------------------------------
 638
 639 // all functions are inline in string.h
 640
 641 // ---------------------------------------------------------------------------
 642 // concatenation operators
 643 // ---------------------------------------------------------------------------
 644
 645 /*
 646  * concatenation functions come in 5 flavours:
 647  *  string + string
 648  *  char   + string      and      string + char
 649  *  C str  + string      and      string + C str
 650  */
 651
 652 wxString operator+(const wxString& str1, const wxString& str2)
 653 {
 654 #if !wxUSE_STL_BASED_WXSTRING
 655     wxASSERT( str1.IsValid() );
 656     wxASSERT( str2.IsValid() );
 657 #endif
 658
 659     wxString s = str1;
 660     s += str2;
 661
 662     return s;
 663 }
 664
 665 wxString operator+(const wxString& str, wxUniChar ch)
 666 {
 667 #if !wxUSE_STL_BASED_WXSTRING
 668     wxASSERT( str.IsValid() );
 669 #endif
 670
 671     wxString s = str;
 672     s += ch;
 673
 674     return s;
 675 }
 676
 677 wxString operator+(wxUniChar ch, const wxString& str)
 678 {
 679 #if !wxUSE_STL_BASED_WXSTRING
 680     wxASSERT( str.IsValid() );
 681 #endif
 682
 683     wxString s = ch;
 684     s += str;
 685
 686     return s;
 687 }
 688
 689 wxString operator+(const wxString& str, const char *psz)
 690 {
 691 #if !wxUSE_STL_BASED_WXSTRING
 692     wxASSERT( str.IsValid() );
 693 #endif
 694
 695     wxString s;
 696     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 697         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 698     }
 699     s += str;
 700     s += psz;
 701
 702     return s;
 703 }
 704
 705 wxString operator+(const wxString& str, const wchar_t *pwz)
 706 {
 707 #if !wxUSE_STL_BASED_WXSTRING
 708     wxASSERT( str.IsValid() );
 709 #endif
 710
 711     wxString s;
 712     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 713         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 714     }
 715     s += str;
 716     s += pwz;
 717
 718     return s;
 719 }
 720
 721 wxString operator+(const char *psz, const wxString& str)
 722 {
 723 #if !wxUSE_STL_BASED_WXSTRING
 724     wxASSERT( str.IsValid() );
 725 #endif
 726
 727     wxString s;
 728     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 729         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 730     }
 731     s = psz;
 732     s += str;
 733
 734     return s;
 735 }
 736
 737 wxString operator+(const wchar_t *pwz, const wxString& str)
 738 {
 739 #if !wxUSE_STL_BASED_WXSTRING
 740     wxASSERT( str.IsValid() );
 741 #endif
 742
 743     wxString s;
 744     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 745         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 746     }
 747     s = pwz;
 748     s += str;
 749
 750     return s;
 751 }
 752
 753 // ---------------------------------------------------------------------------
 754 // string comparison
 755 // ---------------------------------------------------------------------------
 756
 757 bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
 758 {
 759     return (length() == 1) && (compareWithCase ? GetChar(0u) == c
 760                                : wxToupper(GetChar(0u)) == wxToupper(c));
 761 }
 762
 763 #ifdef HAVE_STD_STRING_COMPARE
 764
 765 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
 766 //     UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
 767 //     sort strings in characters code point order by sorting the byte sequence
 768 //     in byte values order (i.e. what strcmp() and memcmp() do).
 769
 770 int wxString::compare(const wxString& str) const
 771 {
 772     return m_impl.compare(str.m_impl);
 773 }
 774
 775 int wxString::compare(size_t nStart, size_t nLen,
 776                       const wxString& str) const
 777 {
 778     size_t pos, len;
 779     PosLenToImpl(nStart, nLen, &pos, &len);
 780     return m_impl.compare(pos, len, str.m_impl);
 781 }
 782
 783 int wxString::compare(size_t nStart, size_t nLen,
 784                       const wxString& str,
 785                       size_t nStart2, size_t nLen2) const
 786 {
 787     size_t pos, len;
 788     PosLenToImpl(nStart, nLen, &pos, &len);
 789
 790     size_t pos2, len2;
 791     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 792
 793     return m_impl.compare(pos, len, str.m_impl, pos2, len2);
 794 }
 795
 796 int wxString::compare(const char* sz) const
 797 {
 798     return m_impl.compare(ImplStr(sz));
 799 }
 800
 801 int wxString::compare(const wchar_t* sz) const
 802 {
 803     return m_impl.compare(ImplStr(sz));
 804 }
 805
 806 int wxString::compare(size_t nStart, size_t nLen,
 807                       const char* sz, size_t nCount) const
 808 {
 809     size_t pos, len;
 810     PosLenToImpl(nStart, nLen, &pos, &len);
 811
 812     SubstrBufFromMB str(ImplStr(sz, nCount));
 813
 814     return m_impl.compare(pos, len, str.data, str.len);
 815 }
 816
 817 int wxString::compare(size_t nStart, size_t nLen,
 818                       const wchar_t* sz, size_t nCount) const
 819 {
 820     size_t pos, len;
 821     PosLenToImpl(nStart, nLen, &pos, &len);
 822
 823     SubstrBufFromWC str(ImplStr(sz, nCount));
 824
 825     return m_impl.compare(pos, len, str.data, str.len);
 826 }
 827
 828 #else // !HAVE_STD_STRING_COMPARE
 829
 830 static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
 831                           const wxStringCharType* s2, size_t l2)
 832 {
 833     if( l1 == l2 )
 834         return wxStringMemcmp(s1, s2, l1);
 835     else if( l1 < l2 )
 836     {
 837         int ret = wxStringMemcmp(s1, s2, l1);
 838         return ret == 0 ? -1 : ret;
 839     }
 840     else
 841     {
 842         int ret = wxStringMemcmp(s1, s2, l2);
 843         return ret == 0 ? +1 : ret;
 844     }
 845 }
 846
 847 int wxString::compare(const wxString& str) const
 848 {
 849     return ::wxDoCmp(m_impl.data(), m_impl.length(),
 850                      str.m_impl.data(), str.m_impl.length());
 851 }
 852
 853 int wxString::compare(size_t nStart, size_t nLen,
 854                       const wxString& str) const
 855 {
 856     wxASSERT(nStart <= length());
 857     size_type strLen = length() - nStart;
 858     nLen = strLen < nLen ? strLen : nLen;
 859
 860     size_t pos, len;
 861     PosLenToImpl(nStart, nLen, &pos, &len);
 862
 863     return ::wxDoCmp(m_impl.data() + pos,  len,
 864                      str.m_impl.data(), str.m_impl.length());
 865 }
 866
 867 int wxString::compare(size_t nStart, size_t nLen,
 868                       const wxString& str,
 869                       size_t nStart2, size_t nLen2) const
 870 {
 871     wxASSERT(nStart <= length());
 872     wxASSERT(nStart2 <= str.length());
 873     size_type strLen  =     length() - nStart,
 874               strLen2 = str.length() - nStart2;
 875     nLen  = strLen  < nLen  ? strLen  : nLen;
 876     nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
 877
 878     size_t pos, len;
 879     PosLenToImpl(nStart, nLen, &pos, &len);
 880     size_t pos2, len2;
 881     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 882
 883     return ::wxDoCmp(m_impl.data() + pos, len,
 884                      str.m_impl.data() + pos2, len2);
 885 }
 886
 887 int wxString::compare(const char* sz) const
 888 {
 889     SubstrBufFromMB str(ImplStr(sz, npos));
 890     if ( str.len == npos )
 891         str.len = wxStringStrlen(str.data);
 892     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 893 }
 894
 895 int wxString::compare(const wchar_t* sz) const
 896 {
 897     SubstrBufFromWC str(ImplStr(sz, npos));
 898     if ( str.len == npos )
 899         str.len = wxStringStrlen(str.data);
 900     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 901 }
 902
 903 int wxString::compare(size_t nStart, size_t nLen,
 904                       const char* sz, size_t nCount) const
 905 {
 906     wxASSERT(nStart <= length());
 907     size_type strLen = length() - nStart;
 908     nLen = strLen < nLen ? strLen : nLen;
 909
 910     size_t pos, len;
 911     PosLenToImpl(nStart, nLen, &pos, &len);
 912
 913     SubstrBufFromMB str(ImplStr(sz, nCount));
 914     if ( str.len == npos )
 915         str.len = wxStringStrlen(str.data);
 916
 917     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 918 }
 919
 920 int wxString::compare(size_t nStart, size_t nLen,
 921                       const wchar_t* sz, size_t nCount) const
 922 {
 923     wxASSERT(nStart <= length());
 924     size_type strLen = length() - nStart;
 925     nLen = strLen < nLen ? strLen : nLen;
 926
 927     size_t pos, len;
 928     PosLenToImpl(nStart, nLen, &pos, &len);
 929
 930     SubstrBufFromWC str(ImplStr(sz, nCount));
 931     if ( str.len == npos )
 932         str.len = wxStringStrlen(str.data);
 933
 934     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 935 }
 936
 937 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
 938
 939
 940 // ---------------------------------------------------------------------------
 941 // find_{first,last}_[not]_of functions
 942 // ---------------------------------------------------------------------------
 943
 944 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
 945
 946 // NB: All these functions are implemented  with the argument being wxChar*,
 947 //     i.e. widechar string in any Unicode build, even though native string
 948 //     representation is char* in the UTF-8 build. This is because we couldn't
 949 //     use memchr() to determine if a character is in a set encoded as UTF-8.
 950
 951 size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
 952 {
 953     return find_first_of(sz, nStart, wxStrlen(sz));
 954 }
 955
 956 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
 957 {
 958     return find_first_not_of(sz, nStart, wxStrlen(sz));
 959 }
 960
 961 size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
 962 {
 963     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 964
 965     size_t idx = nStart;
 966     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 967     {
 968         if ( wxTmemchr(sz, *i, n) )
 969             return idx;
 970     }
 971
 972     return npos;
 973 }
 974
 975 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
 976 {
 977     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 978
 979     size_t idx = nStart;
 980     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 981     {
 982         if ( !wxTmemchr(sz, *i, n) )
 983             return idx;
 984     }
 985
 986     return npos;
 987 }
 988
 989
 990 size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
 991 {
 992     return find_last_of(sz, nStart, wxStrlen(sz));
 993 }
 994
 995 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
 996 {
 997     return find_last_not_of(sz, nStart, wxStrlen(sz));
 998 }
 999
1000 size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
1001 {
1002     size_t len = length();
1003
1004     if ( nStart == npos )
1005     {
1006         nStart = len - 1;
1007     }
1008     else
1009     {
1010         wxASSERT_MSG( nStart <= len, _T("invalid index") );
1011     }
1012
1013     size_t idx = nStart;
1014     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1015           i != rend(); --idx, ++i )
1016     {
1017         if ( wxTmemchr(sz, *i, n) )
1018             return idx;
1019     }
1020
1021     return npos;
1022 }
1023
1024 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
1025 {
1026     size_t len = length();
1027
1028     if ( nStart == npos )
1029     {
1030         nStart = len - 1;
1031     }
1032     else
1033     {
1034         wxASSERT_MSG( nStart <= len, _T("invalid index") );
1035     }
1036
1037     size_t idx = nStart;
1038     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1039           i != rend(); --idx, ++i )
1040     {
1041         if ( !wxTmemchr(sz, *i, n) )
1042             return idx;
1043     }
1044
1045     return npos;
1046 }
1047
1048 size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
1049 {
1050     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
1051
1052     size_t idx = nStart;
1053     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
1054     {
1055         if ( *i != ch )
1056             return idx;
1057     }
1058
1059     return npos;
1060 }
1061
1062 size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
1063 {
1064     size_t len = length();
1065
1066     if ( nStart == npos )
1067     {
1068         nStart = len - 1;
1069     }
1070     else
1071     {
1072         wxASSERT_MSG( nStart <= len, _T("invalid index") );
1073     }
1074
1075     size_t idx = nStart;
1076     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1077           i != rend(); --idx, ++i )
1078     {
1079         if ( *i != ch )
1080             return idx;
1081     }
1082
1083     return npos;
1084 }
1085
1086 // the functions above were implemented for wchar_t* arguments in Unicode
1087 // build and char* in ANSI build; below are implementations for the other
1088 // version:
// wxOtherCharType is the character type which is NOT wxChar in this build
// and STRCONV is the wxConvLibc method used to convert a string of this type
// to wxChar; the result of the conversion is cast to const wxChar*.
//
// NOTE(review): the pointer obtained from STRCONV() presumably refers to a
// temporary buffer which only lives until the end of the full expression, so
// it must be used immediately as done below -- confirm against wxMBConv.
#if wxUSE_UNICODE
    #define wxOtherCharType char
    #define STRCONV         (const wxChar*)wxConvLibc.cMB2WC
#else
    #define wxOtherCharType wchar_t
    #define STRCONV         (const wxChar*)wxConvLibc.cWC2MB
#endif

// All the overloads below simply convert their argument to wxChar string and
// forward to the corresponding wxChar overload.
//
// NOTE(review): the overloads taking n pass the same n both as the input
// length for the conversion and as the length of the converted string; these
// may differ if the conversion changes the number of elements -- TODO confirm.
size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_first_of(STRCONV(sz), nStart); }

size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
                               size_t n) const
    { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_last_of(STRCONV(sz), nStart); }
size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
                              size_t n) const
    { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_first_not_of(STRCONV(sz), nStart); }
size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
                                   size_t n) const
    { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_last_not_of(STRCONV(sz), nStart); }
size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
                                  size_t n) const
    { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }

// the helper macros are only needed for the definitions above
#undef wxOtherCharType
#undef STRCONV
1121
1122 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1123
1124 // ===========================================================================
1125 // other common string functions
1126 // ===========================================================================
1127
1128 int wxString::CmpNoCase(const wxString& s) const
1129 {
1130 #if defined(__WXMSW__) && !wxUSE_UNICODE_UTF8
1131     // prefer to use CompareString() if available as it's more efficient than
1132     // doing it manual or even using wxStricmp() (see #10375)
1133     switch ( ::CompareString(LOCALE_USER_DEFAULT, NORM_IGNORECASE,
1134                              m_impl.c_str(), m_impl.length(),
1135                              s.m_impl.c_str(), s.m_impl.length()) )
1136     {
1137         case CSTR_LESS_THAN:
1138             return -1;
1139
1140         case CSTR_EQUAL:
1141             return 0;
1142
1143         case CSTR_GREATER_THAN:
1144             return 1;
1145
1146         default:
1147             wxFAIL_MSG( "unexpected CompareString() return value" );
1148             // fall through
1149
1150         case 0:
1151             wxLogLastError("CompareString");
1152             // use generic code below
1153     }
1154 #endif // __WXMSW__ && !wxUSE_UNICODE_UTF8
1155
1156     // do the comparison manually: notice that we can't use wxStricmp() as it
1157     // doesn't handle embedded NULs
1158
1159     // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
1160     const_iterator i1 = begin();
1161     const_iterator end1 = end();
1162     const_iterator i2 = s.begin();
1163     const_iterator end2 = s.end();
1164
1165     for ( ; i1 != end1 && i2 != end2; ++i1, ++i2 )
1166     {
1167         wxUniChar lower1 = (wxChar)wxTolower(*i1);
1168         wxUniChar lower2 = (wxChar)wxTolower(*i2);
1169         if ( lower1 != lower2 )
1170             return lower1 < lower2 ? -1 : 1;
1171     }
1172
1173     size_t len1 = length();
1174     size_t len2 = s.length();
1175
1176     if ( len1 < len2 )
1177         return -1;
1178     else if ( len1 > len2 )
1179         return 1;
1180     return 0;
1181 }
1182
1183
1184 #if wxUSE_UNICODE
1185
1186 #ifdef __MWERKS__
1187 #ifndef __SCHAR_MAX__
1188 #define __SCHAR_MAX__ 127
1189 #endif
1190 #endif
1191
1192 wxString wxString::FromAscii(const char *ascii, size_t len)
1193 {
1194     if (!ascii || len == 0)
1195        return wxEmptyString;
1196
1197     wxString res;
1198
1199     {
1200         wxStringInternalBuffer buf(res, len);
1201         wxStringCharType *dest = buf;
1202
1203         for ( ; len > 0; --len )
1204         {
1205             unsigned char c = (unsigned char)*ascii++;
1206             wxASSERT_MSG( c < 0x80,
1207                           _T("Non-ASCII value passed to FromAscii().") );
1208
1209             *dest++ = (wchar_t)c;
1210         }
1211     }
1212
1213     return res;
1214 }
1215
1216 wxString wxString::FromAscii(const char *ascii)
1217 {
1218     return FromAscii(ascii, wxStrlen(ascii));
1219 }
1220
1221 wxString wxString::FromAscii(char ascii)
1222 {
1223     // What do we do with '\0' ?
1224
1225     unsigned char c = (unsigned char)ascii;
1226
1227     wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") );
1228
1229     // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1230     return wxString(wxUniChar((wchar_t)c));
1231 }
1232
1233 const wxScopedCharBuffer wxString::ToAscii() const
1234 {
1235     // this will allocate enough space for the terminating NUL too
1236     wxCharBuffer buffer(length());
1237     char *dest = buffer.data();
1238
1239     for ( const_iterator i = begin(); i != end(); ++i )
1240     {
1241         wxUniChar c(*i);
1242         // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1243         *dest++ = c.IsAscii() ? (char)c : '_';
1244
1245         // the output string can't have embedded NULs anyhow, so we can safely
1246         // stop at first of them even if we do have any
1247         if ( !c )
1248             break;
1249     }
1250
1251     return buffer;
1252 }
1253
1254 #endif // wxUSE_UNICODE
1255
1256 // extract string of length nCount starting at nFirst
1257 wxString wxString::Mid(size_t nFirst, size_t nCount) const
1258 {
1259     size_t nLen = length();
1260
1261     // default value of nCount is npos and means "till the end"
1262     if ( nCount == npos )
1263     {
1264         nCount = nLen - nFirst;
1265     }
1266
1267     // out-of-bounds requests return sensible things
1268     if ( nFirst + nCount > nLen )
1269     {
1270         nCount = nLen - nFirst;
1271     }
1272
1273     if ( nFirst > nLen )
1274     {
1275         // AllocCopy() will return empty string
1276         return wxEmptyString;
1277     }
1278
1279     wxString dest(*this, nFirst, nCount);
1280     if ( dest.length() != nCount )
1281     {
1282         wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1283     }
1284
1285     return dest;
1286 }
1287
1288 // check that the string starts with prefix and return the rest of the string
1289 // in the provided pointer if it is not NULL, otherwise return false
1290 bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
1291 {
1292     if ( compare(0, prefix.length(), prefix) != 0 )
1293         return false;
1294
1295     if ( rest )
1296     {
1297         // put the rest of the string into provided pointer
1298         rest->assign(*this, prefix.length(), npos);
1299     }
1300
1301     return true;
1302 }
1303
1304
1305 // check that the string ends with suffix and return the rest of it in the
1306 // provided pointer if it is not NULL, otherwise return false
1307 bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
1308 {
1309     int start = length() - suffix.length();
1310
1311     if ( start < 0 || compare(start, npos, suffix) != 0 )
1312         return false;
1313
1314     if ( rest )
1315     {
1316         // put the rest of the string into provided pointer
1317         rest->assign(*this, 0, start);
1318     }
1319
1320     return true;
1321 }
1322
1323
1324 // extract nCount last (rightmost) characters
1325 wxString wxString::Right(size_t nCount) const
1326 {
1327   if ( nCount > length() )
1328     nCount = length();
1329
1330   wxString dest(*this, length() - nCount, nCount);
1331   if ( dest.length() != nCount ) {
1332     wxFAIL_MSG( _T("out of memory in wxString::Right") );
1333   }
1334   return dest;
1335 }
1336
1337 // get all characters after the last occurrence of ch
1338 // (returns the whole string if ch not found)
1339 wxString wxString::AfterLast(wxUniChar ch) const
1340 {
1341   wxString str;
1342   int iPos = Find(ch, true);
1343   if ( iPos == wxNOT_FOUND )
1344     str = *this;
1345   else
1346     str.assign(*this, iPos + 1, npos);
1347
1348   return str;
1349 }
1350
1351 // extract nCount first (leftmost) characters
1352 wxString wxString::Left(size_t nCount) const
1353 {
1354   if ( nCount > length() )
1355     nCount = length();
1356
1357   wxString dest(*this, 0, nCount);
1358   if ( dest.length() != nCount ) {
1359     wxFAIL_MSG( _T("out of memory in wxString::Left") );
1360   }
1361   return dest;
1362 }
1363
1364 // get all characters before the first occurrence of ch
1365 // (returns the whole string if ch not found)
1366 wxString wxString::BeforeFirst(wxUniChar ch) const
1367 {
1368   int iPos = Find(ch);
1369   if ( iPos == wxNOT_FOUND )
1370       iPos = length();
1371   return wxString(*this, 0, iPos);
1372 }
1373
1374 /// get all characters before the last occurrence of ch
1375 /// (returns empty string if ch not found)
1376 wxString wxString::BeforeLast(wxUniChar ch) const
1377 {
1378   wxString str;
1379   int iPos = Find(ch, true);
1380   if ( iPos != wxNOT_FOUND && iPos != 0 )
1381     str = wxString(c_str(), iPos);
1382
1383   return str;
1384 }
1385
1386 /// get all characters after the first occurrence of ch
1387 /// (returns empty string if ch not found)
1388 wxString wxString::AfterFirst(wxUniChar ch) const
1389 {
1390   wxString str;
1391   int iPos = Find(ch);
1392   if ( iPos != wxNOT_FOUND )
1393       str.assign(*this, iPos + 1, npos);
1394
1395   return str;
1396 }
1397
1398 // replace first (or all) occurrences of some substring with another one
1399 size_t wxString::Replace(const wxString& strOld,
1400                          const wxString& strNew, bool bReplaceAll)
1401 {
1402     // if we tried to replace an empty string we'd enter an infinite loop below
1403     wxCHECK_MSG( !strOld.empty(), 0,
1404                  _T("wxString::Replace(): invalid parameter") );
1405
1406     wxSTRING_INVALIDATE_CACHE();
1407
1408     size_t uiCount = 0;   // count of replacements made
1409
1410     // optimize the special common case: replacement of one character by
1411     // another one (in UTF-8 case we can only do this for ASCII characters)
1412     //
1413     // benchmarks show that this special version is around 3 times faster
1414     // (depending on the proportion of matching characters and UTF-8/wchar_t
1415     // build)
1416     if ( strOld.m_impl.length() == 1 && strNew.m_impl.length() == 1 )
1417     {
1418         const wxStringCharType chOld = strOld.m_impl[0],
1419                                chNew = strNew.m_impl[0];
1420
1421         // this loop is the simplified version of the one below
1422         for ( size_t pos = 0; ; )
1423         {
1424             pos = m_impl.find(chOld, pos);
1425             if ( pos == npos )
1426                 break;
1427
1428             m_impl[pos++] = chNew;
1429
1430             uiCount++;
1431
1432             if ( !bReplaceAll )
1433                 break;
1434         }
1435     }
1436     else if ( !bReplaceAll)
1437     {
1438         size_t pos = m_impl.find(strOld, 0);
1439         if ( pos != npos )
1440         {
1441             m_impl.replace(pos, strOld.m_impl.length(), strNew.m_impl);
1442             uiCount = 1;
1443         }
1444     }
1445     else // replace all occurrences
1446     {
1447         const size_t uiOldLen = strOld.m_impl.length();
1448         const size_t uiNewLen = strNew.m_impl.length();
1449
1450         // first scan the string to find all positions at which the replacement
1451         // should be made
1452         wxVector<size_t> replacePositions;
1453
1454         size_t pos;
1455         for ( pos = m_impl.find(strOld.m_impl, 0);
1456               pos != npos;
1457               pos = m_impl.find(strOld.m_impl, pos + uiOldLen))
1458         {
1459             replacePositions.push_back(pos);
1460             ++uiCount;
1461         }
1462
1463         if ( !uiCount )
1464             return 0;
1465
1466         // allocate enough memory for the whole new string
1467         wxString tmp;
1468         tmp.m_impl.reserve(m_impl.length() + uiCount*(uiNewLen - uiOldLen));
1469
1470         // copy this string to tmp doing replacements on the fly
1471         size_t replNum = 0;
1472         for ( pos = 0; replNum < uiCount; replNum++ )
1473         {
1474             const size_t nextReplPos = replacePositions[replNum];
1475
1476             if ( pos != nextReplPos )
1477             {
1478                 tmp.m_impl.append(m_impl, pos, nextReplPos - pos);
1479             }
1480
1481             tmp.m_impl.append(strNew.m_impl);
1482             pos = nextReplPos + uiOldLen;
1483         }
1484
1485         if ( pos != m_impl.length() )
1486         {
1487             // append the rest of the string unchanged
1488             tmp.m_impl.append(m_impl, pos, m_impl.length() - pos);
1489         }
1490
1491         swap(tmp);
1492     }
1493
1494     return uiCount;
1495 }
1496
1497 bool wxString::IsAscii() const
1498 {
1499     for ( const_iterator i = begin(); i != end(); ++i )
1500     {
1501         if ( !(*i).IsAscii() )
1502             return false;
1503     }
1504
1505     return true;
1506 }
1507
1508 bool wxString::IsWord() const
1509 {
1510     for ( const_iterator i = begin(); i != end(); ++i )
1511     {
1512         if ( !wxIsalpha(*i) )
1513             return false;
1514     }
1515
1516     return true;
1517 }
1518
1519 bool wxString::IsNumber() const
1520 {
1521     if ( empty() )
1522         return true;
1523
1524     const_iterator i = begin();
1525
1526     if ( *i == _T('-') || *i == _T('+') )
1527         ++i;
1528
1529     for ( ; i != end(); ++i )
1530     {
1531         if ( !wxIsdigit(*i) )
1532             return false;
1533     }
1534
1535     return true;
1536 }
1537
1538 wxString wxString::Strip(stripType w) const
1539 {
1540     wxString s = *this;
1541     if ( w & leading ) s.Trim(false);
1542     if ( w & trailing ) s.Trim(true);
1543     return s;
1544 }
1545
1546 // ---------------------------------------------------------------------------
1547 // case conversion
1548 // ---------------------------------------------------------------------------
1549
1550 wxString& wxString::MakeUpper()
1551 {
1552   for ( iterator it = begin(), en = end(); it != en; ++it )
1553     *it = (wxChar)wxToupper(*it);
1554
1555   return *this;
1556 }
1557
1558 wxString& wxString::MakeLower()
1559 {
1560   for ( iterator it = begin(), en = end(); it != en; ++it )
1561     *it = (wxChar)wxTolower(*it);
1562
1563   return *this;
1564 }
1565
1566 wxString& wxString::MakeCapitalized()
1567 {
1568     const iterator en = end();
1569     iterator it = begin();
1570     if ( it != en )
1571     {
1572         *it = (wxChar)wxToupper(*it);
1573         for ( ++it; it != en; ++it )
1574             *it = (wxChar)wxTolower(*it);
1575     }
1576
1577     return *this;
1578 }
1579
1580 // ---------------------------------------------------------------------------
1581 // trimming and padding
1582 // ---------------------------------------------------------------------------
1583
1584 // some compilers (VC++ 6.0 not to name them) return true for a call to
1585 // isspace('\xEA') in the C locale which seems to be broken to me, but we have
1586 // to live with this by checking that the character is a 7 bit one - even if
1587 // this may fail to detect some spaces (I don't know if Unicode doesn't have
1588 // space-like symbols somewhere except in the first 128 chars), it is arguably
1589 // still better than trimming away accented letters
1590 inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1591
1592 // trims spaces (in the sense of isspace) from left or right side
1593 wxString& wxString::Trim(bool bFromRight)
1594 {
1595     // first check if we're going to modify the string at all
1596     if ( !empty() &&
1597          (
1598           (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1599           (!bFromRight && wxSafeIsspace(GetChar(0u)))
1600          )
1601        )
1602     {
1603         if ( bFromRight )
1604         {
1605             // find last non-space character
1606             reverse_iterator psz = rbegin();
1607             while ( (psz != rend()) && wxSafeIsspace(*psz) )
1608                 ++psz;
1609
1610             // truncate at trailing space start
1611             erase(psz.base(), end());
1612         }
1613         else
1614         {
1615             // find first non-space character
1616             iterator psz = begin();
1617             while ( (psz != end()) && wxSafeIsspace(*psz) )
1618                 ++psz;
1619
1620             // fix up data and length
1621             erase(begin(), psz);
1622         }
1623     }
1624
1625     return *this;
1626 }
1627
1628 // adds nCount characters chPad to the string from either side
1629 wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1630 {
1631     wxString s(chPad, nCount);
1632
1633     if ( bFromRight )
1634         *this += s;
1635     else
1636     {
1637         s += *this;
1638         swap(s);
1639     }
1640
1641     return *this;
1642 }
1643
1644 // truncate the string
1645 wxString& wxString::Truncate(size_t uiLen)
1646 {
1647     if ( uiLen < length() )
1648     {
1649         erase(begin() + uiLen, end());
1650     }
1651     //else: nothing to do, string is already short enough
1652
1653     return *this;
1654 }
1655
1656 // ---------------------------------------------------------------------------
1657 // finding (return wxNOT_FOUND if not found and index otherwise)
1658 // ---------------------------------------------------------------------------
1659
1660 // find a character
1661 int wxString::Find(wxUniChar ch, bool bFromEnd) const
1662 {
1663     size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1664
1665     return (idx == npos) ? wxNOT_FOUND : (int)idx;
1666 }
1667
1668 // ----------------------------------------------------------------------------
1669 // conversion to numbers
1670 // ----------------------------------------------------------------------------
1671
1672 // The implementation of all the functions below is exactly the same so factor
1673 // it out. Note that number extraction works correctly on UTF-8 strings, so
1674 // we can use wxStringCharType and wx_str() for maximum efficiency.
1675
// DO_IF_NOT_WINCE(x) expands to x everywhere except under Windows CE where
// it expands to nothing: it is used below for the code using errno, which is
// also not included in this file for __WXWINCE__.
#ifndef __WXWINCE__
    #define DO_IF_NOT_WINCE(x) x
#else
    #define DO_IF_NOT_WINCE(x)
#endif

// Common prologue of the conversion functions: returns false (after an
// assert) if the output pointer pVal is NULL, resets errno and declares the
// "start" and "end" pointers used by the strtoXXX()-like function call which
// must follow it and define the variable "val".
#define WX_STRING_TO_X_TYPE_START                                           \
    wxCHECK_MSG( pVal, false, _T("NULL output pointer") );                  \
    DO_IF_NOT_WINCE( errno = 0; )                                           \
    const wxStringCharType *start = wx_str();                               \
    wxStringCharType *end;

// Common epilogue of the conversion functions: checks that the conversion
// succeeded and only stores "val" in *pVal if it did, i.e. *pVal is not
// modified in case of failure.
#define WX_STRING_TO_X_TYPE_END                                             \
    /* return true only if scan was stopped by the terminating NUL and */   \
    /* if the string was not empty to start with and no under/overflow */   \
    /* occurred: */                                                         \
    if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )         \
        return false;                                                       \
    *pVal = val;                                                            \
    return true;
1696
// Convert the string to a long using wxStrtol() with the given base (which
// must be 0 or in 2..36 range) and store the result in *pVal.
//
// Returns true only if the entire non-empty string was converted without
// range error (see WX_STRING_TO_X_TYPE_END); *pVal is not modified otherwise.
bool wxString::ToLong(long *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );

    WX_STRING_TO_X_TYPE_START
    long val = wxStrtol(start, &end, base);
    WX_STRING_TO_X_TYPE_END
}
1705
// Convert the string to an unsigned long using wxStrtoul() with the given
// base (which must be 0 or in 2..36 range) and store the result in *pVal.
//
// Returns true only if the entire non-empty string was converted without
// range error (see WX_STRING_TO_X_TYPE_END); *pVal is not modified otherwise.
bool wxString::ToULong(unsigned long *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );

    WX_STRING_TO_X_TYPE_START
    unsigned long val = wxStrtoul(start, &end, base);
    WX_STRING_TO_X_TYPE_END
}
1714
// Convert the string to a wxLongLong_t using wxStrtoll() with the given base
// (which must be 0 or in 2..36 range) and store the result in *pVal.
//
// Returns true only if the entire non-empty string was converted without
// range error (see WX_STRING_TO_X_TYPE_END); *pVal is not modified otherwise.
bool wxString::ToLongLong(wxLongLong_t *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );

    WX_STRING_TO_X_TYPE_START
    wxLongLong_t val = wxStrtoll(start, &end, base);
    WX_STRING_TO_X_TYPE_END
}
1723
// Convert the string to a wxULongLong_t using wxStrtoull() with the given
// base (which must be 0 or in 2..36 range) and store the result in *pVal.
//
// Returns true only if the entire non-empty string was converted without
// range error (see WX_STRING_TO_X_TYPE_END); *pVal is not modified otherwise.
bool wxString::ToULongLong(wxULongLong_t *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );

    WX_STRING_TO_X_TYPE_START
    wxULongLong_t val = wxStrtoull(start, &end, base);
    WX_STRING_TO_X_TYPE_END
}
1732
// Convert the string to a double using wxStrtod() and store the result in
// *pVal.
//
// Returns true only if the entire non-empty string was converted without
// range error (see WX_STRING_TO_X_TYPE_END); *pVal is not modified otherwise.
bool wxString::ToDouble(double *pVal) const
{
    WX_STRING_TO_X_TYPE_START
    double val = wxStrtod(start, &end);
    WX_STRING_TO_X_TYPE_END
}
1739
1740 #if wxUSE_XLOCALE
1741
// Same as ToLong() but performs the conversion with wxCLocale passed
// explicitly to the conversion function.
//
// The narrow ("A") variant of the function is used when the internal string
// representation is char-based, i.e. in UTF-8 and ANSI builds.
bool wxString::ToCLong(long *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );

    WX_STRING_TO_X_TYPE_START
#if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
    long val = wxStrtol_lA(start, &end, base, wxCLocale);
#else
    long val = wxStrtol_l(start, &end, base, wxCLocale);
#endif
    WX_STRING_TO_X_TYPE_END
}
1754
// Same as ToULong() but performs the conversion with wxCLocale passed
// explicitly to the conversion function.
//
// The narrow ("A") variant of the function is used when the internal string
// representation is char-based, i.e. in UTF-8 and ANSI builds.
bool wxString::ToCULong(unsigned long *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );

    WX_STRING_TO_X_TYPE_START
#if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
    unsigned long val = wxStrtoul_lA(start, &end, base, wxCLocale);
#else
    unsigned long val = wxStrtoul_l(start, &end, base, wxCLocale);
#endif
    WX_STRING_TO_X_TYPE_END
}
1767
// Same as ToDouble() but performs the conversion with wxCLocale passed
// explicitly to the conversion function.
//
// The narrow ("A") variant of the function is used when the internal string
// representation is char-based, i.e. in UTF-8 and ANSI builds.
bool wxString::ToCDouble(double *pVal) const
{
    WX_STRING_TO_X_TYPE_START
#if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
    double val = wxStrtod_lA(start, &end, wxCLocale);
#else
    double val = wxStrtod_l(start, &end, wxCLocale);
#endif
    WX_STRING_TO_X_TYPE_END
}
1778
1779 #endif  // wxUSE_XLOCALE
1780
1781 // ---------------------------------------------------------------------------
1782 // formatted output
1783 // ---------------------------------------------------------------------------
1784
1785 #if !wxUSE_UTF8_LOCALE_ONLY
1786 /* static */
1787 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1788 wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
1789 #else
1790 wxString wxString::DoFormatWchar(const wxChar *format, ...)
1791 #endif
1792 {
1793     va_list argptr;
1794     va_start(argptr, format);
1795
1796     wxString s;
1797     s.PrintfV(format, argptr);
1798
1799     va_end(argptr);
1800
1801     return s;
1802 }
1803 #endif // !wxUSE_UTF8_LOCALE_ONLY
1804
1805 #if wxUSE_UNICODE_UTF8
1806 /* static */
1807 wxString wxString::DoFormatUtf8(const char *format, ...)
1808 {
1809     va_list argptr;
1810     va_start(argptr, format);
1811
1812     wxString s;
1813     s.PrintfV(format, argptr);
1814
1815     va_end(argptr);
1816
1817     return s;
1818 }
1819 #endif // wxUSE_UNICODE_UTF8
1820
1821 /* static */
1822 wxString wxString::FormatV(const wxString& format, va_list argptr)
1823 {
1824     wxString s;
1825     s.PrintfV(format, argptr);
1826     return s;
1827 }
1828
1829 #if !wxUSE_UTF8_LOCALE_ONLY
1830 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1831 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
1832 #else
1833 int wxString::DoPrintfWchar(const wxChar *format, ...)
1834 #endif
1835 {
1836     va_list argptr;
1837     va_start(argptr, format);
1838
1839 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1840     // get a pointer to the wxString instance; we have to use dynamic_cast<>
1841     // because it's the only cast that works safely for downcasting when
1842     // multiple inheritance is used:
1843     wxString *str = static_cast<wxString*>(this);
1844 #else
1845     wxString *str = this;
1846 #endif
1847
1848     int iLen = str->PrintfV(format, argptr);
1849
1850     va_end(argptr);
1851
1852     return iLen;
1853 }
1854 #endif // !wxUSE_UTF8_LOCALE_ONLY
1855
1856 #if wxUSE_UNICODE_UTF8
1857 int wxString::DoPrintfUtf8(const char *format, ...)
1858 {
1859     va_list argptr;
1860     va_start(argptr, format);
1861
1862     int iLen = PrintfV(format, argptr);
1863
1864     va_end(argptr);
1865
1866     return iLen;
1867 }
1868 #endif // wxUSE_UNICODE_UTF8
1869
1870 /*
    Uses wxVsnprintf and places the result into this string.
1872
1873     In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1874     it is vswprintf.  Due to a discrepancy between vsnprintf and vswprintf in
1875     the ISO C99 (and thus SUSv3) standard the return value for the case of
1876     an undersized buffer is inconsistent.  For conforming vsnprintf
1877     implementations the function must return the number of characters that
1878     would have been printed had the buffer been large enough.  For conforming
1879     vswprintf implementations the function must return a negative number
1880     and set errno.
1881
1882     What vswprintf sets errno to is undefined but Darwin seems to set it to
1883     EOVERFLOW.  The only expected errno are EILSEQ and EINVAL.  Both of
1884     those are defined in the standard and backed up by several conformance
1885     statements.  Note that ENOMEM mentioned in the manual page does not
1886     apply to swprintf, only wprintf and fwprintf.
1887
1888     Official manual page:
1889     http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1890
1891     Some conformance statements (AIX, Solaris):
1892     http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1893     http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1894
1895     Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1896     EILSEQ and EINVAL are specifically defined to mean the error is other than
1897     an undersized buffer and no other errno are defined we treat those two
1898     as meaning hard errors and everything else gets the old behavior which
1899     is to keep looping and increasing buffer size until the function succeeds.
1900
1901     In practice it's impossible to determine before compilation which behavior
1902     may be used.  The vswprintf function may have vsnprintf-like behavior or
1903     vice-versa.  Behavior detected on one release can theoretically change
1904     with an updated release.  Not to mention that configure testing for it
1905     would require the test to be run on the host system, not the build system
1906     which makes cross compilation difficult. Therefore, we make no assumptions
1907     about behavior and try our best to handle every known case, including the
1908     case where wxVsnprintf returns a negative number and fails to set errno.
1909
1910     There is yet one more non-standard implementation and that is our own.
1911     Fortunately, that can be detected at compile-time.
1912
1913     On top of all that, ISO C99 explicitly defines snprintf to write a null
    character to the last position of the specified buffer.  That would be at
    the given buffer size minus 1.  It is supposed to do this even if it
1916     turns out that the buffer is sized too small.
1917
1918     Darwin (tested on 10.5) follows the C99 behavior exactly.
1919
1920     Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1921     errno even when it fails.  However, it only seems to ever fail due
1922     to an undersized buffer.
1923 */
1924 #if wxUSE_UNICODE_UTF8
1925 template<typename BufferType>
1926 #else
1927 // we only need one version in non-UTF8 builds and at least two Windows
1928 // compilers have problems with this function template, so use just one
1929 // normal function here
1930 #endif
1931 static int DoStringPrintfV(wxString& str,
1932                            const wxString& format, va_list argptr)
1933 {
1934     int size = 1024;
1935
1936     for ( ;; )
1937     {
1938 #if wxUSE_UNICODE_UTF8
1939         BufferType tmp(str, size + 1);
1940         typename BufferType::CharType *buf = tmp;
1941 #else
1942         wxStringBuffer tmp(str, size + 1);
1943         wxChar *buf = tmp;
1944 #endif
1945
1946         if ( !buf )
1947         {
1948             // out of memory
1949
1950             // in UTF-8 build, leaving uninitialized junk in the buffer
1951             // could result in invalid non-empty UTF-8 string, so just
1952             // reset the string to empty on failure:
1953             buf[0] = '\0';
1954             return -1;
1955         }
1956
1957         // wxVsnprintf() may modify the original arg pointer, so pass it
1958         // only a copy
1959         va_list argptrcopy;
1960         wxVaCopy(argptrcopy, argptr);
1961
1962 #ifndef __WXWINCE__
1963         // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1964         errno = 0;
1965 #endif
1966         int len = wxVsnprintf(buf, size, format, argptrcopy);
1967         va_end(argptrcopy);
1968
1969         // some implementations of vsnprintf() don't NUL terminate
1970         // the string if there is not enough space for it so
1971         // always do it manually
1972         // FIXME: This really seems to be the wrong and would be an off-by-one
1973         // bug except the code above allocates an extra character.
1974         buf[size] = _T('\0');
1975
1976         // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1977         // total number of characters which would have been written if the
1978         // buffer were large enough (newer standards such as Unix98)
1979         if ( len < 0 )
1980         {
1981             // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1982             //     wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1983             //     is true if *both* of them use our own implementation,
1984             //     otherwise we can't be sure
1985 #if wxUSE_WXVSNPRINTF
1986             // we know that our own implementation of wxVsnprintf() returns -1
1987             // only for a format error - thus there's something wrong with
1988             // the user's format string
1989             buf[0] = '\0';
1990             return -1;
1991 #else // possibly using system version
1992             // assume it only returns error if there is not enough space, but
1993             // as we don't know how much we need, double the current size of
1994             // the buffer
1995 #ifndef __WXWINCE__
1996             if( (errno == EILSEQ) || (errno == EINVAL) )
1997             // If errno was set to one of the two well-known hard errors
1998             // then fail immediately to avoid an infinite loop.
1999                 return -1;
2000             else
2001 #endif // __WXWINCE__
2002             // still not enough, as we don't know how much we need, double the
2003             // current size of the buffer
2004                 size *= 2;
2005 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
2006         }
2007         else if ( len >= size )
2008         {
2009 #if wxUSE_WXVSNPRINTF
2010             // we know that our own implementation of wxVsnprintf() returns
2011             // size+1 when there's not enough space but that's not the size
2012             // of the required buffer!
2013             size *= 2;      // so we just double the current size of the buffer
2014 #else
2015             // some vsnprintf() implementations NUL-terminate the buffer and
2016             // some don't in len == size case, to be safe always add 1
2017             // FIXME: I don't quite understand this comment.  The vsnprintf
2018             // function is specifically defined to return the number of
2019             // characters printed not including the null terminator.
2020             // So OF COURSE you need to add 1 to get the right buffer size.
2021             // The following line is definitely correct, no question.
2022             size = len + 1;
2023 #endif
2024         }
2025         else // ok, there was enough space
2026         {
2027             break;
2028         }
2029     }
2030
2031     // we could have overshot
2032     str.Shrink();
2033
2034     return str.length();
2035 }
2036
2037 int wxString::PrintfV(const wxString& format, va_list argptr)
2038 {
2039 #if wxUSE_UNICODE_UTF8
2040     #if wxUSE_STL_BASED_WXSTRING
2041         typedef wxStringTypeBuffer<char> Utf8Buffer;
2042     #else
2043         typedef wxStringInternalBuffer Utf8Buffer;
2044     #endif
2045 #endif
2046
2047 #if wxUSE_UTF8_LOCALE_ONLY
2048     return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
2049 #else
2050     #if wxUSE_UNICODE_UTF8
2051     if ( wxLocaleIsUtf8 )
2052         return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
2053     else
2054         // wxChar* version
2055         return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
2056     #else
2057         return DoStringPrintfV(*this, format, argptr);
2058     #endif // UTF8/WCHAR
2059 #endif
2060 }
2061
2062 // ----------------------------------------------------------------------------
2063 // misc other operations
2064 // ----------------------------------------------------------------------------
2065
2066 // returns true if the string matches the pattern which may contain '*' and
2067 // '?' metacharacters (as usual, '?' matches any character and '*' any number
2068 // of them)
2069 bool wxString::Matches(const wxString& mask) const
2070 {
2071     // I disable this code as it doesn't seem to be faster (in fact, it seems
2072     // to be much slower) than the old, hand-written code below and using it
2073     // here requires always linking with libregex even if the user code doesn't
2074     // use it
2075 #if 0 // wxUSE_REGEX
2076     // first translate the shell-like mask into a regex
2077     wxString pattern;
2078     pattern.reserve(wxStrlen(pszMask));
2079
2080     pattern += _T('^');
2081     while ( *pszMask )
2082     {
2083         switch ( *pszMask )
2084         {
2085             case _T('?'):
2086                 pattern += _T('.');
2087                 break;
2088
2089             case _T('*'):
2090                 pattern += _T(".*");
2091                 break;
2092
2093             case _T('^'):
2094             case _T('.'):
2095             case _T('$'):
2096             case _T('('):
2097             case _T(')'):
2098             case _T('|'):
2099             case _T('+'):
2100             case _T('\\'):
2101                 // these characters are special in a RE, quote them
2102                 // (however note that we don't quote '[' and ']' to allow
2103                 // using them for Unix shell like matching)
2104                 pattern += _T('\\');
2105                 // fall through
2106
2107             default:
2108                 pattern += *pszMask;
2109         }
2110
2111         pszMask++;
2112     }
2113     pattern += _T('$');
2114
2115     // and now use it
2116     return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
2117 #else // !wxUSE_REGEX
2118   // TODO: this is, of course, awfully inefficient...
2119
2120   // FIXME-UTF8: implement using iterators, remove #if
2121 #if wxUSE_UNICODE_UTF8
2122   const wxScopedWCharBuffer maskBuf = mask.wc_str();
2123   const wxScopedWCharBuffer txtBuf = wc_str();
2124   const wxChar *pszMask = maskBuf.data();
2125   const wxChar *pszTxt = txtBuf.data();
2126 #else
2127   const wxChar *pszMask = mask.wx_str();
2128   // the char currently being checked
2129   const wxChar *pszTxt = wx_str();
2130 #endif
2131
2132   // the last location where '*' matched
2133   const wxChar *pszLastStarInText = NULL;
2134   const wxChar *pszLastStarInMask = NULL;
2135
2136 match:
2137   for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
2138     switch ( *pszMask ) {
2139       case wxT('?'):
2140         if ( *pszTxt == wxT('\0') )
2141           return false;
2142
2143         // pszTxt and pszMask will be incremented in the loop statement
2144
2145         break;
2146
2147       case wxT('*'):
2148         {
2149           // remember where we started to be able to backtrack later
2150           pszLastStarInText = pszTxt;
2151           pszLastStarInMask = pszMask;
2152
2153           // ignore special chars immediately following this one
2154           // (should this be an error?)
2155           while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
2156             pszMask++;
2157
2158           // if there is nothing more, match
2159           if ( *pszMask == wxT('\0') )
2160             return true;
2161
2162           // are there any other metacharacters in the mask?
2163           size_t uiLenMask;
2164           const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));
2165
2166           if ( pEndMask != NULL ) {
2167             // we have to match the string between two metachars
2168             uiLenMask = pEndMask - pszMask;
2169           }
2170           else {
2171             // we have to match the remainder of the string
2172             uiLenMask = wxStrlen(pszMask);
2173           }
2174
2175           wxString strToMatch(pszMask, uiLenMask);
2176           const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
2177           if ( pMatch == NULL )
2178             return false;
2179
2180           // -1 to compensate "++" in the loop
2181           pszTxt = pMatch + uiLenMask - 1;
2182           pszMask += uiLenMask - 1;
2183         }
2184         break;
2185
2186       default:
2187         if ( *pszMask != *pszTxt )
2188           return false;
2189         break;
2190     }
2191   }
2192
2193   // match only if nothing left
2194   if ( *pszTxt == wxT('\0') )
2195     return true;
2196
2197   // if we failed to match, backtrack if we can
2198   if ( pszLastStarInText ) {
2199     pszTxt = pszLastStarInText + 1;
2200     pszMask = pszLastStarInMask;
2201
2202     pszLastStarInText = NULL;
2203
2204     // don't bother resetting pszLastStarInMask, it's unnecessary
2205
2206     goto match;
2207   }
2208
2209   return false;
2210 #endif // wxUSE_REGEX/!wxUSE_REGEX
2211 }
2212
2213 // Count the number of chars
2214 int wxString::Freq(wxUniChar ch) const
2215 {
2216     int count = 0;
2217     for ( const_iterator i = begin(); i != end(); ++i )
2218     {
2219         if ( *i == ch )
2220             count ++;
2221     }
2222     return count;
2223 }
2224