src/common/string.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/string.cpp
   3 // Purpose:     wxString class
   4 // Author:      Vadim Zeitlin, Ryan Norton
   5 // Modified by:
   6 // Created:     29/01/98
   7 // RCS-ID:      $Id$
   8 // Copyright:   (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
   9 //              (c) 2004 Ryan Norton <wxprojects@comcast.net>
  10 // Licence:     wxWindows licence
  11 /////////////////////////////////////////////////////////////////////////////
  12
  13 // ===========================================================================
  14 // headers, declarations, constants
  15 // ===========================================================================
  16
  17 // For compilers that support precompilation, includes "wx.h".
  18 #include "wx/wxprec.h"
  19
  20 #ifdef __BORLANDC__
  21     #pragma hdrstop
  22 #endif
  23
  24 #ifndef WX_PRECOMP
  25     #include "wx/string.h"
  26     #include "wx/wxcrtvararg.h"
  27 #endif
  28
  29 #include <ctype.h>
  30
  31 #ifndef __WXWINCE__
  32     #include <errno.h>
  33 #endif
  34
  35 #include <string.h>
  36 #include <stdlib.h>
  37
  38 #include "wx/hashmap.h"
  39
  40 // string handling functions used by wxString:
  41 #if wxUSE_UNICODE_UTF8
  42     #define wxStringMemcpy   memcpy
  43     #define wxStringMemcmp   memcmp
  44     #define wxStringMemchr   memchr
  45     #define wxStringStrlen   strlen
  46 #else
  47     #define wxStringMemcpy   wxTmemcpy
  48     #define wxStringMemcmp   wxTmemcmp
  49     #define wxStringMemchr   wxTmemchr
  50     #define wxStringStrlen   wxStrlen
  51 #endif
  52
  53 // ----------------------------------------------------------------------------
  54 // global variables
  55 // ----------------------------------------------------------------------------
  56
  57 namespace wxPrivate
  58 {
  59
  60 static UntypedBufferData s_untypedNullData(NULL);
  61
  62 UntypedBufferData * const untypedNullDataPtr = &s_untypedNullData;
  63
  64 } // namespace wxPrivate
  65
  66 // ---------------------------------------------------------------------------
  67 // static class variables definition
  68 // ---------------------------------------------------------------------------
  69
  70 //According to STL _must_ be a -1 size_t
  71 const size_t wxString::npos = (size_t) -1;
  72
  73 #if wxUSE_STRING_POS_CACHE
  74
  75 #ifdef wxHAS_COMPILER_TLS
  76
  77 wxTLS_TYPE(wxString::Cache) wxString::ms_cache;
  78
  79 #else // !wxHAS_COMPILER_TLS
  80
  81 struct wxStrCacheInitializer
  82 {
  83     wxStrCacheInitializer()
  84     {
  85         // calling this function triggers s_cache initialization in it, and
  86         // from now on it becomes safe to call from multiple threads
  87         wxString::GetCache();
  88     }
  89 };
  90
  91 /*
  92 wxString::Cache& wxString::GetCache()
  93 {
  94     static wxTLS_TYPE(Cache) s_cache;
  95
  96     return wxTLS_VALUE(s_cache);
  97 }
  98 */
  99
 100 static wxStrCacheInitializer gs_stringCacheInit;
 101
 102 #endif // wxHAS_COMPILER_TLS/!wxHAS_COMPILER_TLS
 103
 104 // gdb seems to be unable to display thread-local variables correctly, at least
 105 // not my 6.4.98 version under amd64, so provide this debugging helper to do it
 106 #ifdef __WXDEBUG__
 107
 108 struct wxStrCacheDumper
 109 {
 110     static void ShowAll()
 111     {
 112         puts("*** wxString cache dump:");
 113         for ( unsigned n = 0; n < wxString::Cache::SIZE; n++ )
 114         {
 115             const wxString::Cache::Element&
 116                 c = wxString::GetCacheBegin()[n];
 117
 118             printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
 119                    n,
 120                    n == wxString::LastUsedCacheElement() ? " [*]" : "",
 121                    c.str,
 122                    (unsigned long)c.pos,
 123                    (unsigned long)c.impl,
 124                    (long)c.len);
 125         }
 126     }
 127 };
 128
 129 void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
 130
 131 #endif // __WXDEBUG__
 132
 133 #ifdef wxPROFILE_STRING_CACHE
 134
 135 wxString::CacheStats wxString::ms_cacheStats;
 136
 137 struct wxStrCacheStatsDumper
 138 {
 139     ~wxStrCacheStatsDumper()
 140     {
 141         const wxString::CacheStats& stats = wxString::ms_cacheStats;
 142
 143         if ( stats.postot )
 144         {
 145             puts("*** wxString cache statistics:");
 146             printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
 147                    stats.postot);
 148             printf("\tHits %u (of which %u not used) or %.2f%%\n",
 149                    stats.poshits,
 150                    stats.mishits,
 151                    100.*float(stats.poshits - stats.mishits)/stats.postot);
 152             printf("\tAverage position requested: %.2f\n",
 153                    float(stats.sumpos) / stats.postot);
 154             printf("\tAverage offset after cached hint: %.2f\n",
 155                    float(stats.sumofs) / stats.postot);
 156         }
 157
 158         if ( stats.lentot )
 159         {
 160             printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
 161                    stats.lentot, 100.*float(stats.lenhits)/stats.lentot);
 162         }
 163     }
 164 };
 165
 166 static wxStrCacheStatsDumper s_showCacheStats;
 167
 168 #endif // wxPROFILE_STRING_CACHE
 169
 170 #endif // wxUSE_STRING_POS_CACHE
 171
 172 // ----------------------------------------------------------------------------
 173 // global functions
 174 // ----------------------------------------------------------------------------
 175
 176 #if wxUSE_STD_IOSTREAM
 177
 178 #include <iostream>
 179
 180 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
 181 {
 182 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
 183     const wxCharBuffer buf(str.AsCharBuf());
 184     if ( !buf )
 185         os.clear(wxSTD ios_base::failbit);
 186     else
 187         os << buf.data();
 188
 189     return os;
 190 #else
 191     return os << str.AsInternal();
 192 #endif
 193 }
 194
 195 wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
 196 {
 197     return os << str.c_str();
 198 }
 199
 200 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str)
 201 {
 202     return os << str.data();
 203 }
 204
 205 #ifndef __BORLANDC__
 206 wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)
 207 {
 208     return os << str.data();
 209 }
 210 #endif
 211
 212 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
 213
 214 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxString& str)
 215 {
 216     return wos << str.wc_str();
 217 }
 218
 219 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxCStrData& str)
 220 {
 221     return wos << str.AsWChar();
 222 }
 223
 224 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxWCharBuffer& str)
 225 {
 226     return wos << str.data();
 227 }
 228
 229 #endif  // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
 230
 231 #endif // wxUSE_STD_IOSTREAM
 232
 233 // ===========================================================================
 234 // wxString class core
 235 // ===========================================================================
 236
 237 #if wxUSE_UNICODE_UTF8
 238
 239 void wxString::PosLenToImpl(size_t pos, size_t len,
 240                             size_t *implPos, size_t *implLen) const
 241 {
 242     if ( pos == npos )
 243     {
 244         *implPos = npos;
 245     }
 246     else // have valid start position
 247     {
 248         const const_iterator b = GetIterForNthChar(pos);
 249         *implPos = wxStringImpl::const_iterator(b.impl()) - m_impl.begin();
 250         if ( len == npos )
 251         {
 252             *implLen = npos;
 253         }
 254         else // have valid length too
 255         {
 256             // we need to handle the case of length specifying a substring
 257             // going beyond the end of the string, just as std::string does
 258             const const_iterator e(end());
 259             const_iterator i(b);
 260             while ( len && i <= e )
 261             {
 262                 ++i;
 263                 --len;
 264             }
 265
 266             *implLen = i.impl() - b.impl();
 267         }
 268     }
 269 }
 270
 271 #endif // wxUSE_UNICODE_UTF8
 272
 273 // ----------------------------------------------------------------------------
 274 // wxCStrData converted strings caching
 275 // ----------------------------------------------------------------------------
 276
 277 // FIXME-UTF8: temporarily disabled because it doesn't work with global
 278 //             string objects; re-enable after fixing this bug and benchmarking
 279 //             performance to see if using a hash is a good idea at all
 280 #if 0
 281
 282 // For backward compatibility reasons, it must be possible to assign the value
 283 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
 284 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
 285 // because the memory would be freed immediately, but it has to be valid as long
 286 // as the string is not modified, so that code like this still works:
 287 //
 288 // const wxChar *s = str.c_str();
 289 // while ( s ) { ... }
 290
 291 // FIXME-UTF8: not thread safe!
 292 // FIXME-UTF8: we currently clear the cached conversion only when the string is
 293 //             destroyed, but we should do it when the string is modified, to
 294 //             keep memory usage down
 295 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
 296 //             invalidated the cache on every change, we could keep the previous
 297 //             conversion
 298 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
 299 //             to use mb_str() or wc_str() instead of (const [w]char*)c_str()
 300
 301 template<typename T>
 302 static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
 303 {
 304     typename T::iterator i = hash.find(wxConstCast(s, wxString));
 305     if ( i != hash.end() )
 306     {
 307         free(i->second);
 308         hash.erase(i);
 309     }
 310 }
 311
 312 #if wxUSE_UNICODE
 313 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
 314 //     so we have to use wxString* here and const-cast when used
 315 WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
 316                     wxStringCharConversionCache);
 317 static wxStringCharConversionCache gs_stringsCharCache;
 318
 319 const char* wxCStrData::AsChar() const
 320 {
 321     // remove previously cache value, if any (see FIXMEs above):
 322     DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
 323
 324     // convert the string and keep it:
 325     const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
 326         m_str->mb_str().release();
 327
 328     return s + m_offset;
 329 }
 330 #endif // wxUSE_UNICODE
 331
 332 #if !wxUSE_UNICODE_WCHAR
 333 WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
 334                     wxStringWCharConversionCache);
 335 static wxStringWCharConversionCache gs_stringsWCharCache;
 336
 337 const wchar_t* wxCStrData::AsWChar() const
 338 {
 339     // remove previously cache value, if any (see FIXMEs above):
 340     DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
 341
 342     // convert the string and keep it:
 343     const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
 344         m_str->wc_str().release();
 345
 346     return s + m_offset;
 347 }
 348 #endif // !wxUSE_UNICODE_WCHAR
 349
 350 wxString::~wxString()
 351 {
 352 #if wxUSE_UNICODE
 353     // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
 354     DeleteStringFromConversionCache(gs_stringsCharCache, this);
 355 #endif
 356 #if !wxUSE_UNICODE_WCHAR
 357     DeleteStringFromConversionCache(gs_stringsWCharCache, this);
 358 #endif
 359 }
 360 #endif
 361
 362 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 363 const char* wxCStrData::AsChar() const
 364 {
 365 #if wxUSE_UNICODE_UTF8
 366     if ( wxLocaleIsUtf8 )
 367         return AsInternal();
 368 #endif
 369     // under non-UTF8 locales, we have to convert the internal UTF-8
 370     // representation using wxConvLibc and cache the result
 371
 372     wxString *str = wxConstCast(m_str, wxString);
 373
 374     // convert the string:
 375     //
 376     // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
 377     //             have it) but it's unfortunately not obvious to implement
 378     //             because we don't know how big buffer do we need for the
 379     //             given string length (in case of multibyte encodings, e.g.
 380     //             ISO-2022-JP or UTF-8 when internal representation is wchar_t)
 381     //
 382     //             One idea would be to store more than just m_convertedToChar
 383     //             in wxString: then we could record the length of the string
 384     //             which was converted the last time and try to reuse the same
 385     //             buffer if the current length is not greater than it (this
 386     //             could still fail because string could have been modified in
 387     //             place but it would work most of the time, so we'd do it and
 388     //             only allocate the new buffer if in-place conversion returned
 389     //             an error). We could also store a bit saying if the string
 390     //             was modified since the last conversion (and update it in all
 391     //             operation modifying the string, of course) to avoid unneeded
 392     //             consequential conversions. But both of these ideas require
 393     //             adding more fields to wxString and require profiling results
 394     //             to be sure that we really gain enough from them to justify
 395     //             doing it.
 396     wxCharBuffer buf(str->mb_str());
 397
 398     // if it failed, return empty string and not NULL to avoid crashes in code
 399     // written with either wxWidgets 2 wxString or std::string behaviour in
 400     // mind: neither of them ever returns NULL and so we shouldn't neither
 401     if ( !buf )
 402         return "";
 403
 404     if ( str->m_convertedToChar &&
 405          strlen(buf) == strlen(str->m_convertedToChar) )
 406     {
 407         // keep the same buffer for as long as possible, so that several calls
 408         // to c_str() in a row still work:
 409         strcpy(str->m_convertedToChar, buf);
 410     }
 411     else
 412     {
 413         str->m_convertedToChar = buf.release();
 414     }
 415
 416     // and keep it:
 417     return str->m_convertedToChar + m_offset;
 418 }
 419 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 420
 421 #if !wxUSE_UNICODE_WCHAR
 422 const wchar_t* wxCStrData::AsWChar() const
 423 {
 424     wxString *str = wxConstCast(m_str, wxString);
 425
 426     // convert the string:
 427     wxWCharBuffer buf(str->wc_str());
 428
 429     // notice that here, unlike above in AsChar(), conversion can't fail as our
 430     // internal UTF-8 is always well-formed -- or the string was corrupted and
 431     // all bets are off anyhow
 432
 433     // FIXME-UTF8: do the conversion in-place in the existing buffer
 434     if ( str->m_convertedToWChar &&
 435          wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
 436     {
 437         // keep the same buffer for as long as possible, so that several calls
 438         // to c_str() in a row still work:
 439         memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
 440     }
 441     else
 442     {
 443         str->m_convertedToWChar = buf.release();
 444     }
 445
 446     // and keep it:
 447     return str->m_convertedToWChar + m_offset;
 448 }
 449 #endif // !wxUSE_UNICODE_WCHAR
 450
 451 // ===========================================================================
 452 // wxString class core
 453 // ===========================================================================
 454
 455 // ---------------------------------------------------------------------------
 456 // construction and conversion
 457 // ---------------------------------------------------------------------------
 458
 459 #if wxUSE_UNICODE_WCHAR
 460 /* static */
 461 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 462                                                const wxMBConv& conv)
 463 {
 464     // anything to do?
 465     if ( !psz || nLength == 0 )
 466         return SubstrBufFromMB(L"", 0);
 467
 468     if ( nLength == npos )
 469         nLength = wxNO_LEN;
 470
 471     size_t wcLen;
 472     wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 473     if ( !wcLen )
 474         return SubstrBufFromMB(_T(""), 0);
 475     else
 476         return SubstrBufFromMB(wcBuf, wcLen);
 477 }
 478 #endif // wxUSE_UNICODE_WCHAR
 479
 480 #if wxUSE_UNICODE_UTF8
 481 /* static */
 482 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 483                                                const wxMBConv& conv)
 484 {
 485     // anything to do?
 486     if ( !psz || nLength == 0 )
 487         return SubstrBufFromMB("", 0);
 488
 489     // if psz is already in UTF-8, we don't have to do the roundtrip to
 490     // wchar_t* and back:
 491     if ( conv.IsUTF8() )
 492     {
 493         // we need to validate the input because UTF8 iterators assume valid
 494         // UTF-8 sequence and psz may be invalid:
 495         if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
 496         {
 497             // we must pass the real string length to SubstrBufFromMB ctor
 498             if ( nLength == npos )
 499                 nLength = psz ? strlen(psz) : 0;
 500             return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz), nLength);
 501         }
 502         // else: do the roundtrip through wchar_t*
 503     }
 504
 505     if ( nLength == npos )
 506         nLength = wxNO_LEN;
 507
 508     // first convert to wide string:
 509     size_t wcLen;
 510     wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 511     if ( !wcLen )
 512         return SubstrBufFromMB("", 0);
 513
 514     // and then to UTF-8:
 515     SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
 516     // widechar -> UTF-8 conversion isn't supposed to ever fail:
 517     wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
 518
 519     return buf;
 520 }
 521 #endif // wxUSE_UNICODE_UTF8
 522
 523 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 524 /* static */
 525 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
 526                                                const wxMBConv& conv)
 527 {
 528     // anything to do?
 529     if ( !pwz || nLength == 0 )
 530         return SubstrBufFromWC("", 0);
 531
 532     if ( nLength == npos )
 533         nLength = wxNO_LEN;
 534
 535     size_t mbLen;
 536     wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
 537     if ( !mbLen )
 538         return SubstrBufFromWC("", 0);
 539     else
 540         return SubstrBufFromWC(mbBuf, mbLen);
 541 }
 542 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 543
 544
 545 #if wxUSE_UNICODE_WCHAR
 546
 547 //Convert wxString in Unicode mode to a multi-byte string
 548 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
 549 {
 550     return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL);
 551 }
 552
 553 #elif wxUSE_UNICODE_UTF8
 554
 555 const wxWCharBuffer wxString::wc_str() const
 556 {
 557     return wxMBConvStrictUTF8().cMB2WC
 558                                 (
 559                                     m_impl.c_str(),
 560                                     m_impl.length() + 1, // size, not length
 561                                     NULL
 562                                 );
 563 }
 564
 565 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
 566 {
 567     if ( conv.IsUTF8() )
 568         return wxCharBuffer::CreateNonOwned(m_impl.c_str());
 569
 570     // FIXME-UTF8: use wc_str() here once we have buffers with length
 571
 572     size_t wcLen;
 573     wxWCharBuffer wcBuf(wxMBConvStrictUTF8().cMB2WC
 574                                              (
 575                                                 m_impl.c_str(),
 576                                                 m_impl.length() + 1, // size
 577                                                 &wcLen
 578                                              ));
 579     if ( !wcLen )
 580         return wxCharBuffer("");
 581
 582     return conv.cWC2MB(wcBuf, wcLen+1, NULL);
 583 }
 584
 585 #else // ANSI
 586
 587 //Converts this string to a wide character string if unicode
 588 //mode is not enabled and wxUSE_WCHAR_T is enabled
 589 const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
 590 {
 591     return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL);
 592 }
 593
 594 #endif // Unicode/ANSI
 595
 596 // shrink to minimal size (releasing extra memory)
 597 bool wxString::Shrink()
 598 {
 599   wxString tmp(begin(), end());
 600   swap(tmp);
 601   return tmp.length() == length();
 602 }
 603
 604 // deprecated compatibility code:
 605 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
 606 wxStringCharType *wxString::GetWriteBuf(size_t nLen)
 607 {
 608     return DoGetWriteBuf(nLen);
 609 }
 610
 611 void wxString::UngetWriteBuf()
 612 {
 613     DoUngetWriteBuf();
 614 }
 615
 616 void wxString::UngetWriteBuf(size_t nLen)
 617 {
 618     DoUngetWriteBuf(nLen);
 619 }
 620 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
 621
 622
 623 // ---------------------------------------------------------------------------
 624 // data access
 625 // ---------------------------------------------------------------------------
 626
 627 // all functions are inline in string.h
 628
 629 // ---------------------------------------------------------------------------
 630 // concatenation operators
 631 // ---------------------------------------------------------------------------
 632
 633 /*
 634  * concatenation functions come in 5 flavours:
 635  *  string + string
 636  *  char   + string      and      string + char
 637  *  C str  + string      and      string + C str
 638  */
 639
 640 wxString operator+(const wxString& str1, const wxString& str2)
 641 {
 642 #if !wxUSE_STL_BASED_WXSTRING
 643     wxASSERT( str1.IsValid() );
 644     wxASSERT( str2.IsValid() );
 645 #endif
 646
 647     wxString s = str1;
 648     s += str2;
 649
 650     return s;
 651 }
 652
 653 wxString operator+(const wxString& str, wxUniChar ch)
 654 {
 655 #if !wxUSE_STL_BASED_WXSTRING
 656     wxASSERT( str.IsValid() );
 657 #endif
 658
 659     wxString s = str;
 660     s += ch;
 661
 662     return s;
 663 }
 664
 665 wxString operator+(wxUniChar ch, const wxString& str)
 666 {
 667 #if !wxUSE_STL_BASED_WXSTRING
 668     wxASSERT( str.IsValid() );
 669 #endif
 670
 671     wxString s = ch;
 672     s += str;
 673
 674     return s;
 675 }
 676
 677 wxString operator+(const wxString& str, const char *psz)
 678 {
 679 #if !wxUSE_STL_BASED_WXSTRING
 680     wxASSERT( str.IsValid() );
 681 #endif
 682
 683     wxString s;
 684     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 685         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 686     }
 687     s += str;
 688     s += psz;
 689
 690     return s;
 691 }
 692
 693 wxString operator+(const wxString& str, const wchar_t *pwz)
 694 {
 695 #if !wxUSE_STL_BASED_WXSTRING
 696     wxASSERT( str.IsValid() );
 697 #endif
 698
 699     wxString s;
 700     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 701         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 702     }
 703     s += str;
 704     s += pwz;
 705
 706     return s;
 707 }
 708
 709 wxString operator+(const char *psz, const wxString& str)
 710 {
 711 #if !wxUSE_STL_BASED_WXSTRING
 712     wxASSERT( str.IsValid() );
 713 #endif
 714
 715     wxString s;
 716     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 717         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 718     }
 719     s = psz;
 720     s += str;
 721
 722     return s;
 723 }
 724
 725 wxString operator+(const wchar_t *pwz, const wxString& str)
 726 {
 727 #if !wxUSE_STL_BASED_WXSTRING
 728     wxASSERT( str.IsValid() );
 729 #endif
 730
 731     wxString s;
 732     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 733         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 734     }
 735     s = pwz;
 736     s += str;
 737
 738     return s;
 739 }
 740
 741 // ---------------------------------------------------------------------------
 742 // string comparison
 743 // ---------------------------------------------------------------------------
 744
 745 bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
 746 {
 747     return (length() == 1) && (compareWithCase ? GetChar(0u) == c
 748                                : wxToupper(GetChar(0u)) == wxToupper(c));
 749 }
 750
 751 #ifdef HAVE_STD_STRING_COMPARE
 752
 753 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
 754 //     UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
 755 //     sort strings in characters code point order by sorting the byte sequence
 756 //     in byte values order (i.e. what strcmp() and memcmp() do).
 757
 758 int wxString::compare(const wxString& str) const
 759 {
 760     return m_impl.compare(str.m_impl);
 761 }
 762
 763 int wxString::compare(size_t nStart, size_t nLen,
 764                       const wxString& str) const
 765 {
 766     size_t pos, len;
 767     PosLenToImpl(nStart, nLen, &pos, &len);
 768     return m_impl.compare(pos, len, str.m_impl);
 769 }
 770
 771 int wxString::compare(size_t nStart, size_t nLen,
 772                       const wxString& str,
 773                       size_t nStart2, size_t nLen2) const
 774 {
 775     size_t pos, len;
 776     PosLenToImpl(nStart, nLen, &pos, &len);
 777
 778     size_t pos2, len2;
 779     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 780
 781     return m_impl.compare(pos, len, str.m_impl, pos2, len2);
 782 }
 783
 784 int wxString::compare(const char* sz) const
 785 {
 786     return m_impl.compare(ImplStr(sz));
 787 }
 788
 789 int wxString::compare(const wchar_t* sz) const
 790 {
 791     return m_impl.compare(ImplStr(sz));
 792 }
 793
 794 int wxString::compare(size_t nStart, size_t nLen,
 795                       const char* sz, size_t nCount) const
 796 {
 797     size_t pos, len;
 798     PosLenToImpl(nStart, nLen, &pos, &len);
 799
 800     SubstrBufFromMB str(ImplStr(sz, nCount));
 801
 802     return m_impl.compare(pos, len, str.data, str.len);
 803 }
 804
 805 int wxString::compare(size_t nStart, size_t nLen,
 806                       const wchar_t* sz, size_t nCount) const
 807 {
 808     size_t pos, len;
 809     PosLenToImpl(nStart, nLen, &pos, &len);
 810
 811     SubstrBufFromWC str(ImplStr(sz, nCount));
 812
 813     return m_impl.compare(pos, len, str.data, str.len);
 814 }
 815
 816 #else // !HAVE_STD_STRING_COMPARE
 817
 818 static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
 819                           const wxStringCharType* s2, size_t l2)
 820 {
 821     if( l1 == l2 )
 822         return wxStringMemcmp(s1, s2, l1);
 823     else if( l1 < l2 )
 824     {
 825         int ret = wxStringMemcmp(s1, s2, l1);
 826         return ret == 0 ? -1 : ret;
 827     }
 828     else
 829     {
 830         int ret = wxStringMemcmp(s1, s2, l2);
 831         return ret == 0 ? +1 : ret;
 832     }
 833 }
 834
 835 int wxString::compare(const wxString& str) const
 836 {
 837     return ::wxDoCmp(m_impl.data(), m_impl.length(),
 838                      str.m_impl.data(), str.m_impl.length());
 839 }
 840
 841 int wxString::compare(size_t nStart, size_t nLen,
 842                       const wxString& str) const
 843 {
 844     wxASSERT(nStart <= length());
 845     size_type strLen = length() - nStart;
 846     nLen = strLen < nLen ? strLen : nLen;
 847
 848     size_t pos, len;
 849     PosLenToImpl(nStart, nLen, &pos, &len);
 850
 851     return ::wxDoCmp(m_impl.data() + pos,  len,
 852                      str.m_impl.data(), str.m_impl.length());
 853 }
 854
 855 int wxString::compare(size_t nStart, size_t nLen,
 856                       const wxString& str,
 857                       size_t nStart2, size_t nLen2) const
 858 {
 859     wxASSERT(nStart <= length());
 860     wxASSERT(nStart2 <= str.length());
 861     size_type strLen  =     length() - nStart,
 862               strLen2 = str.length() - nStart2;
 863     nLen  = strLen  < nLen  ? strLen  : nLen;
 864     nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
 865
 866     size_t pos, len;
 867     PosLenToImpl(nStart, nLen, &pos, &len);
 868     size_t pos2, len2;
 869     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 870
 871     return ::wxDoCmp(m_impl.data() + pos, len,
 872                      str.m_impl.data() + pos2, len2);
 873 }
 874
 875 int wxString::compare(const char* sz) const
 876 {
 877     SubstrBufFromMB str(ImplStr(sz, npos));
 878     if ( str.len == npos )
 879         str.len = wxStringStrlen(str.data);
 880     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 881 }
 882
 883 int wxString::compare(const wchar_t* sz) const
 884 {
 885     SubstrBufFromWC str(ImplStr(sz, npos));
 886     if ( str.len == npos )
 887         str.len = wxStringStrlen(str.data);
 888     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 889 }
 890
 891 int wxString::compare(size_t nStart, size_t nLen,
 892                       const char* sz, size_t nCount) const
 893 {
 894     wxASSERT(nStart <= length());
 895     size_type strLen = length() - nStart;
 896     nLen = strLen < nLen ? strLen : nLen;
 897
 898     size_t pos, len;
 899     PosLenToImpl(nStart, nLen, &pos, &len);
 900
 901     SubstrBufFromMB str(ImplStr(sz, nCount));
 902     if ( str.len == npos )
 903         str.len = wxStringStrlen(str.data);
 904
 905     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 906 }
 907
 908 int wxString::compare(size_t nStart, size_t nLen,
 909                       const wchar_t* sz, size_t nCount) const
 910 {
 911     wxASSERT(nStart <= length());
 912     size_type strLen = length() - nStart;
 913     nLen = strLen < nLen ? strLen : nLen;
 914
 915     size_t pos, len;
 916     PosLenToImpl(nStart, nLen, &pos, &len);
 917
 918     SubstrBufFromWC str(ImplStr(sz, nCount));
 919     if ( str.len == npos )
 920         str.len = wxStringStrlen(str.data);
 921
 922     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 923 }
 924
 925 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
 926
 927
 928 // ---------------------------------------------------------------------------
 929 // find_{first,last}_[not]_of functions
 930 // ---------------------------------------------------------------------------
 931
 932 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
 933
 934 // NB: All these functions are implemented  with the argument being wxChar*,
 935 //     i.e. widechar string in any Unicode build, even though native string
 936 //     representation is char* in the UTF-8 build. This is because we couldn't
 937 //     use memchr() to determine if a character is in a set encoded as UTF-8.
 938
 939 size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
 940 {
 941     return find_first_of(sz, nStart, wxStrlen(sz));
 942 }
 943
 944 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
 945 {
 946     return find_first_not_of(sz, nStart, wxStrlen(sz));
 947 }
 948
 949 size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
 950 {
 951     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 952
 953     size_t idx = nStart;
 954     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 955     {
 956         if ( wxTmemchr(sz, *i, n) )
 957             return idx;
 958     }
 959
 960     return npos;
 961 }
 962
 963 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
 964 {
 965     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 966
 967     size_t idx = nStart;
 968     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 969     {
 970         if ( !wxTmemchr(sz, *i, n) )
 971             return idx;
 972     }
 973
 974     return npos;
 975 }
 976
 977
 978 size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
 979 {
 980     return find_last_of(sz, nStart, wxStrlen(sz));
 981 }
 982
 983 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
 984 {
 985     return find_last_not_of(sz, nStart, wxStrlen(sz));
 986 }
 987
 988 size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
 989 {
 990     size_t len = length();
 991
 992     if ( nStart == npos )
 993     {
 994         nStart = len - 1;
 995     }
 996     else
 997     {
 998         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 999     }
1000
1001     size_t idx = nStart;
1002     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1003           i != rend(); --idx, ++i )
1004     {
1005         if ( wxTmemchr(sz, *i, n) )
1006             return idx;
1007     }
1008
1009     return npos;
1010 }
1011
1012 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
1013 {
1014     size_t len = length();
1015
1016     if ( nStart == npos )
1017     {
1018         nStart = len - 1;
1019     }
1020     else
1021     {
1022         wxASSERT_MSG( nStart <= len, _T("invalid index") );
1023     }
1024
1025     size_t idx = nStart;
1026     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1027           i != rend(); --idx, ++i )
1028     {
1029         if ( !wxTmemchr(sz, *i, n) )
1030             return idx;
1031     }
1032
1033     return npos;
1034 }
1035
1036 size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
1037 {
1038     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
1039
1040     size_t idx = nStart;
1041     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
1042     {
1043         if ( *i != ch )
1044             return idx;
1045     }
1046
1047     return npos;
1048 }
1049
1050 size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
1051 {
1052     size_t len = length();
1053
1054     if ( nStart == npos )
1055     {
1056         nStart = len - 1;
1057     }
1058     else
1059     {
1060         wxASSERT_MSG( nStart <= len, _T("invalid index") );
1061     }
1062
1063     size_t idx = nStart;
1064     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1065           i != rend(); --idx, ++i )
1066     {
1067         if ( *i != ch )
1068             return idx;
1069     }
1070
1071     return npos;
1072 }
1073
// the functions above were implemented for wchar_t* arguments in Unicode
// build and char* in ANSI build; below are implementations for the other
// version:
//
// Each overload below converts its character set argument with wxConvLibc
// (cMB2WC in Unicode build, cWC2MB in ANSI build), casts the result to
// const wxChar* and forwards to the wxChar overload of the same name.
//
// NOTE(review): the pointer produced by STRCONV presumably refers to a
// temporary conversion buffer which is only alive until the end of the
// forwarding call expression, so it must not be stored -- confirm against
// the wxMBConv declaration.
//
// NOTE(review): the counted overloads pass the same n both as the number of
// input characters to convert and as the length of the converted set; these
// presumably differ when the conversion changes the number of characters
// (multibyte encodings) -- confirm.
#if wxUSE_UNICODE
    #define wxOtherCharType char
    #define STRCONV         (const wxChar*)wxConvLibc.cMB2WC
#else
    #define wxOtherCharType wchar_t
    #define STRCONV         (const wxChar*)wxConvLibc.cWC2MB
#endif

size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_first_of(STRCONV(sz), nStart); }

size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
                               size_t n) const
    { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_last_of(STRCONV(sz), nStart); }
size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
                              size_t n) const
    { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_first_not_of(STRCONV(sz), nStart); }
size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
                                   size_t n) const
    { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_last_not_of(STRCONV(sz), nStart); }
size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
                                  size_t n) const
    { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }

// the helper macros are only needed for the definitions above
#undef wxOtherCharType
#undef STRCONV
1109
1110 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1111
1112 // ===========================================================================
1113 // other common string functions
1114 // ===========================================================================
1115
1116 int wxString::CmpNoCase(const wxString& s) const
1117 {
1118 #if wxUSE_UNICODE_UTF8
1119     // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
1120
1121     const_iterator i1 = begin();
1122     const_iterator end1 = end();
1123     const_iterator i2 = s.begin();
1124     const_iterator end2 = s.end();
1125
1126     for ( ; i1 != end1 && i2 != end2; ++i1, ++i2 )
1127     {
1128         wxUniChar lower1 = (wxChar)wxTolower(*i1);
1129         wxUniChar lower2 = (wxChar)wxTolower(*i2);
1130         if ( lower1 != lower2 )
1131             return lower1 < lower2 ? -1 : 1;
1132     }
1133
1134     size_t len1 = length();
1135     size_t len2 = s.length();
1136
1137     if ( len1 < len2 )
1138         return -1;
1139     else if ( len1 > len2 )
1140         return 1;
1141     return 0;
1142 #else // wxUSE_UNICODE_WCHAR or ANSI
1143     return wxStricmp(m_impl.c_str(), s.m_impl.c_str());
1144 #endif
1145 }
1146
1147
1148 #if wxUSE_UNICODE
1149
1150 #ifdef __MWERKS__
1151 #ifndef __SCHAR_MAX__
1152 #define __SCHAR_MAX__ 127
1153 #endif
1154 #endif
1155
1156 wxString wxString::FromAscii(const char *ascii, size_t len)
1157 {
1158     if (!ascii || len == 0)
1159        return wxEmptyString;
1160
1161     wxString res;
1162
1163     {
1164         wxStringInternalBuffer buf(res, len);
1165         wxStringCharType *dest = buf;
1166
1167         for ( ; len > 0; --len )
1168         {
1169             unsigned char c = (unsigned char)*ascii++;
1170             wxASSERT_MSG( c < 0x80,
1171                           _T("Non-ASCII value passed to FromAscii().") );
1172
1173             *dest++ = (wchar_t)c;
1174         }
1175     }
1176
1177     return res;
1178 }
1179
1180 wxString wxString::FromAscii(const char *ascii)
1181 {
1182     return FromAscii(ascii, wxStrlen(ascii));
1183 }
1184
1185 wxString wxString::FromAscii(char ascii)
1186 {
1187     // What do we do with '\0' ?
1188
1189     unsigned char c = (unsigned char)ascii;
1190
1191     wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") );
1192
1193     // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1194     return wxString(wxUniChar((wchar_t)c));
1195 }
1196
1197 const wxCharBuffer wxString::ToAscii() const
1198 {
1199     // this will allocate enough space for the terminating NUL too
1200     wxCharBuffer buffer(length());
1201     char *dest = buffer.data();
1202
1203     for ( const_iterator i = begin(); i != end(); ++i )
1204     {
1205         wxUniChar c(*i);
1206         // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1207         *dest++ = c.IsAscii() ? (char)c : '_';
1208
1209         // the output string can't have embedded NULs anyhow, so we can safely
1210         // stop at first of them even if we do have any
1211         if ( !c )
1212             break;
1213     }
1214
1215     return buffer;
1216 }
1217
1218 #endif // wxUSE_UNICODE
1219
1220 // extract string of length nCount starting at nFirst
1221 wxString wxString::Mid(size_t nFirst, size_t nCount) const
1222 {
1223     size_t nLen = length();
1224
1225     // default value of nCount is npos and means "till the end"
1226     if ( nCount == npos )
1227     {
1228         nCount = nLen - nFirst;
1229     }
1230
1231     // out-of-bounds requests return sensible things
1232     if ( nFirst + nCount > nLen )
1233     {
1234         nCount = nLen - nFirst;
1235     }
1236
1237     if ( nFirst > nLen )
1238     {
1239         // AllocCopy() will return empty string
1240         return wxEmptyString;
1241     }
1242
1243     wxString dest(*this, nFirst, nCount);
1244     if ( dest.length() != nCount )
1245     {
1246         wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1247     }
1248
1249     return dest;
1250 }
1251
1252 // check that the string starts with prefix and return the rest of the string
1253 // in the provided pointer if it is not NULL, otherwise return false
1254 bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
1255 {
1256     if ( compare(0, prefix.length(), prefix) != 0 )
1257         return false;
1258
1259     if ( rest )
1260     {
1261         // put the rest of the string into provided pointer
1262         rest->assign(*this, prefix.length(), npos);
1263     }
1264
1265     return true;
1266 }
1267
1268
1269 // check that the string ends with suffix and return the rest of it in the
1270 // provided pointer if it is not NULL, otherwise return false
1271 bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
1272 {
1273     int start = length() - suffix.length();
1274
1275     if ( start < 0 || compare(start, npos, suffix) != 0 )
1276         return false;
1277
1278     if ( rest )
1279     {
1280         // put the rest of the string into provided pointer
1281         rest->assign(*this, 0, start);
1282     }
1283
1284     return true;
1285 }
1286
1287
1288 // extract nCount last (rightmost) characters
1289 wxString wxString::Right(size_t nCount) const
1290 {
1291   if ( nCount > length() )
1292     nCount = length();
1293
1294   wxString dest(*this, length() - nCount, nCount);
1295   if ( dest.length() != nCount ) {
1296     wxFAIL_MSG( _T("out of memory in wxString::Right") );
1297   }
1298   return dest;
1299 }
1300
1301 // get all characters after the last occurrence of ch
1302 // (returns the whole string if ch not found)
1303 wxString wxString::AfterLast(wxUniChar ch) const
1304 {
1305   wxString str;
1306   int iPos = Find(ch, true);
1307   if ( iPos == wxNOT_FOUND )
1308     str = *this;
1309   else
1310     str.assign(*this, iPos + 1, npos);
1311
1312   return str;
1313 }
1314
1315 // extract nCount first (leftmost) characters
1316 wxString wxString::Left(size_t nCount) const
1317 {
1318   if ( nCount > length() )
1319     nCount = length();
1320
1321   wxString dest(*this, 0, nCount);
1322   if ( dest.length() != nCount ) {
1323     wxFAIL_MSG( _T("out of memory in wxString::Left") );
1324   }
1325   return dest;
1326 }
1327
1328 // get all characters before the first occurrence of ch
1329 // (returns the whole string if ch not found)
1330 wxString wxString::BeforeFirst(wxUniChar ch) const
1331 {
1332   int iPos = Find(ch);
1333   if ( iPos == wxNOT_FOUND )
1334       iPos = length();
1335   return wxString(*this, 0, iPos);
1336 }
1337
1338 /// get all characters before the last occurrence of ch
1339 /// (returns empty string if ch not found)
1340 wxString wxString::BeforeLast(wxUniChar ch) const
1341 {
1342   wxString str;
1343   int iPos = Find(ch, true);
1344   if ( iPos != wxNOT_FOUND && iPos != 0 )
1345     str = wxString(c_str(), iPos);
1346
1347   return str;
1348 }
1349
1350 /// get all characters after the first occurrence of ch
1351 /// (returns empty string if ch not found)
1352 wxString wxString::AfterFirst(wxUniChar ch) const
1353 {
1354   wxString str;
1355   int iPos = Find(ch);
1356   if ( iPos != wxNOT_FOUND )
1357       str.assign(*this, iPos + 1, npos);
1358
1359   return str;
1360 }
1361
1362 // replace first (or all) occurrences of some substring with another one
1363 size_t wxString::Replace(const wxString& strOld,
1364                          const wxString& strNew, bool bReplaceAll)
1365 {
1366     // if we tried to replace an empty string we'd enter an infinite loop below
1367     wxCHECK_MSG( !strOld.empty(), 0,
1368                  _T("wxString::Replace(): invalid parameter") );
1369
1370     wxSTRING_INVALIDATE_CACHE();
1371
1372     size_t uiCount = 0;   // count of replacements made
1373
1374     // optimize the special common case: replacement of one character by
1375     // another one (in UTF-8 case we can only do this for ASCII characters)
1376     //
1377     // benchmarks show that this special version is around 3 times faster
1378     // (depending on the proportion of matching characters and UTF-8/wchar_t
1379     // build)
1380     if ( strOld.m_impl.length() == 1 && strNew.m_impl.length() == 1 )
1381     {
1382         const wxStringCharType chOld = strOld.m_impl[0],
1383                                chNew = strNew.m_impl[0];
1384
1385         // this loop is the simplified version of the one below
1386         for ( size_t pos = 0; ; )
1387         {
1388             pos = m_impl.find(chOld, pos);
1389             if ( pos == npos )
1390                 break;
1391
1392             m_impl[pos++] = chNew;
1393
1394             uiCount++;
1395
1396             if ( !bReplaceAll )
1397                 break;
1398         }
1399     }
1400     else // general case
1401     {
1402         const size_t uiOldLen = strOld.m_impl.length();
1403         const size_t uiNewLen = strNew.m_impl.length();
1404
1405         for ( size_t pos = 0; ; )
1406         {
1407             pos = m_impl.find(strOld.m_impl, pos);
1408             if ( pos == npos )
1409                 break;
1410
1411             // replace this occurrence of the old string with the new one
1412             m_impl.replace(pos, uiOldLen, strNew.m_impl);
1413
1414             // move up pos past the string that was replaced
1415             pos += uiNewLen;
1416
1417             // increase replace count
1418             uiCount++;
1419
1420             // stop after the first one?
1421             if ( !bReplaceAll )
1422                 break;
1423         }
1424     }
1425
1426     return uiCount;
1427 }
1428
1429 bool wxString::IsAscii() const
1430 {
1431     for ( const_iterator i = begin(); i != end(); ++i )
1432     {
1433         if ( !(*i).IsAscii() )
1434             return false;
1435     }
1436
1437     return true;
1438 }
1439
1440 bool wxString::IsWord() const
1441 {
1442     for ( const_iterator i = begin(); i != end(); ++i )
1443     {
1444         if ( !wxIsalpha(*i) )
1445             return false;
1446     }
1447
1448     return true;
1449 }
1450
1451 bool wxString::IsNumber() const
1452 {
1453     if ( empty() )
1454         return true;
1455
1456     const_iterator i = begin();
1457
1458     if ( *i == _T('-') || *i == _T('+') )
1459         ++i;
1460
1461     for ( ; i != end(); ++i )
1462     {
1463         if ( !wxIsdigit(*i) )
1464             return false;
1465     }
1466
1467     return true;
1468 }
1469
1470 wxString wxString::Strip(stripType w) const
1471 {
1472     wxString s = *this;
1473     if ( w & leading ) s.Trim(false);
1474     if ( w & trailing ) s.Trim(true);
1475     return s;
1476 }
1477
1478 // ---------------------------------------------------------------------------
1479 // case conversion
1480 // ---------------------------------------------------------------------------
1481
1482 wxString& wxString::MakeUpper()
1483 {
1484   for ( iterator it = begin(), en = end(); it != en; ++it )
1485     *it = (wxChar)wxToupper(*it);
1486
1487   return *this;
1488 }
1489
1490 wxString& wxString::MakeLower()
1491 {
1492   for ( iterator it = begin(), en = end(); it != en; ++it )
1493     *it = (wxChar)wxTolower(*it);
1494
1495   return *this;
1496 }
1497
1498 wxString& wxString::MakeCapitalized()
1499 {
1500     const iterator en = end();
1501     iterator it = begin();
1502     if ( it != en )
1503     {
1504         *it = (wxChar)wxToupper(*it);
1505         for ( ++it; it != en; ++it )
1506             *it = (wxChar)wxTolower(*it);
1507     }
1508
1509     return *this;
1510 }
1511
1512 // ---------------------------------------------------------------------------
1513 // trimming and padding
1514 // ---------------------------------------------------------------------------
1515
1516 // some compilers (VC++ 6.0 not to name them) return true for a call to
1517 // isspace('\xEA') in the C locale which seems to be broken to me, but we have
1518 // to live with this by checking that the character is a 7 bit one - even if
1519 // this may fail to detect some spaces (I don't know if Unicode doesn't have
1520 // space-like symbols somewhere except in the first 128 chars), it is arguably
1521 // still better than trimming away accented letters
1522 inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1523
1524 // trims spaces (in the sense of isspace) from left or right side
1525 wxString& wxString::Trim(bool bFromRight)
1526 {
1527     // first check if we're going to modify the string at all
1528     if ( !empty() &&
1529          (
1530           (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1531           (!bFromRight && wxSafeIsspace(GetChar(0u)))
1532          )
1533        )
1534     {
1535         if ( bFromRight )
1536         {
1537             // find last non-space character
1538             reverse_iterator psz = rbegin();
1539             while ( (psz != rend()) && wxSafeIsspace(*psz) )
1540                 ++psz;
1541
1542             // truncate at trailing space start
1543             erase(psz.base(), end());
1544         }
1545         else
1546         {
1547             // find first non-space character
1548             iterator psz = begin();
1549             while ( (psz != end()) && wxSafeIsspace(*psz) )
1550                 ++psz;
1551
1552             // fix up data and length
1553             erase(begin(), psz);
1554         }
1555     }
1556
1557     return *this;
1558 }
1559
1560 // adds nCount characters chPad to the string from either side
1561 wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1562 {
1563     wxString s(chPad, nCount);
1564
1565     if ( bFromRight )
1566         *this += s;
1567     else
1568     {
1569         s += *this;
1570         swap(s);
1571     }
1572
1573     return *this;
1574 }
1575
1576 // truncate the string
1577 wxString& wxString::Truncate(size_t uiLen)
1578 {
1579     if ( uiLen < length() )
1580     {
1581         erase(begin() + uiLen, end());
1582     }
1583     //else: nothing to do, string is already short enough
1584
1585     return *this;
1586 }
1587
1588 // ---------------------------------------------------------------------------
1589 // finding (return wxNOT_FOUND if not found and index otherwise)
1590 // ---------------------------------------------------------------------------
1591
1592 // find a character
1593 int wxString::Find(wxUniChar ch, bool bFromEnd) const
1594 {
1595     size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1596
1597     return (idx == npos) ? wxNOT_FOUND : (int)idx;
1598 }
1599
1600 // ----------------------------------------------------------------------------
1601 // conversion to numbers
1602 // ----------------------------------------------------------------------------
1603
// The implementation of all the functions below is exactly the same so factor
// it out. Note that number extraction works correctly on UTF-8 strings, so
// we can use wxStringCharType and wx_str() for maximum efficiency.

// DO_IF_NOT_WINCE(x) expands to x everywhere except under Windows CE where
// it expands to nothing; it is used below to wrap all uses of errno.
#ifndef __WXWINCE__
    #define DO_IF_NOT_WINCE(x) x
#else
    #define DO_IF_NOT_WINCE(x)
#endif

// WX_STRING_TO_INT_TYPE(out, base, func, T) expands to the complete body of
// a bool-returning wxString member function:
//
//  - out is the pointer receiving the result; false is returned (after a
//    debug check failure) if it is NULL
//  - base is passed to func and must be 0 or in 2..36 range (only asserted)
//  - func is the strtol()-like function called as func(start, &end, base)
//  - T is the type of the value returned by func and stored in *out
//
// The expansion returns true and sets *out only on success; *out is left
// unchanged when false is returned. As the expansion ends with a return
// statement without the semicolon, the macro must be followed by one.
#define WX_STRING_TO_INT_TYPE(out, base, func, T)                           \
    wxCHECK_MSG( out, false, _T("NULL output pointer") );                   \
    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );  \
                                                                            \
    DO_IF_NOT_WINCE( errno = 0; )                                           \
                                                                            \
    const wxStringCharType *start = wx_str();                               \
    wxStringCharType *end;                                                  \
    T val = func(start, &end, base);                                        \
                                                                            \
    /* return true only if scan was stopped by the terminating NUL and */   \
    /* if the string was not empty to start with and no under/overflow */   \
    /* occurred: */                                                         \
    if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )         \
        return false;                                                       \
    *out = val;                                                             \
    return true
1631
// Convert the string to a long using wxStrtol() with the given base.
//
// The entire body comes from WX_STRING_TO_INT_TYPE: true is returned and
// *pVal is set only if the string is not empty, was parsed up to its end
// and (except under Windows CE) errno was not set to ERANGE.
bool wxString::ToLong(long *pVal, int base) const
{
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtol, long);
}
1636
// Convert the string to an unsigned long using wxStrtoul() with the given
// base.
//
// The entire body comes from WX_STRING_TO_INT_TYPE: true is returned and
// *pVal is set only if the string is not empty, was parsed up to its end
// and (except under Windows CE) errno was not set to ERANGE.
bool wxString::ToULong(unsigned long *pVal, int base) const
{
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoul, unsigned long);
}
1641
// Convert the string to a wxLongLong_t using wxStrtoll() with the given
// base.
//
// The entire body comes from WX_STRING_TO_INT_TYPE: true is returned and
// *pVal is set only if the string is not empty, was parsed up to its end
// and (except under Windows CE) errno was not set to ERANGE.
bool wxString::ToLongLong(wxLongLong_t *pVal, int base) const
{
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoll, wxLongLong_t);
}
1646
// Convert the string to a wxULongLong_t using wxStrtoull() with the given
// base.
//
// The entire body comes from WX_STRING_TO_INT_TYPE: true is returned and
// *pVal is set only if the string is not empty, was parsed up to its end
// and (except under Windows CE) errno was not set to ERANGE.
bool wxString::ToULongLong(wxULongLong_t *pVal, int base) const
{
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoull, wxULongLong_t);
}
1651
1652 bool wxString::ToDouble(double *pVal) const
1653 {
1654     wxCHECK_MSG( pVal, false, _T("NULL output pointer") );
1655
1656     DO_IF_NOT_WINCE( errno = 0; )
1657
1658     const wxChar *start = c_str();
1659     wxChar *end;
1660     double val = wxStrtod(start, &end);
1661
1662     // return true only if scan was stopped by the terminating NUL and if the
1663     // string was not empty to start with and no under/overflow occurred
1664     if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )
1665         return false;
1666
1667     *pVal = val;
1668
1669     return true;
1670 }
1671
1672 // ---------------------------------------------------------------------------
1673 // formatted output
1674 // ---------------------------------------------------------------------------
1675
1676 #if !wxUSE_UTF8_LOCALE_ONLY
1677 /* static */
1678 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1679 wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
1680 #else
1681 wxString wxString::DoFormatWchar(const wxChar *format, ...)
1682 #endif
1683 {
1684     va_list argptr;
1685     va_start(argptr, format);
1686
1687     wxString s;
1688     s.PrintfV(format, argptr);
1689
1690     va_end(argptr);
1691
1692     return s;
1693 }
1694 #endif // !wxUSE_UTF8_LOCALE_ONLY
1695
#if wxUSE_UNICODE_UTF8
// Return a new string containing the result of formatting the variable
// arguments according to the char format string, using PrintfV().
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
1711
1712 /* static */
1713 wxString wxString::FormatV(const wxString& format, va_list argptr)
1714 {
1715     wxString s;
1716     s.PrintfV(format, argptr);
1717     return s;
1718 }
1719
1720 #if !wxUSE_UTF8_LOCALE_ONLY
1721 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1722 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
1723 #else
1724 int wxString::DoPrintfWchar(const wxChar *format, ...)
1725 #endif
1726 {
1727     va_list argptr;
1728     va_start(argptr, format);
1729
1730 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1731     // get a pointer to the wxString instance; we have to use dynamic_cast<>
1732     // because it's the only cast that works safely for downcasting when
1733     // multiple inheritance is used:
1734     wxString *str = static_cast<wxString*>(this);
1735 #else
1736     wxString *str = this;
1737 #endif
1738
1739     int iLen = str->PrintfV(format, argptr);
1740
1741     va_end(argptr);
1742
1743     return iLen;
1744 }
1745 #endif // !wxUSE_UTF8_LOCALE_ONLY
1746
#if wxUSE_UNICODE_UTF8
// Format the variable arguments according to the char format string into
// the string itself and return the value returned by PrintfV().
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);
    const int nWritten = PrintfV(format, args);
    va_end(args);

    return nWritten;
}
#endif // wxUSE_UNICODE_UTF8
1760
1761 /*
    Uses wxVsnprintf and places the result into this string.
1763
1764     In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1765     it is vswprintf.  Due to a discrepancy between vsnprintf and vswprintf in
1766     the ISO C99 (and thus SUSv3) standard the return value for the case of
1767     an undersized buffer is inconsistent.  For conforming vsnprintf
1768     implementations the function must return the number of characters that
1769     would have been printed had the buffer been large enough.  For conforming
1770     vswprintf implementations the function must return a negative number
1771     and set errno.
1772
1773     What vswprintf sets errno to is undefined but Darwin seems to set it to
1774     EOVERFLOW.  The only expected errno are EILSEQ and EINVAL.  Both of
1775     those are defined in the standard and backed up by several conformance
1776     statements.  Note that ENOMEM mentioned in the manual page does not
1777     apply to swprintf, only wprintf and fwprintf.
1778
1779     Official manual page:
1780     http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1781
1782     Some conformance statements (AIX, Solaris):
1783     http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1784     http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1785
1786     Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1787     EILSEQ and EINVAL are specifically defined to mean the error is other than
1788     an undersized buffer and no other errno are defined we treat those two
1789     as meaning hard errors and everything else gets the old behavior which
1790     is to keep looping and increasing buffer size until the function succeeds.
1791
1792     In practice it's impossible to determine before compilation which behavior
1793     may be used.  The vswprintf function may have vsnprintf-like behavior or
1794     vice-versa.  Behavior detected on one release can theoretically change
1795     with an updated release.  Not to mention that configure testing for it
1796     would require the test to be run on the host system, not the build system
1797     which makes cross compilation difficult. Therefore, we make no assumptions
1798     about behavior and try our best to handle every known case, including the
1799     case where wxVsnprintf returns a negative number and fails to set errno.
1800
1801     There is yet one more non-standard implementation and that is our own.
1802     Fortunately, that can be detected at compile-time.
1803
1804     On top of all that, ISO C99 explicitly defines snprintf to write a null
1805     character to the last position of the specified buffer.  That would be at
    the given buffer size minus 1.  It is supposed to do this even if it
1807     turns out that the buffer is sized too small.
1808
1809     Darwin (tested on 10.5) follows the C99 behavior exactly.
1810
1811     Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1812     errno even when it fails.  However, it only seems to ever fail due
1813     to an undersized buffer.
1814 */
1815 #if wxUSE_UNICODE_UTF8
1816 template<typename BufferType>
1817 #else
1818 // we only need one version in non-UTF8 builds and at least two Windows
1819 // compilers have problems with this function template, so use just one
1820 // normal function here
1821 #endif
1822 static int DoStringPrintfV(wxString& str,
1823                            const wxString& format, va_list argptr)
1824 {
1825     int size = 1024;
1826
1827     for ( ;; )
1828     {
1829 #if wxUSE_UNICODE_UTF8
1830         BufferType tmp(str, size + 1);
1831         typename BufferType::CharType *buf = tmp;
1832 #else
1833         wxStringBuffer tmp(str, size + 1);
1834         wxChar *buf = tmp;
1835 #endif
1836
1837         if ( !buf )
1838         {
1839             // out of memory
1840
1841             // in UTF-8 build, leaving uninitialized junk in the buffer
1842             // could result in invalid non-empty UTF-8 string, so just
1843             // reset the string to empty on failure:
1844             buf[0] = '\0';
1845             return -1;
1846         }
1847
1848         // wxVsnprintf() may modify the original arg pointer, so pass it
1849         // only a copy
1850         va_list argptrcopy;
1851         wxVaCopy(argptrcopy, argptr);
1852
1853 #ifndef __WXWINCE__
1854         // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1855         errno = 0;
1856 #endif
1857         int len = wxVsnprintf(buf, size, format, argptrcopy);
1858         va_end(argptrcopy);
1859
1860         // some implementations of vsnprintf() don't NUL terminate
1861         // the string if there is not enough space for it so
1862         // always do it manually
1863         // FIXME: This really seems to be the wrong and would be an off-by-one
1864         // bug except the code above allocates an extra character.
1865         buf[size] = _T('\0');
1866
1867         // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1868         // total number of characters which would have been written if the
1869         // buffer were large enough (newer standards such as Unix98)
1870         if ( len < 0 )
1871         {
1872             // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1873             //     wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1874             //     is true if *both* of them use our own implementation,
1875             //     otherwise we can't be sure
1876 #if wxUSE_WXVSNPRINTF
1877             // we know that our own implementation of wxVsnprintf() returns -1
1878             // only for a format error - thus there's something wrong with
1879             // the user's format string
1880             buf[0] = '\0';
1881             return -1;
1882 #else // possibly using system version
1883             // assume it only returns error if there is not enough space, but
1884             // as we don't know how much we need, double the current size of
1885             // the buffer
1886 #ifndef __WXWINCE__
1887             if( (errno == EILSEQ) || (errno == EINVAL) )
1888             // If errno was set to one of the two well-known hard errors
1889             // then fail immediately to avoid an infinite loop.
1890                 return -1;
1891             else
1892 #endif // __WXWINCE__
1893             // still not enough, as we don't know how much we need, double the
1894             // current size of the buffer
1895                 size *= 2;
1896 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1897         }
1898         else if ( len >= size )
1899         {
1900 #if wxUSE_WXVSNPRINTF
1901             // we know that our own implementation of wxVsnprintf() returns
1902             // size+1 when there's not enough space but that's not the size
1903             // of the required buffer!
1904             size *= 2;      // so we just double the current size of the buffer
1905 #else
1906             // some vsnprintf() implementations NUL-terminate the buffer and
1907             // some don't in len == size case, to be safe always add 1
1908             // FIXME: I don't quite understand this comment.  The vsnprintf
1909             // function is specifically defined to return the number of
1910             // characters printed not including the null terminator.
1911             // So OF COURSE you need to add 1 to get the right buffer size.
1912             // The following line is definitely correct, no question.
1913             size = len + 1;
1914 #endif
1915         }
1916         else // ok, there was enough space
1917         {
1918             break;
1919         }
1920     }
1921
1922     // we could have overshot
1923     str.Shrink();
1924
1925     return str.length();
1926 }
1927
1928 int wxString::PrintfV(const wxString& format, va_list argptr)
1929 {
1930 #if wxUSE_UNICODE_UTF8
1931     #if wxUSE_STL_BASED_WXSTRING
1932         typedef wxStringTypeBuffer<char> Utf8Buffer;
1933     #else
1934         typedef wxStringInternalBuffer Utf8Buffer;
1935     #endif
1936 #endif
1937
1938 #if wxUSE_UTF8_LOCALE_ONLY
1939     return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
1940 #else
1941     #if wxUSE_UNICODE_UTF8
1942     if ( wxLocaleIsUtf8 )
1943         return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
1944     else
1945         // wxChar* version
1946         return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
1947     #else
1948         return DoStringPrintfV(*this, format, argptr);
1949     #endif // UTF8/WCHAR
1950 #endif
1951 }
1952
1953 // ----------------------------------------------------------------------------
1954 // misc other operations
1955 // ----------------------------------------------------------------------------
1956
1957 // returns true if the string matches the pattern which may contain '*' and
1958 // '?' metacharacters (as usual, '?' matches any character and '*' any number
1959 // of them)
1960 bool wxString::Matches(const wxString& mask) const
1961 {
1962     // I disable this code as it doesn't seem to be faster (in fact, it seems
1963     // to be much slower) than the old, hand-written code below and using it
1964     // here requires always linking with libregex even if the user code doesn't
1965     // use it
1966 #if 0 // wxUSE_REGEX
1967     // first translate the shell-like mask into a regex
1968     wxString pattern;
1969     pattern.reserve(wxStrlen(pszMask));
1970
1971     pattern += _T('^');
1972     while ( *pszMask )
1973     {
1974         switch ( *pszMask )
1975         {
1976             case _T('?'):
1977                 pattern += _T('.');
1978                 break;
1979
1980             case _T('*'):
1981                 pattern += _T(".*");
1982                 break;
1983
1984             case _T('^'):
1985             case _T('.'):
1986             case _T('$'):
1987             case _T('('):
1988             case _T(')'):
1989             case _T('|'):
1990             case _T('+'):
1991             case _T('\\'):
1992                 // these characters are special in a RE, quote them
1993                 // (however note that we don't quote '[' and ']' to allow
1994                 // using them for Unix shell like matching)
1995                 pattern += _T('\\');
1996                 // fall through
1997
1998             default:
1999                 pattern += *pszMask;
2000         }
2001
2002         pszMask++;
2003     }
2004     pattern += _T('$');
2005
2006     // and now use it
2007     return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
2008 #else // !wxUSE_REGEX
2009   // TODO: this is, of course, awfully inefficient...
2010
2011   // FIXME-UTF8: implement using iterators, remove #if
2012 #if wxUSE_UNICODE_UTF8
2013   wxWCharBuffer maskBuf = mask.wc_str();
2014   wxWCharBuffer txtBuf = wc_str();
2015   const wxChar *pszMask = maskBuf.data();
2016   const wxChar *pszTxt = txtBuf.data();
2017 #else
2018   const wxChar *pszMask = mask.wx_str();
2019   // the char currently being checked
2020   const wxChar *pszTxt = wx_str();
2021 #endif
2022
2023   // the last location where '*' matched
2024   const wxChar *pszLastStarInText = NULL;
2025   const wxChar *pszLastStarInMask = NULL;
2026
2027 match:
2028   for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
2029     switch ( *pszMask ) {
2030       case wxT('?'):
2031         if ( *pszTxt == wxT('\0') )
2032           return false;
2033
2034         // pszTxt and pszMask will be incremented in the loop statement
2035
2036         break;
2037
2038       case wxT('*'):
2039         {
2040           // remember where we started to be able to backtrack later
2041           pszLastStarInText = pszTxt;
2042           pszLastStarInMask = pszMask;
2043
2044           // ignore special chars immediately following this one
2045           // (should this be an error?)
2046           while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
2047             pszMask++;
2048
2049           // if there is nothing more, match
2050           if ( *pszMask == wxT('\0') )
2051             return true;
2052
2053           // are there any other metacharacters in the mask?
2054           size_t uiLenMask;
2055           const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));
2056
2057           if ( pEndMask != NULL ) {
2058             // we have to match the string between two metachars
2059             uiLenMask = pEndMask - pszMask;
2060           }
2061           else {
2062             // we have to match the remainder of the string
2063             uiLenMask = wxStrlen(pszMask);
2064           }
2065
2066           wxString strToMatch(pszMask, uiLenMask);
2067           const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
2068           if ( pMatch == NULL )
2069             return false;
2070
2071           // -1 to compensate "++" in the loop
2072           pszTxt = pMatch + uiLenMask - 1;
2073           pszMask += uiLenMask - 1;
2074         }
2075         break;
2076
2077       default:
2078         if ( *pszMask != *pszTxt )
2079           return false;
2080         break;
2081     }
2082   }
2083
2084   // match only if nothing left
2085   if ( *pszTxt == wxT('\0') )
2086     return true;
2087
2088   // if we failed to match, backtrack if we can
2089   if ( pszLastStarInText ) {
2090     pszTxt = pszLastStarInText + 1;
2091     pszMask = pszLastStarInMask;
2092
2093     pszLastStarInText = NULL;
2094
2095     // don't bother resetting pszLastStarInMask, it's unnecessary
2096
2097     goto match;
2098   }
2099
2100   return false;
2101 #endif // wxUSE_REGEX/!wxUSE_REGEX
2102 }
2103
2104 // Count the number of chars
2105 int wxString::Freq(wxUniChar ch) const
2106 {
2107     int count = 0;
2108     for ( const_iterator i = begin(); i != end(); ++i )
2109     {
2110         if ( *i == ch )
2111             count ++;
2112     }
2113     return count;
2114 }
2115