src/common/string.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/string.cpp
   3 // Purpose:     wxString class
   4 // Author:      Vadim Zeitlin, Ryan Norton
   5 // Modified by:
   6 // Created:     29/01/98
   7 // RCS-ID:      $Id$
   8 // Copyright:   (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
   9 //              (c) 2004 Ryan Norton <wxprojects@comcast.net>
  10 // Licence:     wxWindows licence
  11 /////////////////////////////////////////////////////////////////////////////
  12
  13 // ===========================================================================
  14 // headers, declarations, constants
  15 // ===========================================================================
  16
  17 // For compilers that support precompilation, includes "wx.h".
  18 #include "wx/wxprec.h"
  19
  20 #ifdef __BORLANDC__
  21     #pragma hdrstop
  22 #endif
  23
  24 #ifndef WX_PRECOMP
  25     #include "wx/string.h"
  26     #include "wx/wxcrtvararg.h"
  27 #endif
  28
  29 #include <ctype.h>
  30
  31 #ifndef __WXWINCE__
  32     #include <errno.h>
  33 #endif
  34
  35 #include <string.h>
  36 #include <stdlib.h>
  37
  38 #include "wx/hashmap.h"
  39
  40 // string handling functions used by wxString:
  41 #if wxUSE_UNICODE_UTF8
  42     #define wxStringMemcpy   memcpy
  43     #define wxStringMemcmp   memcmp
  44     #define wxStringMemchr   memchr
  45     #define wxStringStrlen   strlen
  46 #else
  47     #define wxStringMemcpy   wxTmemcpy
  48     #define wxStringMemcmp   wxTmemcmp
  49     #define wxStringMemchr   wxTmemchr
  50     #define wxStringStrlen   wxStrlen
  51 #endif
  52
  53
  54 // ---------------------------------------------------------------------------
  55 // static class variables definition
  56 // ---------------------------------------------------------------------------
  57
  58 //According to STL _must_ be a -1 size_t
  59 const size_t wxString::npos = (size_t) -1;
  60
  61 #if wxUSE_STRING_POS_CACHE
// Definition of the static position cache member of wxString; its storage
// type is produced by the wxTLS_TYPE() macro.
wxTLS_TYPE(wxString::Cache) wxString::ms_cache;
  63
  64 // gdb seems to be unable to display thread-local variables correctly, at least
  65 // not my 6.4.98 version under amd64, so provide this debugging helper to do it
  66 #ifdef __WXDEBUG__
  67
  68 struct wxStrCacheDumper
  69 {
  70     static void ShowAll()
  71     {
  72         puts("*** wxString cache dump:");
  73         for ( unsigned n = 0; n < wxString::Cache::SIZE; n++ )
  74         {
  75             const wxString::Cache::Element&
  76                 c = wxString::ms_cache.cached[n];
  77
  78             printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
  79                    n,
  80                    n == wxString::ms_cache.lastUsed ? " [*]" : "",
  81                    c.str,
  82                    (unsigned long)c.pos,
  83                    (unsigned long)c.impl,
  84                    (long)c.len);
  85         }
  86     }
  87 };
  88
  89 void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
  90
  91 #endif // __WXDEBUG__
  92
  93 #ifdef wxPROFILE_STRING_CACHE
  94
  95 wxString::CacheStats wxString::ms_cacheStats;
  96
  97 namespace
  98 {
  99
 100 struct ShowCacheStats
 101 {
 102     ~ShowCacheStats()
 103     {
 104         const wxString::CacheStats& stats = wxString::ms_cacheStats;
 105
 106         if ( stats.postot )
 107         {
 108             puts("*** wxString cache statistics:");
 109             printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
 110                    stats.postot);
 111             printf("\tHits %u (of which %u not used) or %.2f%%\n",
 112                    stats.poshits,
 113                    stats.mishits,
 114                    100.*float(stats.poshits - stats.mishits)/stats.postot);
 115             printf("\tAverage position requested: %.2f\n",
 116                    float(stats.sumpos) / stats.postot);
 117             printf("\tAverage offset after cached hint: %.2f\n",
 118                    float(stats.sumofs) / stats.postot);
 119         }
 120
 121         if ( stats.lentot )
 122         {
 123             printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
 124                    stats.lentot, 100.*float(stats.lenhits)/stats.lentot);
 125         }
 126     }
 127 } s_showCacheStats;
 128
 129 } // anonymous namespace
 130
 131 #endif // wxPROFILE_STRING_CACHE
 132
 133 #endif // wxUSE_STRING_POS_CACHE
 134
 135 // ----------------------------------------------------------------------------
 136 // global functions
 137 // ----------------------------------------------------------------------------
 138
 139 #if wxUSE_STD_IOSTREAM
 140
 141 #include <iostream>
 142
 143 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
 144 {
 145 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
 146     return os << (const char *)str.AsCharBuf();
 147 #else
 148     return os << str.AsInternal();
 149 #endif
 150 }
 151
 152 wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
 153 {
 154     return os << str.c_str();
 155 }
 156
 157 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str)
 158 {
 159     return os << str.data();
 160 }
 161
 162 #ifndef __BORLANDC__
 163 wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)
 164 {
 165     return os << str.data();
 166 }
 167 #endif
 168
 169 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
 170
 171 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxString& str)
 172 {
 173     return wos << str.wc_str();
 174 }
 175
 176 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxCStrData& str)
 177 {
 178     return wos << str.AsWChar();
 179 }
 180
 181 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxWCharBuffer& str)
 182 {
 183     return wos << str.data();
 184 }
 185
 186 #endif  // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
 187
 188 #endif // wxUSE_STD_IOSTREAM
 189
 190 // ===========================================================================
 191 // wxString class core
 192 // ===========================================================================
 193
 194 #if wxUSE_UNICODE_UTF8
 195
 196 void wxString::PosLenToImpl(size_t pos, size_t len,
 197                             size_t *implPos, size_t *implLen) const
 198 {
 199     if ( pos == npos )
 200     {
 201         *implPos = npos;
 202     }
 203     else // have valid start position
 204     {
 205         const const_iterator b = GetIterForNthChar(pos);
 206         *implPos = wxStringImpl::const_iterator(b.impl()) - m_impl.begin();
 207         if ( len == npos )
 208         {
 209             *implLen = npos;
 210         }
 211         else // have valid length too
 212         {
 213             // we need to handle the case of length specifying a substring
 214             // going beyond the end of the string, just as std::string does
 215             const const_iterator e(end());
 216             const_iterator i(b);
 217             while ( len && i <= e )
 218             {
 219                 ++i;
 220                 --len;
 221             }
 222
 223             *implLen = i.impl() - b.impl();
 224         }
 225     }
 226 }
 227
 228 #endif // wxUSE_UNICODE_UTF8
 229
 230 // ----------------------------------------------------------------------------
 231 // wxCStrData converted strings caching
 232 // ----------------------------------------------------------------------------
 233
 234 // FIXME-UTF8: temporarily disabled because it doesn't work with global
 235 //             string objects; re-enable after fixing this bug and benchmarking
 236 //             performance to see if using a hash is a good idea at all
 237 #if 0
 238
 239 // For backward compatibility reasons, it must be possible to assign the value
 240 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
 241 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
 242 // because the memory would be freed immediately, but it has to be valid as long
 243 // as the string is not modified, so that code like this still works:
 244 //
 245 // const wxChar *s = str.c_str();
 246 // while ( s ) { ... }
 247
 248 // FIXME-UTF8: not thread safe!
 249 // FIXME-UTF8: we currently clear the cached conversion only when the string is
 250 //             destroyed, but we should do it when the string is modified, to
 251 //             keep memory usage down
 252 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
 253 //             invalidated the cache on every change, we could keep the previous
 254 //             conversion
 255 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
 256 //             to use mb_str() or wc_str() instead of (const [w]char*)c_str()
 257
 258 template<typename T>
 259 static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
 260 {
 261     typename T::iterator i = hash.find(wxConstCast(s, wxString));
 262     if ( i != hash.end() )
 263     {
 264         free(i->second);
 265         hash.erase(i);
 266     }
 267 }
 268
 269 #if wxUSE_UNICODE
 270 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
 271 //     so we have to use wxString* here and const-cast when used
 272 WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
 273                     wxStringCharConversionCache);
 274 static wxStringCharConversionCache gs_stringsCharCache;
 275
 276 const char* wxCStrData::AsChar() const
 277 {
 278     // remove previously cache value, if any (see FIXMEs above):
 279     DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
 280
 281     // convert the string and keep it:
 282     const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
 283         m_str->mb_str().release();
 284
 285     return s + m_offset;
 286 }
 287 #endif // wxUSE_UNICODE
 288
 289 #if !wxUSE_UNICODE_WCHAR
 290 WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
 291                     wxStringWCharConversionCache);
 292 static wxStringWCharConversionCache gs_stringsWCharCache;
 293
 294 const wchar_t* wxCStrData::AsWChar() const
 295 {
 296     // remove previously cache value, if any (see FIXMEs above):
 297     DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
 298
 299     // convert the string and keep it:
 300     const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
 301         m_str->wc_str().release();
 302
 303     return s + m_offset;
 304 }
 305 #endif // !wxUSE_UNICODE_WCHAR
 306
 307 wxString::~wxString()
 308 {
 309 #if wxUSE_UNICODE
 310     // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
 311     DeleteStringFromConversionCache(gs_stringsCharCache, this);
 312 #endif
 313 #if !wxUSE_UNICODE_WCHAR
 314     DeleteStringFromConversionCache(gs_stringsWCharCache, this);
 315 #endif
 316 }
 317 #endif
 318
 319 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 320 const char* wxCStrData::AsChar() const
 321 {
 322 #if wxUSE_UNICODE_UTF8
 323     if ( wxLocaleIsUtf8 )
 324         return AsInternal();
 325 #endif
 326     // under non-UTF8 locales, we have to convert the internal UTF-8
 327     // representation using wxConvLibc and cache the result
 328
 329     wxString *str = wxConstCast(m_str, wxString);
 330
 331     // convert the string:
 332     //
 333     // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
 334     //             have it) but it's unfortunately not obvious to implement
 335     //             because we don't know how big buffer do we need for the
 336     //             given string length (in case of multibyte encodings, e.g.
 337     //             ISO-2022-JP or UTF-8 when internal representation is wchar_t)
 338     //
 339     //             One idea would be to store more than just m_convertedToChar
 340     //             in wxString: then we could record the length of the string
 341     //             which was converted the last time and try to reuse the same
 342     //             buffer if the current length is not greater than it (this
 343     //             could still fail because string could have been modified in
 344     //             place but it would work most of the time, so we'd do it and
 345     //             only allocate the new buffer if in-place conversion returned
 346     //             an error). We could also store a bit saying if the string
 347     //             was modified since the last conversion (and update it in all
 348     //             operation modifying the string, of course) to avoid unneeded
 349     //             consequential conversions. But both of these ideas require
 350     //             adding more fields to wxString and require profiling results
 351     //             to be sure that we really gain enough from them to justify
 352     //             doing it.
 353     wxCharBuffer buf(str->mb_str());
 354
 355     // if it failed, return empty string and not NULL to avoid crashes in code
 356     // written with either wxWidgets 2 wxString or std::string behaviour in
 357     // mind: neither of them ever returns NULL and so we shouldn't neither
 358     if ( !buf )
 359         return "";
 360
 361     if ( str->m_convertedToChar &&
 362          strlen(buf) == strlen(str->m_convertedToChar) )
 363     {
 364         // keep the same buffer for as long as possible, so that several calls
 365         // to c_str() in a row still work:
 366         strcpy(str->m_convertedToChar, buf);
 367     }
 368     else
 369     {
 370         str->m_convertedToChar = buf.release();
 371     }
 372
 373     // and keep it:
 374     return str->m_convertedToChar + m_offset;
 375 }
 376 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 377
 378 #if !wxUSE_UNICODE_WCHAR
 379 const wchar_t* wxCStrData::AsWChar() const
 380 {
 381     wxString *str = wxConstCast(m_str, wxString);
 382
 383     // convert the string:
 384     wxWCharBuffer buf(str->wc_str());
 385
 386     // notice that here, unlike above in AsChar(), conversion can't fail as our
 387     // internal UTF-8 is always well-formed -- or the string was corrupted and
 388     // all bets are off anyhow
 389
 390     // FIXME-UTF8: do the conversion in-place in the existing buffer
 391     if ( str->m_convertedToWChar &&
 392          wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
 393     {
 394         // keep the same buffer for as long as possible, so that several calls
 395         // to c_str() in a row still work:
 396         memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
 397     }
 398     else
 399     {
 400         str->m_convertedToWChar = buf.release();
 401     }
 402
 403     // and keep it:
 404     return str->m_convertedToWChar + m_offset;
 405 }
 406 #endif // !wxUSE_UNICODE_WCHAR
 407
 408 // ===========================================================================
 409 // wxString class core
 410 // ===========================================================================
 411
 412 // ---------------------------------------------------------------------------
 413 // construction and conversion
 414 // ---------------------------------------------------------------------------
 415
 416 #if wxUSE_UNICODE_WCHAR
 417 /* static */
 418 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 419                                                const wxMBConv& conv)
 420 {
 421     // anything to do?
 422     if ( !psz || nLength == 0 )
 423         return SubstrBufFromMB(L"", 0);
 424
 425     if ( nLength == npos )
 426         nLength = wxNO_LEN;
 427
 428     size_t wcLen;
 429     wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 430     if ( !wcLen )
 431         return SubstrBufFromMB(_T(""), 0);
 432     else
 433         return SubstrBufFromMB(wcBuf, wcLen);
 434 }
 435 #endif // wxUSE_UNICODE_WCHAR
 436
 437 #if wxUSE_UNICODE_UTF8
 438 /* static */
 439 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 440                                                const wxMBConv& conv)
 441 {
 442     // anything to do?
 443     if ( !psz || nLength == 0 )
 444         return SubstrBufFromMB("", 0);
 445
 446     // if psz is already in UTF-8, we don't have to do the roundtrip to
 447     // wchar_t* and back:
 448     if ( conv.IsUTF8() )
 449     {
 450         // we need to validate the input because UTF8 iterators assume valid
 451         // UTF-8 sequence and psz may be invalid:
 452         if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
 453         {
 454             // we must pass the real string length to SubstrBufFromMB ctor
 455             if ( nLength == npos )
 456                 nLength = psz ? strlen(psz) : 0;
 457             return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz), nLength);
 458         }
 459         // else: do the roundtrip through wchar_t*
 460     }
 461
 462     if ( nLength == npos )
 463         nLength = wxNO_LEN;
 464
 465     // first convert to wide string:
 466     size_t wcLen;
 467     wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 468     if ( !wcLen )
 469         return SubstrBufFromMB("", 0);
 470
 471     // and then to UTF-8:
 472     SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
 473     // widechar -> UTF-8 conversion isn't supposed to ever fail:
 474     wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
 475
 476     return buf;
 477 }
 478 #endif // wxUSE_UNICODE_UTF8
 479
 480 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 481 /* static */
 482 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
 483                                                const wxMBConv& conv)
 484 {
 485     // anything to do?
 486     if ( !pwz || nLength == 0 )
 487         return SubstrBufFromWC("", 0);
 488
 489     if ( nLength == npos )
 490         nLength = wxNO_LEN;
 491
 492     size_t mbLen;
 493     wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
 494     if ( !mbLen )
 495         return SubstrBufFromWC("", 0);
 496     else
 497         return SubstrBufFromWC(mbBuf, mbLen);
 498 }
 499 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 500
 501
 502 #if wxUSE_UNICODE_WCHAR
 503
 504 //Convert wxString in Unicode mode to a multi-byte string
 505 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
 506 {
 507     return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL);
 508 }
 509
 510 #elif wxUSE_UNICODE_UTF8
 511
 512 const wxWCharBuffer wxString::wc_str() const
 513 {
 514     return wxMBConvStrictUTF8().cMB2WC
 515                                 (
 516                                     m_impl.c_str(),
 517                                     m_impl.length() + 1, // size, not length
 518                                     NULL
 519                                 );
 520 }
 521
 522 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
 523 {
 524     if ( conv.IsUTF8() )
 525         return wxCharBuffer::CreateNonOwned(m_impl.c_str());
 526
 527     // FIXME-UTF8: use wc_str() here once we have buffers with length
 528
 529     size_t wcLen;
 530     wxWCharBuffer wcBuf(wxMBConvStrictUTF8().cMB2WC
 531                                              (
 532                                                 m_impl.c_str(),
 533                                                 m_impl.length() + 1, // size
 534                                                 &wcLen
 535                                              ));
 536     if ( !wcLen )
 537         return wxCharBuffer("");
 538
 539     return conv.cWC2MB(wcBuf, wcLen+1, NULL);
 540 }
 541
 542 #else // ANSI
 543
 544 //Converts this string to a wide character string if unicode
 545 //mode is not enabled and wxUSE_WCHAR_T is enabled
 546 const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
 547 {
 548     return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL);
 549 }
 550
 551 #endif // Unicode/ANSI
 552
 553 // shrink to minimal size (releasing extra memory)
 554 bool wxString::Shrink()
 555 {
 556   wxString tmp(begin(), end());
 557   swap(tmp);
 558   return tmp.length() == length();
 559 }
 560
 561 // deprecated compatibility code:
 562 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
 563 wxStringCharType *wxString::GetWriteBuf(size_t nLen)
 564 {
 565     return DoGetWriteBuf(nLen);
 566 }
 567
 568 void wxString::UngetWriteBuf()
 569 {
 570     DoUngetWriteBuf();
 571 }
 572
 573 void wxString::UngetWriteBuf(size_t nLen)
 574 {
 575     DoUngetWriteBuf(nLen);
 576 }
 577 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
 578
 579
 580 // ---------------------------------------------------------------------------
 581 // data access
 582 // ---------------------------------------------------------------------------
 583
 584 // all functions are inline in string.h
 585
 586 // ---------------------------------------------------------------------------
 587 // concatenation operators
 588 // ---------------------------------------------------------------------------
 589
/*
 * concatenation functions come in the following flavours:
 *  string + string
 *  char   + string      and      string + char
 *  C str  + string      and      string + C str
 * (the C string versions are provided for both char and wchar_t strings)
 */
 596
 597 wxString operator+(const wxString& str1, const wxString& str2)
 598 {
 599 #if !wxUSE_STL_BASED_WXSTRING
 600     wxASSERT( str1.IsValid() );
 601     wxASSERT( str2.IsValid() );
 602 #endif
 603
 604     wxString s = str1;
 605     s += str2;
 606
 607     return s;
 608 }
 609
 610 wxString operator+(const wxString& str, wxUniChar ch)
 611 {
 612 #if !wxUSE_STL_BASED_WXSTRING
 613     wxASSERT( str.IsValid() );
 614 #endif
 615
 616     wxString s = str;
 617     s += ch;
 618
 619     return s;
 620 }
 621
 622 wxString operator+(wxUniChar ch, const wxString& str)
 623 {
 624 #if !wxUSE_STL_BASED_WXSTRING
 625     wxASSERT( str.IsValid() );
 626 #endif
 627
 628     wxString s = ch;
 629     s += str;
 630
 631     return s;
 632 }
 633
 634 wxString operator+(const wxString& str, const char *psz)
 635 {
 636 #if !wxUSE_STL_BASED_WXSTRING
 637     wxASSERT( str.IsValid() );
 638 #endif
 639
 640     wxString s;
 641     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 642         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 643     }
 644     s += str;
 645     s += psz;
 646
 647     return s;
 648 }
 649
 650 wxString operator+(const wxString& str, const wchar_t *pwz)
 651 {
 652 #if !wxUSE_STL_BASED_WXSTRING
 653     wxASSERT( str.IsValid() );
 654 #endif
 655
 656     wxString s;
 657     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 658         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 659     }
 660     s += str;
 661     s += pwz;
 662
 663     return s;
 664 }
 665
 666 wxString operator+(const char *psz, const wxString& str)
 667 {
 668 #if !wxUSE_STL_BASED_WXSTRING
 669     wxASSERT( str.IsValid() );
 670 #endif
 671
 672     wxString s;
 673     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 674         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 675     }
 676     s = psz;
 677     s += str;
 678
 679     return s;
 680 }
 681
 682 wxString operator+(const wchar_t *pwz, const wxString& str)
 683 {
 684 #if !wxUSE_STL_BASED_WXSTRING
 685     wxASSERT( str.IsValid() );
 686 #endif
 687
 688     wxString s;
 689     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 690         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 691     }
 692     s = pwz;
 693     s += str;
 694
 695     return s;
 696 }
 697
 698 // ---------------------------------------------------------------------------
 699 // string comparison
 700 // ---------------------------------------------------------------------------
 701
 702 bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
 703 {
 704     return (length() == 1) && (compareWithCase ? GetChar(0u) == c
 705                                : wxToupper(GetChar(0u)) == wxToupper(c));
 706 }
 707
 708 #ifdef HAVE_STD_STRING_COMPARE
 709
 710 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
 711 //     UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
 712 //     sort strings in characters code point order by sorting the byte sequence
 713 //     in byte values order (i.e. what strcmp() and memcmp() do).
 714
 715 int wxString::compare(const wxString& str) const
 716 {
 717     return m_impl.compare(str.m_impl);
 718 }
 719
 720 int wxString::compare(size_t nStart, size_t nLen,
 721                       const wxString& str) const
 722 {
 723     size_t pos, len;
 724     PosLenToImpl(nStart, nLen, &pos, &len);
 725     return m_impl.compare(pos, len, str.m_impl);
 726 }
 727
 728 int wxString::compare(size_t nStart, size_t nLen,
 729                       const wxString& str,
 730                       size_t nStart2, size_t nLen2) const
 731 {
 732     size_t pos, len;
 733     PosLenToImpl(nStart, nLen, &pos, &len);
 734
 735     size_t pos2, len2;
 736     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 737
 738     return m_impl.compare(pos, len, str.m_impl, pos2, len2);
 739 }
 740
 741 int wxString::compare(const char* sz) const
 742 {
 743     return m_impl.compare(ImplStr(sz));
 744 }
 745
 746 int wxString::compare(const wchar_t* sz) const
 747 {
 748     return m_impl.compare(ImplStr(sz));
 749 }
 750
 751 int wxString::compare(size_t nStart, size_t nLen,
 752                       const char* sz, size_t nCount) const
 753 {
 754     size_t pos, len;
 755     PosLenToImpl(nStart, nLen, &pos, &len);
 756
 757     SubstrBufFromMB str(ImplStr(sz, nCount));
 758
 759     return m_impl.compare(pos, len, str.data, str.len);
 760 }
 761
 762 int wxString::compare(size_t nStart, size_t nLen,
 763                       const wchar_t* sz, size_t nCount) const
 764 {
 765     size_t pos, len;
 766     PosLenToImpl(nStart, nLen, &pos, &len);
 767
 768     SubstrBufFromWC str(ImplStr(sz, nCount));
 769
 770     return m_impl.compare(pos, len, str.data, str.len);
 771 }
 772
 773 #else // !HAVE_STD_STRING_COMPARE
 774
 775 static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
 776                           const wxStringCharType* s2, size_t l2)
 777 {
 778     if( l1 == l2 )
 779         return wxStringMemcmp(s1, s2, l1);
 780     else if( l1 < l2 )
 781     {
 782         int ret = wxStringMemcmp(s1, s2, l1);
 783         return ret == 0 ? -1 : ret;
 784     }
 785     else
 786     {
 787         int ret = wxStringMemcmp(s1, s2, l2);
 788         return ret == 0 ? +1 : ret;
 789     }
 790 }
 791
 792 int wxString::compare(const wxString& str) const
 793 {
 794     return ::wxDoCmp(m_impl.data(), m_impl.length(),
 795                      str.m_impl.data(), str.m_impl.length());
 796 }
 797
 798 int wxString::compare(size_t nStart, size_t nLen,
 799                       const wxString& str) const
 800 {
 801     wxASSERT(nStart <= length());
 802     size_type strLen = length() - nStart;
 803     nLen = strLen < nLen ? strLen : nLen;
 804
 805     size_t pos, len;
 806     PosLenToImpl(nStart, nLen, &pos, &len);
 807
 808     return ::wxDoCmp(m_impl.data() + pos,  len,
 809                      str.m_impl.data(), str.m_impl.length());
 810 }
 811
 812 int wxString::compare(size_t nStart, size_t nLen,
 813                       const wxString& str,
 814                       size_t nStart2, size_t nLen2) const
 815 {
 816     wxASSERT(nStart <= length());
 817     wxASSERT(nStart2 <= str.length());
 818     size_type strLen  =     length() - nStart,
 819               strLen2 = str.length() - nStart2;
 820     nLen  = strLen  < nLen  ? strLen  : nLen;
 821     nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
 822
 823     size_t pos, len;
 824     PosLenToImpl(nStart, nLen, &pos, &len);
 825     size_t pos2, len2;
 826     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 827
 828     return ::wxDoCmp(m_impl.data() + pos, len,
 829                      str.m_impl.data() + pos2, len2);
 830 }
 831
 832 int wxString::compare(const char* sz) const
 833 {
 834     SubstrBufFromMB str(ImplStr(sz, npos));
 835     if ( str.len == npos )
 836         str.len = wxStringStrlen(str.data);
 837     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 838 }
 839
 840 int wxString::compare(const wchar_t* sz) const
 841 {
 842     SubstrBufFromWC str(ImplStr(sz, npos));
 843     if ( str.len == npos )
 844         str.len = wxStringStrlen(str.data);
 845     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 846 }
 847
 848 int wxString::compare(size_t nStart, size_t nLen,
 849                       const char* sz, size_t nCount) const
 850 {
 851     wxASSERT(nStart <= length());
 852     size_type strLen = length() - nStart;
 853     nLen = strLen < nLen ? strLen : nLen;
 854
 855     size_t pos, len;
 856     PosLenToImpl(nStart, nLen, &pos, &len);
 857
 858     SubstrBufFromMB str(ImplStr(sz, nCount));
 859     if ( str.len == npos )
 860         str.len = wxStringStrlen(str.data);
 861
 862     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 863 }
 864
 865 int wxString::compare(size_t nStart, size_t nLen,
 866                       const wchar_t* sz, size_t nCount) const
 867 {
 868     wxASSERT(nStart <= length());
 869     size_type strLen = length() - nStart;
 870     nLen = strLen < nLen ? strLen : nLen;
 871
 872     size_t pos, len;
 873     PosLenToImpl(nStart, nLen, &pos, &len);
 874
 875     SubstrBufFromWC str(ImplStr(sz, nCount));
 876     if ( str.len == npos )
 877         str.len = wxStringStrlen(str.data);
 878
 879     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 880 }
 881
 882 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
 883
 884
 885 // ---------------------------------------------------------------------------
 886 // find_{first,last}_[not]_of functions
 887 // ---------------------------------------------------------------------------
 888
 889 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
 890
 891 // NB: All these functions are implemented  with the argument being wxChar*,
 892 //     i.e. widechar string in any Unicode build, even though native string
 893 //     representation is char* in the UTF-8 build. This is because we couldn't
 894 //     use memchr() to determine if a character is in a set encoded as UTF-8.
 895
 896 size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
 897 {
 898     return find_first_of(sz, nStart, wxStrlen(sz));
 899 }
 900
 901 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
 902 {
 903     return find_first_not_of(sz, nStart, wxStrlen(sz));
 904 }
 905
 906 size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
 907 {
 908     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 909
 910     size_t idx = nStart;
 911     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 912     {
 913         if ( wxTmemchr(sz, *i, n) )
 914             return idx;
 915     }
 916
 917     return npos;
 918 }
 919
 920 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
 921 {
 922     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 923
 924     size_t idx = nStart;
 925     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 926     {
 927         if ( !wxTmemchr(sz, *i, n) )
 928             return idx;
 929     }
 930
 931     return npos;
 932 }
 933
 934
 935 size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
 936 {
 937     return find_last_of(sz, nStart, wxStrlen(sz));
 938 }
 939
 940 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
 941 {
 942     return find_last_not_of(sz, nStart, wxStrlen(sz));
 943 }
 944
 945 size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
 946 {
 947     size_t len = length();
 948
 949     if ( nStart == npos )
 950     {
 951         nStart = len - 1;
 952     }
 953     else
 954     {
 955         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 956     }
 957
 958     size_t idx = nStart;
 959     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 960           i != rend(); --idx, ++i )
 961     {
 962         if ( wxTmemchr(sz, *i, n) )
 963             return idx;
 964     }
 965
 966     return npos;
 967 }
 968
 969 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
 970 {
 971     size_t len = length();
 972
 973     if ( nStart == npos )
 974     {
 975         nStart = len - 1;
 976     }
 977     else
 978     {
 979         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 980     }
 981
 982     size_t idx = nStart;
 983     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 984           i != rend(); --idx, ++i )
 985     {
 986         if ( !wxTmemchr(sz, *i, n) )
 987             return idx;
 988     }
 989
 990     return npos;
 991 }
 992
 993 size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
 994 {
 995     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 996
 997     size_t idx = nStart;
 998     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 999     {
1000         if ( *i != ch )
1001             return idx;
1002     }
1003
1004     return npos;
1005 }
1006
1007 size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
1008 {
1009     size_t len = length();
1010
1011     if ( nStart == npos )
1012     {
1013         nStart = len - 1;
1014     }
1015     else
1016     {
1017         wxASSERT_MSG( nStart <= len, _T("invalid index") );
1018     }
1019
1020     size_t idx = nStart;
1021     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1022           i != rend(); --idx, ++i )
1023     {
1024         if ( *i != ch )
1025             return idx;
1026     }
1027
1028     return npos;
1029 }
1030
1031 // the functions above were implemented for wchar_t* arguments in Unicode
1032 // build and char* in ANSI build; below are implementations for the other
1033 // version:
1034 #if wxUSE_UNICODE
1035     #define wxOtherCharType char
1036     #define STRCONV         (const wxChar*)wxConvLibc.cMB2WC
1037 #else
1038     #define wxOtherCharType wchar_t
1039     #define STRCONV         (const wxChar*)wxConvLibc.cWC2MB
1040 #endif
1041
1042 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
1043     { return find_first_of(STRCONV(sz), nStart); }
1044
1045 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
1046                                size_t n) const
1047     { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
1048 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
1049     { return find_last_of(STRCONV(sz), nStart); }
1050 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
1051                               size_t n) const
1052     { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
1053 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
1054     { return find_first_not_of(STRCONV(sz), nStart); }
1055 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
1056                                    size_t n) const
1057     { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
1058 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
1059     { return find_last_not_of(STRCONV(sz), nStart); }
1060 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
1061                                   size_t n) const
1062     { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
1063
1064 #undef wxOtherCharType
1065 #undef STRCONV
1066
1067 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1068
1069 // ===========================================================================
1070 // other common string functions
1071 // ===========================================================================
1072
1073 int wxString::CmpNoCase(const wxString& s) const
1074 {
1075     // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
1076
1077     const_iterator i1 = begin();
1078     const_iterator end1 = end();
1079     const_iterator i2 = s.begin();
1080     const_iterator end2 = s.end();
1081
1082     for ( ; i1 != end1 && i2 != end2; ++i1, ++i2 )
1083     {
1084         wxUniChar lower1 = (wxChar)wxTolower(*i1);
1085         wxUniChar lower2 = (wxChar)wxTolower(*i2);
1086         if ( lower1 != lower2 )
1087             return lower1 < lower2 ? -1 : 1;
1088     }
1089
1090     size_t len1 = length();
1091     size_t len2 = s.length();
1092
1093     if ( len1 < len2 )
1094         return -1;
1095     else if ( len1 > len2 )
1096         return 1;
1097     return 0;
1098 }
1099
1100
1101 #if wxUSE_UNICODE
1102
1103 #ifdef __MWERKS__
1104 #ifndef __SCHAR_MAX__
1105 #define __SCHAR_MAX__ 127
1106 #endif
1107 #endif
1108
1109 wxString wxString::FromAscii(const char *ascii, size_t len)
1110 {
1111     if (!ascii || len == 0)
1112        return wxEmptyString;
1113
1114     wxString res;
1115
1116     {
1117         wxStringInternalBuffer buf(res, len);
1118         wxStringCharType *dest = buf;
1119
1120         for ( ; len > 0; --len )
1121         {
1122             unsigned char c = (unsigned char)*ascii++;
1123             wxASSERT_MSG( c < 0x80,
1124                           _T("Non-ASCII value passed to FromAscii().") );
1125
1126             *dest++ = (wchar_t)c;
1127         }
1128     }
1129
1130     return res;
1131 }
1132
1133 wxString wxString::FromAscii(const char *ascii)
1134 {
1135     return FromAscii(ascii, wxStrlen(ascii));
1136 }
1137
1138 wxString wxString::FromAscii(char ascii)
1139 {
1140     // What do we do with '\0' ?
1141
1142     unsigned char c = (unsigned char)ascii;
1143
1144     wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") );
1145
1146     // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1147     return wxString(wxUniChar((wchar_t)c));
1148 }
1149
1150 const wxCharBuffer wxString::ToAscii() const
1151 {
1152     // this will allocate enough space for the terminating NUL too
1153     wxCharBuffer buffer(length());
1154     char *dest = buffer.data();
1155
1156     for ( const_iterator i = begin(); i != end(); ++i )
1157     {
1158         wxUniChar c(*i);
1159         // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1160         *dest++ = c.IsAscii() ? (char)c : '_';
1161
1162         // the output string can't have embedded NULs anyhow, so we can safely
1163         // stop at first of them even if we do have any
1164         if ( !c )
1165             break;
1166     }
1167
1168     return buffer;
1169 }
1170
1171 #endif // wxUSE_UNICODE
1172
1173 // extract string of length nCount starting at nFirst
1174 wxString wxString::Mid(size_t nFirst, size_t nCount) const
1175 {
1176     size_t nLen = length();
1177
1178     // default value of nCount is npos and means "till the end"
1179     if ( nCount == npos )
1180     {
1181         nCount = nLen - nFirst;
1182     }
1183
1184     // out-of-bounds requests return sensible things
1185     if ( nFirst + nCount > nLen )
1186     {
1187         nCount = nLen - nFirst;
1188     }
1189
1190     if ( nFirst > nLen )
1191     {
1192         // AllocCopy() will return empty string
1193         return wxEmptyString;
1194     }
1195
1196     wxString dest(*this, nFirst, nCount);
1197     if ( dest.length() != nCount )
1198     {
1199         wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1200     }
1201
1202     return dest;
1203 }
1204
1205 // check that the string starts with prefix and return the rest of the string
1206 // in the provided pointer if it is not NULL, otherwise return false
1207 bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
1208 {
1209     if ( compare(0, prefix.length(), prefix) != 0 )
1210         return false;
1211
1212     if ( rest )
1213     {
1214         // put the rest of the string into provided pointer
1215         rest->assign(*this, prefix.length(), npos);
1216     }
1217
1218     return true;
1219 }
1220
1221
1222 // check that the string ends with suffix and return the rest of it in the
1223 // provided pointer if it is not NULL, otherwise return false
1224 bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
1225 {
1226     int start = length() - suffix.length();
1227
1228     if ( start < 0 || compare(start, npos, suffix) != 0 )
1229         return false;
1230
1231     if ( rest )
1232     {
1233         // put the rest of the string into provided pointer
1234         rest->assign(*this, 0, start);
1235     }
1236
1237     return true;
1238 }
1239
1240
1241 // extract nCount last (rightmost) characters
1242 wxString wxString::Right(size_t nCount) const
1243 {
1244   if ( nCount > length() )
1245     nCount = length();
1246
1247   wxString dest(*this, length() - nCount, nCount);
1248   if ( dest.length() != nCount ) {
1249     wxFAIL_MSG( _T("out of memory in wxString::Right") );
1250   }
1251   return dest;
1252 }
1253
1254 // get all characters after the last occurence of ch
1255 // (returns the whole string if ch not found)
1256 wxString wxString::AfterLast(wxUniChar ch) const
1257 {
1258   wxString str;
1259   int iPos = Find(ch, true);
1260   if ( iPos == wxNOT_FOUND )
1261     str = *this;
1262   else
1263     str = wx_str() + iPos + 1;
1264
1265   return str;
1266 }
1267
1268 // extract nCount first (leftmost) characters
1269 wxString wxString::Left(size_t nCount) const
1270 {
1271   if ( nCount > length() )
1272     nCount = length();
1273
1274   wxString dest(*this, 0, nCount);
1275   if ( dest.length() != nCount ) {
1276     wxFAIL_MSG( _T("out of memory in wxString::Left") );
1277   }
1278   return dest;
1279 }
1280
1281 // get all characters before the first occurence of ch
1282 // (returns the whole string if ch not found)
1283 wxString wxString::BeforeFirst(wxUniChar ch) const
1284 {
1285   int iPos = Find(ch);
1286   if ( iPos == wxNOT_FOUND ) iPos = length();
1287   return wxString(*this, 0, iPos);
1288 }
1289
1290 /// get all characters before the last occurence of ch
1291 /// (returns empty string if ch not found)
1292 wxString wxString::BeforeLast(wxUniChar ch) const
1293 {
1294   wxString str;
1295   int iPos = Find(ch, true);
1296   if ( iPos != wxNOT_FOUND && iPos != 0 )
1297     str = wxString(c_str(), iPos);
1298
1299   return str;
1300 }
1301
1302 /// get all characters after the first occurence of ch
1303 /// (returns empty string if ch not found)
1304 wxString wxString::AfterFirst(wxUniChar ch) const
1305 {
1306   wxString str;
1307   int iPos = Find(ch);
1308   if ( iPos != wxNOT_FOUND )
1309     str = wx_str() + iPos + 1;
1310
1311   return str;
1312 }
1313
1314 // replace first (or all) occurences of some substring with another one
1315 size_t wxString::Replace(const wxString& strOld,
1316                          const wxString& strNew, bool bReplaceAll)
1317 {
1318     // if we tried to replace an empty string we'd enter an infinite loop below
1319     wxCHECK_MSG( !strOld.empty(), 0,
1320                  _T("wxString::Replace(): invalid parameter") );
1321
1322     wxSTRING_INVALIDATE_CACHE();
1323
1324     size_t uiCount = 0;   // count of replacements made
1325
1326     // optimize the special common case: replacement of one character by
1327     // another one (in UTF-8 case we can only do this for ASCII characters)
1328     //
1329     // benchmarks show that this special version is around 3 times faster
1330     // (depending on the proportion of matching characters and UTF-8/wchar_t
1331     // build)
1332     if ( strOld.m_impl.length() == 1 && strNew.m_impl.length() == 1 )
1333     {
1334         const wxStringCharType chOld = strOld.m_impl[0],
1335                                chNew = strNew.m_impl[0];
1336
1337         // this loop is the simplified version of the one below
1338         for ( size_t pos = 0; ; )
1339         {
1340             pos = m_impl.find(chOld, pos);
1341             if ( pos == npos )
1342                 break;
1343
1344             m_impl[pos++] = chNew;
1345
1346             uiCount++;
1347
1348             if ( !bReplaceAll )
1349                 break;
1350         }
1351     }
1352     else // general case
1353     {
1354         const size_t uiOldLen = strOld.m_impl.length();
1355         const size_t uiNewLen = strNew.m_impl.length();
1356
1357         for ( size_t pos = 0; ; )
1358         {
1359             pos = m_impl.find(strOld.m_impl, pos);
1360             if ( pos == npos )
1361                 break;
1362
1363             // replace this occurrence of the old string with the new one
1364             m_impl.replace(pos, uiOldLen, strNew.m_impl);
1365
1366             // move up pos past the string that was replaced
1367             pos += uiNewLen;
1368
1369             // increase replace count
1370             uiCount++;
1371
1372             // stop after the first one?
1373             if ( !bReplaceAll )
1374                 break;
1375         }
1376     }
1377
1378     return uiCount;
1379 }
1380
1381 bool wxString::IsAscii() const
1382 {
1383     for ( const_iterator i = begin(); i != end(); ++i )
1384     {
1385         if ( !(*i).IsAscii() )
1386             return false;
1387     }
1388
1389     return true;
1390 }
1391
1392 bool wxString::IsWord() const
1393 {
1394     for ( const_iterator i = begin(); i != end(); ++i )
1395     {
1396         if ( !wxIsalpha(*i) )
1397             return false;
1398     }
1399
1400     return true;
1401 }
1402
1403 bool wxString::IsNumber() const
1404 {
1405     if ( empty() )
1406         return true;
1407
1408     const_iterator i = begin();
1409
1410     if ( *i == _T('-') || *i == _T('+') )
1411         ++i;
1412
1413     for ( ; i != end(); ++i )
1414     {
1415         if ( !wxIsdigit(*i) )
1416             return false;
1417     }
1418
1419     return true;
1420 }
1421
1422 wxString wxString::Strip(stripType w) const
1423 {
1424     wxString s = *this;
1425     if ( w & leading ) s.Trim(false);
1426     if ( w & trailing ) s.Trim(true);
1427     return s;
1428 }
1429
1430 // ---------------------------------------------------------------------------
1431 // case conversion
1432 // ---------------------------------------------------------------------------
1433
1434 wxString& wxString::MakeUpper()
1435 {
1436   for ( iterator it = begin(), en = end(); it != en; ++it )
1437     *it = (wxChar)wxToupper(*it);
1438
1439   return *this;
1440 }
1441
1442 wxString& wxString::MakeLower()
1443 {
1444   for ( iterator it = begin(), en = end(); it != en; ++it )
1445     *it = (wxChar)wxTolower(*it);
1446
1447   return *this;
1448 }
1449
1450 wxString& wxString::MakeCapitalized()
1451 {
1452     const iterator en = end();
1453     iterator it = begin();
1454     if ( it != en )
1455     {
1456         *it = (wxChar)wxToupper(*it);
1457         for ( ++it; it != en; ++it )
1458             *it = (wxChar)wxTolower(*it);
1459     }
1460
1461     return *this;
1462 }
1463
1464 // ---------------------------------------------------------------------------
1465 // trimming and padding
1466 // ---------------------------------------------------------------------------
1467
1468 // some compilers (VC++ 6.0 not to name them) return true for a call to
1469 // isspace('\xEA') in the C locale which seems to be broken to me, but we have
1470 // to live with this by checking that the character is a 7 bit one - even if
1471 // this may fail to detect some spaces (I don't know if Unicode doesn't have
1472 // space-like symbols somewhere except in the first 128 chars), it is arguably
1473 // still better than trimming away accented letters
1474 inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1475
1476 // trims spaces (in the sense of isspace) from left or right side
1477 wxString& wxString::Trim(bool bFromRight)
1478 {
1479     // first check if we're going to modify the string at all
1480     if ( !empty() &&
1481          (
1482           (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1483           (!bFromRight && wxSafeIsspace(GetChar(0u)))
1484          )
1485        )
1486     {
1487         if ( bFromRight )
1488         {
1489             // find last non-space character
1490             reverse_iterator psz = rbegin();
1491             while ( (psz != rend()) && wxSafeIsspace(*psz) )
1492                 ++psz;
1493
1494             // truncate at trailing space start
1495             erase(psz.base(), end());
1496         }
1497         else
1498         {
1499             // find first non-space character
1500             iterator psz = begin();
1501             while ( (psz != end()) && wxSafeIsspace(*psz) )
1502                 ++psz;
1503
1504             // fix up data and length
1505             erase(begin(), psz);
1506         }
1507     }
1508
1509     return *this;
1510 }
1511
1512 // adds nCount characters chPad to the string from either side
1513 wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1514 {
1515     wxString s(chPad, nCount);
1516
1517     if ( bFromRight )
1518         *this += s;
1519     else
1520     {
1521         s += *this;
1522         swap(s);
1523     }
1524
1525     return *this;
1526 }
1527
1528 // truncate the string
1529 wxString& wxString::Truncate(size_t uiLen)
1530 {
1531     if ( uiLen < length() )
1532     {
1533         erase(begin() + uiLen, end());
1534     }
1535     //else: nothing to do, string is already short enough
1536
1537     return *this;
1538 }
1539
1540 // ---------------------------------------------------------------------------
1541 // finding (return wxNOT_FOUND if not found and index otherwise)
1542 // ---------------------------------------------------------------------------
1543
1544 // find a character
1545 int wxString::Find(wxUniChar ch, bool bFromEnd) const
1546 {
1547     size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1548
1549     return (idx == npos) ? wxNOT_FOUND : (int)idx;
1550 }
1551
1552 // ----------------------------------------------------------------------------
1553 // conversion to numbers
1554 // ----------------------------------------------------------------------------
1555
1556 // The implementation of all the functions below is exactly the same so factor
1557 // it out. Note that number extraction works correctly on UTF-8 strings, so
1558 // we can use wxStringCharType and wx_str() for maximum efficiency.
1559
// DO_IF_NOT_WINCE(x) expands to x everywhere except under Windows CE, where
// it expands to nothing; it is used below to wrap the code touching errno.
#ifndef __WXWINCE__
    #define DO_IF_NOT_WINCE(x) x
#else
    #define DO_IF_NOT_WINCE(x)
#endif

// Complete body of a "string to integer" conversion function:
//  - out:  pointer receiving the result, checked for NULL
//  - base: numeric base passed on to func (asserted to be 0 or in 2..36)
//  - func: strtol()-like function doing the actual parsing
//  - T:    type of the value returned by func
// The expansion returns false, without touching *out, if nothing was parsed,
// if parsing stopped before the terminating NUL or (except under Windows CE)
// if errno was set to ERANGE; otherwise it stores the value and returns true.
// There is intentionally no semicolon after the final "return true".
#define WX_STRING_TO_INT_TYPE(out, base, func, T)                           \
    wxCHECK_MSG( out, false, _T("NULL output pointer") );                   \
    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );  \
                                                                            \
    DO_IF_NOT_WINCE( errno = 0; )                                           \
                                                                            \
    const wxStringCharType *start = wx_str();                               \
    wxStringCharType *end;                                                  \
    T val = func(start, &end, base);                                        \
                                                                            \
    /* return true only if scan was stopped by the terminating NUL and */   \
    /* if the string was not empty to start with and no under/overflow */   \
    /* occurred: */                                                         \
    if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )         \
        return false;                                                       \
    *out = val;                                                             \
    return true
1583
// Parses the whole string as a long in the given base using wxStrtol().
// Returns true and stores the value in *pVal on success; see
// WX_STRING_TO_INT_TYPE for the exact failure conditions.
bool wxString::ToLong(long *pVal, int base) const
{
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtol, long);
}
1588
// Parses the whole string as an unsigned long in the given base using
// wxStrtoul(). Returns true and stores the value in *pVal on success; see
// WX_STRING_TO_INT_TYPE for the exact failure conditions.
bool wxString::ToULong(unsigned long *pVal, int base) const
{
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoul, unsigned long);
}
1593
// Parses the whole string as a wxLongLong_t in the given base using
// wxStrtoll(). Returns true and stores the value in *pVal on success; see
// WX_STRING_TO_INT_TYPE for the exact failure conditions.
bool wxString::ToLongLong(wxLongLong_t *pVal, int base) const
{
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoll, wxLongLong_t);
}
1598
// Parses the whole string as a wxULongLong_t in the given base using
// wxStrtoull(). Returns true and stores the value in *pVal on success; see
// WX_STRING_TO_INT_TYPE for the exact failure conditions.
bool wxString::ToULongLong(wxULongLong_t *pVal, int base) const
{
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoull, wxULongLong_t);
}
1603
1604 bool wxString::ToDouble(double *pVal) const
1605 {
1606     wxCHECK_MSG( pVal, false, _T("NULL output pointer") );
1607
1608     DO_IF_NOT_WINCE( errno = 0; )
1609
1610     const wxChar *start = c_str();
1611     wxChar *end;
1612     double val = wxStrtod(start, &end);
1613
1614     // return true only if scan was stopped by the terminating NUL and if the
1615     // string was not empty to start with and no under/overflow occurred
1616     if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )
1617         return false;
1618
1619     *pVal = val;
1620
1621     return true;
1622 }
1623
1624 // ---------------------------------------------------------------------------
1625 // formatted output
1626 // ---------------------------------------------------------------------------
1627
1628 #if !wxUSE_UTF8_LOCALE_ONLY
1629 /* static */
1630 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1631 wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
1632 #else
1633 wxString wxString::DoFormatWchar(const wxChar *format, ...)
1634 #endif
1635 {
1636     va_list argptr;
1637     va_start(argptr, format);
1638
1639     wxString s;
1640     s.PrintfV(format, argptr);
1641
1642     va_end(argptr);
1643
1644     return s;
1645 }
1646 #endif // !wxUSE_UTF8_LOCALE_ONLY
1647
#if wxUSE_UNICODE_UTF8
// Returns a new string built from a char format string and the variable
// arguments following it; the formatting itself is done by PrintfV().
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
1663
1664 /* static */
1665 wxString wxString::FormatV(const wxString& format, va_list argptr)
1666 {
1667     wxString s;
1668     s.PrintfV(format, argptr);
1669     return s;
1670 }
1671
1672 #if !wxUSE_UTF8_LOCALE_ONLY
1673 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1674 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
1675 #else
1676 int wxString::DoPrintfWchar(const wxChar *format, ...)
1677 #endif
1678 {
1679     va_list argptr;
1680     va_start(argptr, format);
1681
1682 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1683     // get a pointer to the wxString instance; we have to use dynamic_cast<>
1684     // because it's the only cast that works safely for downcasting when
1685     // multiple inheritance is used:
1686     wxString *str = static_cast<wxString*>(this);
1687 #else
1688     wxString *str = this;
1689 #endif
1690
1691     int iLen = str->PrintfV(format, argptr);
1692
1693     va_end(argptr);
1694
1695     return iLen;
1696 }
1697 #endif // !wxUSE_UTF8_LOCALE_ONLY
1698
#if wxUSE_UNICODE_UTF8
// Formats the variable arguments according to the char format string into
// this string and returns whatever PrintfV() returns.
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);
    const int nChars = PrintfV(format, args);
    va_end(args);

    return nChars;
}
#endif // wxUSE_UNICODE_UTF8
1712
1713 /*
1714     Uses wxVsnprintf and places the result into the this string.
1715
1716     In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1717     it is vswprintf.  Due to a discrepancy between vsnprintf and vswprintf in
1718     the ISO C99 (and thus SUSv3) standard the return value for the case of
1719     an undersized buffer is inconsistent.  For conforming vsnprintf
1720     implementations the function must return the number of characters that
1721     would have been printed had the buffer been large enough.  For conforming
1722     vswprintf implementations the function must return a negative number
1723     and set errno.
1724
1725     What vswprintf sets errno to is undefined but Darwin seems to set it to
1726     EOVERFLOW.  The only expected errno are EILSEQ and EINVAL.  Both of
1727     those are defined in the standard and backed up by several conformance
1728     statements.  Note that ENOMEM mentioned in the manual page does not
1729     apply to swprintf, only wprintf and fwprintf.
1730
1731     Official manual page:
1732     http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1733
1734     Some conformance statements (AIX, Solaris):
1735     http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1736     http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1737
1738     Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1739     EILSEQ and EINVAL are specifically defined to mean the error is other than
1740     an undersized buffer and no other errno are defined we treat those two
1741     as meaning hard errors and everything else gets the old behavior which
1742     is to keep looping and increasing buffer size until the function succeeds.
1743
1744     In practice it's impossible to determine before compilation which behavior
1745     may be used.  The vswprintf function may have vsnprintf-like behavior or
1746     vice-versa.  Behavior detected on one release can theoretically change
1747     with an updated release.  Not to mention that configure testing for it
1748     would require the test to be run on the host system, not the build system
1749     which makes cross compilation difficult. Therefore, we make no assumptions
1750     about behavior and try our best to handle every known case, including the
1751     case where wxVsnprintf returns a negative number and fails to set errno.
1752
1753     There is yet one more non-standard implementation and that is our own.
1754     Fortunately, that can be detected at compile-time.
1755
1756     On top of all that, ISO C99 explicitly defines snprintf to write a null
    character to the last position of the specified buffer.  That would be
    at the given buffer size minus 1.  It is supposed to do this even if it
1759     turns out that the buffer is sized too small.
1760
1761     Darwin (tested on 10.5) follows the C99 behavior exactly.
1762
1763     Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1764     errno even when it fails.  However, it only seems to ever fail due
1765     to an undersized buffer.
1766 */
1767 #if wxUSE_UNICODE_UTF8
1768 template<typename BufferType>
1769 #else
1770 // we only need one version in non-UTF8 builds and at least two Windows
1771 // compilers have problems with this function template, so use just one
1772 // normal function here
1773 #endif
1774 static int DoStringPrintfV(wxString& str,
1775                            const wxString& format, va_list argptr)
1776 {
1777     int size = 1024;
1778
1779     for ( ;; )
1780     {
1781 #if wxUSE_UNICODE_UTF8
1782         BufferType tmp(str, size + 1);
1783         typename BufferType::CharType *buf = tmp;
1784 #else
1785         wxStringBuffer tmp(str, size + 1);
1786         wxChar *buf = tmp;
1787 #endif
1788
1789         if ( !buf )
1790         {
1791             // out of memory
1792
1793             // in UTF-8 build, leaving uninitialized junk in the buffer
1794             // could result in invalid non-empty UTF-8 string, so just
1795             // reset the string to empty on failure:
1796             buf[0] = '\0';
1797             return -1;
1798         }
1799
1800         // wxVsnprintf() may modify the original arg pointer, so pass it
1801         // only a copy
1802         va_list argptrcopy;
1803         wxVaCopy(argptrcopy, argptr);
1804
1805 #ifndef __WXWINCE__
1806         // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1807         errno = 0;
1808 #endif
1809         int len = wxVsnprintf(buf, size, format, argptrcopy);
1810         va_end(argptrcopy);
1811
1812         // some implementations of vsnprintf() don't NUL terminate
1813         // the string if there is not enough space for it so
1814         // always do it manually
1815         // FIXME: This really seems to be the wrong and would be an off-by-one
1816         // bug except the code above allocates an extra character.
1817         buf[size] = _T('\0');
1818
1819         // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1820         // total number of characters which would have been written if the
1821         // buffer were large enough (newer standards such as Unix98)
1822         if ( len < 0 )
1823         {
1824             // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1825             //     wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1826             //     is true if *both* of them use our own implementation,
1827             //     otherwise we can't be sure
1828 #if wxUSE_WXVSNPRINTF
1829             // we know that our own implementation of wxVsnprintf() returns -1
1830             // only for a format error - thus there's something wrong with
1831             // the user's format string
1832             buf[0] = '\0';
1833             return -1;
1834 #else // possibly using system version
1835             // assume it only returns error if there is not enough space, but
1836             // as we don't know how much we need, double the current size of
1837             // the buffer
1838 #ifndef __WXWINCE__
1839             if( (errno == EILSEQ) || (errno == EINVAL) )
1840             // If errno was set to one of the two well-known hard errors
1841             // then fail immediately to avoid an infinite loop.
1842                 return -1;
1843             else
1844 #endif // __WXWINCE__
1845             // still not enough, as we don't know how much we need, double the
1846             // current size of the buffer
1847                 size *= 2;
1848 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1849         }
1850         else if ( len >= size )
1851         {
1852 #if wxUSE_WXVSNPRINTF
1853             // we know that our own implementation of wxVsnprintf() returns
1854             // size+1 when there's not enough space but that's not the size
1855             // of the required buffer!
1856             size *= 2;      // so we just double the current size of the buffer
1857 #else
1858             // some vsnprintf() implementations NUL-terminate the buffer and
1859             // some don't in len == size case, to be safe always add 1
1860             // FIXME: I don't quite understand this comment.  The vsnprintf
1861             // function is specifically defined to return the number of
1862             // characters printed not including the null terminator.
1863             // So OF COURSE you need to add 1 to get the right buffer size.
1864             // The following line is definitely correct, no question.
1865             size = len + 1;
1866 #endif
1867         }
1868         else // ok, there was enough space
1869         {
1870             break;
1871         }
1872     }
1873
1874     // we could have overshot
1875     str.Shrink();
1876
1877     return str.length();
1878 }
1879
// Formats the arguments in argptr according to format into this string by
// forwarding to DoStringPrintfV() and returns its result. Which buffer type
// DoStringPrintfV() is instantiated with depends on the build configuration
// and, in UTF-8 builds not restricted to UTF-8 locales, on wxLocaleIsUtf8.
int wxString::PrintfV(const wxString& format, va_list argptr)
{
    // buffer type used for the char-based (UTF-8) variant
#if wxUSE_UNICODE_UTF8
    #if wxUSE_STL_BASED_WXSTRING
        typedef wxStringTypeBuffer<char> Utf8Buffer;
    #else
        typedef wxStringInternalBuffer Utf8Buffer;
    #endif
#endif

#if wxUSE_UTF8_LOCALE_ONLY
    // only the UTF-8 variant is used in this configuration
    return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
#else
    #if wxUSE_UNICODE_UTF8
    // choose between the two variants at run time
    if ( wxLocaleIsUtf8 )
        return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
    else
        // wxChar* version
        return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
    #else
        // non-UTF8 builds have a single, non-template DoStringPrintfV()
        return DoStringPrintfV(*this, format, argptr);
    #endif // UTF8/WCHAR
#endif
}
1904
1905 // ----------------------------------------------------------------------------
1906 // misc other operations
1907 // ----------------------------------------------------------------------------
1908
1909 // Returns true if the whole string matches mask, a shell-style pattern in
1910 // which '?' stands for any single character and '*' for any (possibly empty)
1911 // run of characters; every other mask character must match literally.
     //
     // The live implementation is the hand-written matcher in the #else branch
     // below: it walks mask and text in parallel, handles each '*' by searching
     // the text for the literal run that follows it, and retries from the last
     // '*' when the mask is exhausted while text is still left over.
1912 bool wxString::Matches(const wxString& mask) const
1913 {
1914     // I disable this code as it doesn't seem to be faster (in fact, it seems
1915     // to be much slower) than the old, hand-written code below and using it
1916     // here requires always linking with libregex even if the user code doesn't
1917     // use it
1918 #if 0 // wxUSE_REGEX
         // NOTE(review): this disabled code refers to pszMask, which this function
         // only declares in the #else branch; it looks stale and would need
         // updating before being re-enabled.
1919     // first translate the shell-like mask into a regex
1920     wxString pattern;
1921     pattern.reserve(wxStrlen(pszMask));
1922
1923     pattern += _T('^');
1924     while ( *pszMask )
1925     {
1926         switch ( *pszMask )
1927         {
1928             case _T('?'):
1929                 pattern += _T('.');
1930                 break;
1931
1932             case _T('*'):
1933                 pattern += _T(".*");
1934                 break;
1935
1936             case _T('^'):
1937             case _T('.'):
1938             case _T('$'):
1939             case _T('('):
1940             case _T(')'):
1941             case _T('|'):
1942             case _T('+'):
1943             case _T('\\'):
1944                 // these characters are special in a RE, quote them
1945                 // (however note that we don't quote '[' and ']' to allow
1946                 // using them for Unix shell like matching)
1947                 pattern += _T('\\');
1948                 // fall through
1949
1950             default:
1951                 pattern += *pszMask;
1952         }
1953
1954         pszMask++;
1955     }
1956     pattern += _T('$');
1957
1958     // and now use it
1959     return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
1960 #else // !wxUSE_REGEX
1961   // TODO: this is, of course, awfully inefficient...
1962
1963   // FIXME-UTF8: implement using iterators, remove #if
       // In the UTF-8 build the matching is done on the buffers returned by
       // wc_str(), which are held in maskBuf/txtBuf for the whole function;
       // otherwise the pointers returned by wx_str() are used directly.
1964 #if wxUSE_UNICODE_UTF8
1965   wxWCharBuffer maskBuf = mask.wc_str();
1966   wxWCharBuffer txtBuf = wc_str();
1967   const wxChar *pszMask = maskBuf.data();
1968   const wxChar *pszTxt = txtBuf.data();
1969 #else
1970   const wxChar *pszMask = mask.wx_str();
1971   // the char currently being checked
1972   const wxChar *pszTxt = wx_str();
1973 #endif
1974
1975   // text and mask positions recorded at the most recent '*'; they are used by
       // the backtracking code at the end and stay NULL until a '*' is seen
1976   const wxChar *pszLastStarInText = NULL;
1977   const wxChar *pszLastStarInMask = NULL;
1978
       // (re)entry point of the matching loop: the backtracking code at the end
       // of the function jumps back here after resetting pszTxt and pszMask
1979 match:
1980   for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
1981     switch ( *pszMask ) {
1982       case wxT('?'):
             // '?' needs one character of text, whatever it is
1983         if ( *pszTxt == wxT('\0') )
1984           return false;
1985
1986         // pszTxt and pszMask will be incremented in the loop statement
1987
1988         break;
1989
1990       case wxT('*'):
1991         {
1992           // remember where we started to be able to backtrack later
1993           pszLastStarInText = pszTxt;
1994           pszLastStarInMask = pszMask;
1995
1996           // ignore special chars immediately following this one
1997           // (should this be an error?)
               // NOTE: the loop also steps over the '*' itself, and any '?' that
               // directly follows is dropped without consuming a text character,
               // so e.g. the mask "*?" matches the empty string
1998           while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
1999             pszMask++;
2000
2001           // if there is nothing more, match
2002           if ( *pszMask == wxT('\0') )
2003             return true;
2004
2005           // are there any other metacharacters in the mask?
2006           size_t uiLenMask;
2007           const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));
2008
2009           if ( pEndMask != NULL ) {
2010             // we have to match the string between two metachars
2011             uiLenMask = pEndMask - pszMask;
2012           }
2013           else {
2014             // we have to match the remainder of the string
2015             uiLenMask = wxStrlen(pszMask);
2016           }
2017
               // look for the literal run of uiLenMask mask characters in the
               // text, starting at the current text position; if it does not
               // occur at all there is nothing left to try
2018           wxString strToMatch(pszMask, uiLenMask);
2019           const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
2020           if ( pMatch == NULL )
2021             return false;
2022
               // continue after the run that was found, in both text and mask
2023           // -1 to compensate "++" in the loop
2024           pszTxt = pMatch + uiLenMask - 1;
2025           pszMask += uiLenMask - 1;
2026         }
2027         break;
2028
2029       default:
             // literal mask character: must be equal to the current text character
             // (a text that has ended compares unequal, as the loop condition
             // guarantees that *pszMask is not NUL here)
             // NOTE(review): this returns immediately instead of going through
             // the backtracking code below, so a mismatch after an earlier '*'
             // is final; e.g. text "abcabxd" appears to be rejected by the mask
             // "a*b?d" although a match exists -- confirm whether intended
2030         if ( *pszMask != *pszTxt )
2031           return false;
2032         break;
2033     }
2034   }
2035
       // the whole mask has been consumed at this point
2036   // match only if nothing left
2037   if ( *pszTxt == wxT('\0') )
2038     return true;
2039
2040   // if we failed to match, backtrack if we can
       // i.e. restart at the last '*' of the mask with the text position recorded
       // for it advanced by one character; the '*' case then records the new
       // position and searches for the following literal run again from there
2041   if ( pszLastStarInText ) {
2042     pszTxt = pszLastStarInText + 1;
2043     pszMask = pszLastStarInMask;
2044
2045     pszLastStarInText = NULL;
2046
2047     // don't bother resetting pszLastStarInMask, it's unnecessary
2048
2049     goto match;
2050   }
2051
2052   return false;
2053 #endif // wxUSE_REGEX/!wxUSE_REGEX
2054 }
2055
2056 // Returns the number of times the character ch occurs in this string.
     //
     // The string is walked once from begin() to end() and every element is
     // compared with ch; the result is 0 if ch does not occur at all.
2057 int wxString::Freq(wxUniChar ch) const
2058 {
2059     int count = 0;
2060     for ( const_iterator i = begin(); i != end(); ++i )
2061     {
2062         if ( *i == ch )
2063             count ++;
2064     }
2065     return count;
2066 }
2067
2068 // ----------------------------------------------------------------------------
2069 // wxUTF8StringBuffer
2070 // ----------------------------------------------------------------------------
2071
2072 #if wxUSE_UNICODE_WCHAR
     // Converts the UTF-8 data in m_buf to wide characters using
     // wxMBConvStrictUTF8 and writes the result through a wxStringInternalBuffer
     // created on m_str.
     //
     // ToWChar() is called twice: first with a NULL destination, whose result
     // wlen is used as the size of the buffer, then again to fill that buffer.
     // If the first call returns wxCONV_FAILED the wxCHECK_RET() triggers
     // (presumably reporting the message and returning early -- confirm) and the
     // buffer on m_str is never created.
2073 wxUTF8StringBuffer::~wxUTF8StringBuffer()
2074 {
2075     wxMBConvStrictUTF8 conv;
2076     size_t wlen = conv.ToWChar(NULL, 0, m_buf);
2077     wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
2078
2079     wxStringInternalBuffer wbuf(m_str, wlen);
2080     conv.ToWChar(wbuf, wlen, m_buf);
2081 }
2082
     // Same conversion as in ~wxUTF8StringBuffer() but for data with an explicit
     // length: m_lenSet is checked first, m_len is passed to both ToWChar() calls
     // as the source length, and the converted length wlen is finally set on the
     // wxStringInternalBufferLength with SetLength().
2083 wxUTF8StringBufferLength::~wxUTF8StringBufferLength()
2084 {
2085     wxCHECK_RET(m_lenSet, "length not set");
2086
2087     wxMBConvStrictUTF8 conv;
2088     size_t wlen = conv.ToWChar(NULL, 0, m_buf, m_len);
2089     wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
2090
2091     wxStringInternalBufferLength wbuf(m_str, wlen);
2092     conv.ToWChar(wbuf, wlen, m_buf, m_len);
2093     wbuf.SetLength(wlen);
2094 }
2095 #endif // wxUSE_UNICODE_WCHAR
2096
2097 // ----------------------------------------------------------------------------
2098 // wxCharTypeBuffer<T>
2099 // ----------------------------------------------------------------------------
2100
     // Definitions of the static member NullData, of type Data, for the char and
     // wchar_t instantiations of wxCharTypeBuffer; each one is constructed from
     // NULL.
2101 template<>
2102 wxCharTypeBuffer<char>::Data
2103 wxCharTypeBuffer<char>::NullData(NULL);
2104
2105 template<>
2106 wxCharTypeBuffer<wchar_t>::Data
2107 wxCharTypeBuffer<wchar_t>::NullData(NULL);