src/common/string.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/string.cpp
   3 // Purpose:     wxString class
   4 // Author:      Vadim Zeitlin, Ryan Norton
   5 // Modified by:
   6 // Created:     29/01/98
   7 // RCS-ID:      $Id$
   8 // Copyright:   (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
   9 //              (c) 2004 Ryan Norton <wxprojects@comcast.net>
  10 // Licence:     wxWindows licence
  11 /////////////////////////////////////////////////////////////////////////////
  12
  13 // ===========================================================================
  14 // headers, declarations, constants
  15 // ===========================================================================
  16
  17 // For compilers that support precompilation, includes "wx.h".
  18 #include "wx/wxprec.h"
  19
  20 #ifdef __BORLANDC__
  21     #pragma hdrstop
  22 #endif
  23
  24 #ifndef WX_PRECOMP
  25     #include "wx/string.h"
  26     #include "wx/wxcrtvararg.h"
  27 #endif
  28
  29 #include <ctype.h>
  30
  31 #ifndef __WXWINCE__
  32     #include <errno.h>
  33 #endif
  34
  35 #include <string.h>
  36 #include <stdlib.h>
  37
  38 #include "wx/hashmap.h"
  39
  40 // string handling functions used by wxString:
  41 #if wxUSE_UNICODE_UTF8
  42     #define wxStringMemcpy   memcpy
  43     #define wxStringMemcmp   memcmp
  44     #define wxStringMemchr   memchr
  45     #define wxStringStrlen   strlen
  46 #else
  47     #define wxStringMemcpy   wxTmemcpy
  48     #define wxStringMemcmp   wxTmemcmp
  49     #define wxStringMemchr   wxTmemchr
  50     #define wxStringStrlen   wxStrlen
  51 #endif
  52
  53
  54 // ---------------------------------------------------------------------------
  55 // static class variables definition
  56 // ---------------------------------------------------------------------------
  57
  58 //According to STL _must_ be a -1 size_t
  59 const size_t wxString::npos = (size_t) -1;
  60
  61 #if wxUSE_UNICODE_UTF8
  62 wxString::PosToImplCache wxString::ms_cache;
  63 #endif // wxUSE_UNICODE_UTF8
  64
  65 // ----------------------------------------------------------------------------
  66 // global functions
  67 // ----------------------------------------------------------------------------
  68
  69 #if wxUSE_STD_IOSTREAM
  70
  71 #include <iostream>
  72
  73 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
  74 {
  75 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
  76     return os << (const char *)str.AsCharBuf();
  77 #else
  78     return os << str.AsInternal();
  79 #endif
  80 }
  81
  82 wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
  83 {
  84     return os << str.c_str();
  85 }
  86
  87 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str)
  88 {
  89     return os << str.data();
  90 }
  91
  92 #ifndef __BORLANDC__
  93 wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)
  94 {
  95     return os << str.data();
  96 }
  97 #endif
  98
  99 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
 100
 101 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxString& str)
 102 {
 103     return wos << str.wc_str();
 104 }
 105
 106 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxCStrData& str)
 107 {
 108     return wos << str.AsWChar();
 109 }
 110
 111 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxWCharBuffer& str)
 112 {
 113     return wos << str.data();
 114 }
 115
 116 #endif  // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
 117
 118 #endif // wxUSE_STD_IOSTREAM
 119
 120 // ===========================================================================
 121 // wxString class core
 122 // ===========================================================================
 123
 124 #if wxUSE_UNICODE_UTF8
 125
 126 void wxString::PosLenToImpl(size_t pos, size_t len,
 127                             size_t *implPos, size_t *implLen) const
 128 {
 129     if ( pos == npos )
 130         *implPos = npos;
 131     else
 132     {
 133         const_iterator i = begin() + pos;
 134         *implPos = wxStringImpl::const_iterator(i.impl()) - m_impl.begin();
 135         if ( len == npos )
 136             *implLen = npos;
 137         else
 138         {
 139             // too large length is interpreted as "to the end of the string"
 140             // FIXME-UTF8: verify this is the case in std::string, assert
 141             // otherwise
 142             if ( pos + len > length() )
 143                 len = length() - pos;
 144
 145             *implLen = (i + len).impl() - i.impl();
 146         }
 147     }
 148 }
 149
 150 #endif // wxUSE_UNICODE_UTF8
 151
 152 // ----------------------------------------------------------------------------
 153 // wxCStrData converted strings caching
 154 // ----------------------------------------------------------------------------
 155
 156 // FIXME-UTF8: temporarily disabled because it doesn't work with global
 157 //             string objects; re-enable after fixing this bug and benchmarking
 158 //             performance to see if using a hash is a good idea at all
 159 #if 0
 160
 161 // For backward compatibility reasons, it must be possible to assign the value
 162 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
 163 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
 164 // because the memory would be freed immediately, but it has to be valid as long
 165 // as the string is not modified, so that code like this still works:
 166 //
 167 // const wxChar *s = str.c_str();
 168 // while ( s ) { ... }
 169
 170 // FIXME-UTF8: not thread safe!
 171 // FIXME-UTF8: we currently clear the cached conversion only when the string is
 172 //             destroyed, but we should do it when the string is modified, to
 173 //             keep memory usage down
 174 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
 175 //             invalidated the cache on every change, we could keep the previous
 176 //             conversion
 177 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
 178 //             to use mb_str() or wc_str() instead of (const [w]char*)c_str()
 179
 180 template<typename T>
 181 static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
 182 {
 183     typename T::iterator i = hash.find(wxConstCast(s, wxString));
 184     if ( i != hash.end() )
 185     {
 186         free(i->second);
 187         hash.erase(i);
 188     }
 189 }
 190
 191 #if wxUSE_UNICODE
 192 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
 193 //     so we have to use wxString* here and const-cast when used
 194 WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
 195                     wxStringCharConversionCache);
 196 static wxStringCharConversionCache gs_stringsCharCache;
 197
 198 const char* wxCStrData::AsChar() const
 199 {
 200     // remove previously cache value, if any (see FIXMEs above):
 201     DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
 202
 203     // convert the string and keep it:
 204     const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
 205         m_str->mb_str().release();
 206
 207     return s + m_offset;
 208 }
 209 #endif // wxUSE_UNICODE
 210
 211 #if !wxUSE_UNICODE_WCHAR
 212 WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
 213                     wxStringWCharConversionCache);
 214 static wxStringWCharConversionCache gs_stringsWCharCache;
 215
 216 const wchar_t* wxCStrData::AsWChar() const
 217 {
 218     // remove previously cache value, if any (see FIXMEs above):
 219     DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
 220
 221     // convert the string and keep it:
 222     const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
 223         m_str->wc_str().release();
 224
 225     return s + m_offset;
 226 }
 227 #endif // !wxUSE_UNICODE_WCHAR
 228
 229 wxString::~wxString()
 230 {
 231 #if wxUSE_UNICODE
 232     // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
 233     DeleteStringFromConversionCache(gs_stringsCharCache, this);
 234 #endif
 235 #if !wxUSE_UNICODE_WCHAR
 236     DeleteStringFromConversionCache(gs_stringsWCharCache, this);
 237 #endif
 238 }
 239 #endif
 240
 241 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 242 const char* wxCStrData::AsChar() const
 243 {
 244 #if wxUSE_UNICODE_UTF8
 245     if ( wxLocaleIsUtf8 )
 246         return AsInternal();
 247 #endif
 248     // under non-UTF8 locales, we have to convert the internal UTF-8
 249     // representation using wxConvLibc and cache the result
 250
 251     wxString *str = wxConstCast(m_str, wxString);
 252
 253     // convert the string:
 254     //
 255     // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
 256     //             have it) but it's unfortunately not obvious to implement
 257     //             because we don't know how big buffer do we need for the
 258     //             given string length (in case of multibyte encodings, e.g.
 259     //             ISO-2022-JP or UTF-8 when internal representation is wchar_t)
 260     //
 261     //             One idea would be to store more than just m_convertedToChar
 262     //             in wxString: then we could record the length of the string
 263     //             which was converted the last time and try to reuse the same
 264     //             buffer if the current length is not greater than it (this
 265     //             could still fail because string could have been modified in
 266     //             place but it would work most of the time, so we'd do it and
 267     //             only allocate the new buffer if in-place conversion returned
 268     //             an error). We could also store a bit saying if the string
 269     //             was modified since the last conversion (and update it in all
 270     //             operation modifying the string, of course) to avoid unneeded
 271     //             consequential conversions. But both of these ideas require
 272     //             adding more fields to wxString and require profiling results
 273     //             to be sure that we really gain enough from them to justify
 274     //             doing it.
 275     wxCharBuffer buf(str->mb_str());
 276
 277     // if it failed, return empty string and not NULL to avoid crashes in code
 278     // written with either wxWidgets 2 wxString or std::string behaviour in
 279     // mind: neither of them ever returns NULL and so we shouldn't neither
 280     if ( !buf )
 281         return "";
 282
 283     if ( str->m_convertedToChar &&
 284          strlen(buf) == strlen(str->m_convertedToChar) )
 285     {
 286         // keep the same buffer for as long as possible, so that several calls
 287         // to c_str() in a row still work:
 288         strcpy(str->m_convertedToChar, buf);
 289     }
 290     else
 291     {
 292         str->m_convertedToChar = buf.release();
 293     }
 294
 295     // and keep it:
 296     return str->m_convertedToChar + m_offset;
 297 }
 298 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 299
 300 #if !wxUSE_UNICODE_WCHAR
 301 const wchar_t* wxCStrData::AsWChar() const
 302 {
 303     wxString *str = wxConstCast(m_str, wxString);
 304
 305     // convert the string:
 306     wxWCharBuffer buf(str->wc_str());
 307
 308     // notice that here, unlike above in AsChar(), conversion can't fail as our
 309     // internal UTF-8 is always well-formed -- or the string was corrupted and
 310     // all bets are off anyhow
 311
 312     // FIXME-UTF8: do the conversion in-place in the existing buffer
 313     if ( str->m_convertedToWChar &&
 314          wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
 315     {
 316         // keep the same buffer for as long as possible, so that several calls
 317         // to c_str() in a row still work:
 318         memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
 319     }
 320     else
 321     {
 322         str->m_convertedToWChar = buf.release();
 323     }
 324
 325     // and keep it:
 326     return str->m_convertedToWChar + m_offset;
 327 }
 328 #endif // !wxUSE_UNICODE_WCHAR
 329
 330 // ===========================================================================
 331 // wxString class core
 332 // ===========================================================================
 333
 334 // ---------------------------------------------------------------------------
 335 // construction and conversion
 336 // ---------------------------------------------------------------------------
 337
 338 #if wxUSE_UNICODE_WCHAR
 339 /* static */
 340 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 341                                                const wxMBConv& conv)
 342 {
 343     // anything to do?
 344     if ( !psz || nLength == 0 )
 345         return SubstrBufFromMB(L"", 0);
 346
 347     if ( nLength == npos )
 348         nLength = wxNO_LEN;
 349
 350     size_t wcLen;
 351     wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 352     if ( !wcLen )
 353         return SubstrBufFromMB(_T(""), 0);
 354     else
 355         return SubstrBufFromMB(wcBuf, wcLen);
 356 }
 357 #endif // wxUSE_UNICODE_WCHAR
 358
 359 #if wxUSE_UNICODE_UTF8
 360 /* static */
 361 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 362                                                const wxMBConv& conv)
 363 {
 364     // anything to do?
 365     if ( !psz || nLength == 0 )
 366         return SubstrBufFromMB("", 0);
 367
 368     // if psz is already in UTF-8, we don't have to do the roundtrip to
 369     // wchar_t* and back:
 370     if ( conv.IsUTF8() )
 371     {
 372         // we need to validate the input because UTF8 iterators assume valid
 373         // UTF-8 sequence and psz may be invalid:
 374         if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
 375         {
 376             // we must pass the real string length to SubstrBufFromMB ctor
 377             if ( nLength == npos )
 378                 nLength = psz ? strlen(psz) : 0;
 379             return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz), nLength);
 380         }
 381         // else: do the roundtrip through wchar_t*
 382     }
 383
 384     if ( nLength == npos )
 385         nLength = wxNO_LEN;
 386
 387     // first convert to wide string:
 388     size_t wcLen;
 389     wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 390     if ( !wcLen )
 391         return SubstrBufFromMB("", 0);
 392
 393     // and then to UTF-8:
 394     SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
 395     // widechar -> UTF-8 conversion isn't supposed to ever fail:
 396     wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
 397
 398     return buf;
 399 }
 400 #endif // wxUSE_UNICODE_UTF8
 401
 402 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 403 /* static */
 404 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
 405                                                const wxMBConv& conv)
 406 {
 407     // anything to do?
 408     if ( !pwz || nLength == 0 )
 409         return SubstrBufFromWC("", 0);
 410
 411     if ( nLength == npos )
 412         nLength = wxNO_LEN;
 413
 414     size_t mbLen;
 415     wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
 416     if ( !mbLen )
 417         return SubstrBufFromWC("", 0);
 418     else
 419         return SubstrBufFromWC(mbBuf, mbLen);
 420 }
 421 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 422
 423
 424 #if wxUSE_UNICODE_WCHAR
 425
 426 //Convert wxString in Unicode mode to a multi-byte string
 427 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
 428 {
 429     return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL);
 430 }
 431
 432 #elif wxUSE_UNICODE_UTF8
 433
 434 const wxWCharBuffer wxString::wc_str() const
 435 {
 436     return wxMBConvStrictUTF8().cMB2WC
 437                                 (
 438                                     m_impl.c_str(),
 439                                     m_impl.length() + 1, // size, not length
 440                                     NULL
 441                                 );
 442 }
 443
 444 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
 445 {
 446     if ( conv.IsUTF8() )
 447         return wxCharBuffer::CreateNonOwned(m_impl.c_str());
 448
 449     // FIXME-UTF8: use wc_str() here once we have buffers with length
 450
 451     size_t wcLen;
 452     wxWCharBuffer wcBuf(wxMBConvStrictUTF8().cMB2WC
 453                                              (
 454                                                 m_impl.c_str(),
 455                                                 m_impl.length() + 1, // size
 456                                                 &wcLen
 457                                              ));
 458     if ( !wcLen )
 459         return wxCharBuffer("");
 460
 461     return conv.cWC2MB(wcBuf, wcLen+1, NULL);
 462 }
 463
 464 #else // ANSI
 465
 466 //Converts this string to a wide character string if unicode
 467 //mode is not enabled and wxUSE_WCHAR_T is enabled
 468 const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
 469 {
 470     return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL);
 471 }
 472
 473 #endif // Unicode/ANSI
 474
 475 // shrink to minimal size (releasing extra memory)
 476 bool wxString::Shrink()
 477 {
 478   wxString tmp(begin(), end());
 479   swap(tmp);
 480   return tmp.length() == length();
 481 }
 482
 483 // deprecated compatibility code:
 484 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
 485 wxStringCharType *wxString::GetWriteBuf(size_t nLen)
 486 {
 487     return DoGetWriteBuf(nLen);
 488 }
 489
 490 void wxString::UngetWriteBuf()
 491 {
 492     DoUngetWriteBuf();
 493 }
 494
 495 void wxString::UngetWriteBuf(size_t nLen)
 496 {
 497     DoUngetWriteBuf(nLen);
 498 }
 499 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
 500
 501
 502 // ---------------------------------------------------------------------------
 503 // data access
 504 // ---------------------------------------------------------------------------
 505
 506 // all functions are inline in string.h
 507
 508 // ---------------------------------------------------------------------------
 509 // concatenation operators
 510 // ---------------------------------------------------------------------------
 511
 512 /*
 513  * concatenation functions come in 5 flavours:
 514  *  string + string
 515  *  char   + string      and      string + char
 516  *  C str  + string      and      string + C str
 517  */
 518
 519 wxString operator+(const wxString& str1, const wxString& str2)
 520 {
 521 #if !wxUSE_STL_BASED_WXSTRING
 522     wxASSERT( str1.IsValid() );
 523     wxASSERT( str2.IsValid() );
 524 #endif
 525
 526     wxString s = str1;
 527     s += str2;
 528
 529     return s;
 530 }
 531
 532 wxString operator+(const wxString& str, wxUniChar ch)
 533 {
 534 #if !wxUSE_STL_BASED_WXSTRING
 535     wxASSERT( str.IsValid() );
 536 #endif
 537
 538     wxString s = str;
 539     s += ch;
 540
 541     return s;
 542 }
 543
 544 wxString operator+(wxUniChar ch, const wxString& str)
 545 {
 546 #if !wxUSE_STL_BASED_WXSTRING
 547     wxASSERT( str.IsValid() );
 548 #endif
 549
 550     wxString s = ch;
 551     s += str;
 552
 553     return s;
 554 }
 555
 556 wxString operator+(const wxString& str, const char *psz)
 557 {
 558 #if !wxUSE_STL_BASED_WXSTRING
 559     wxASSERT( str.IsValid() );
 560 #endif
 561
 562     wxString s;
 563     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 564         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 565     }
 566     s += str;
 567     s += psz;
 568
 569     return s;
 570 }
 571
 572 wxString operator+(const wxString& str, const wchar_t *pwz)
 573 {
 574 #if !wxUSE_STL_BASED_WXSTRING
 575     wxASSERT( str.IsValid() );
 576 #endif
 577
 578     wxString s;
 579     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 580         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 581     }
 582     s += str;
 583     s += pwz;
 584
 585     return s;
 586 }
 587
 588 wxString operator+(const char *psz, const wxString& str)
 589 {
 590 #if !wxUSE_STL_BASED_WXSTRING
 591     wxASSERT( str.IsValid() );
 592 #endif
 593
 594     wxString s;
 595     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 596         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 597     }
 598     s = psz;
 599     s += str;
 600
 601     return s;
 602 }
 603
 604 wxString operator+(const wchar_t *pwz, const wxString& str)
 605 {
 606 #if !wxUSE_STL_BASED_WXSTRING
 607     wxASSERT( str.IsValid() );
 608 #endif
 609
 610     wxString s;
 611     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 612         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 613     }
 614     s = pwz;
 615     s += str;
 616
 617     return s;
 618 }
 619
 620 // ---------------------------------------------------------------------------
 621 // string comparison
 622 // ---------------------------------------------------------------------------
 623
 624 bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
 625 {
 626     return (length() == 1) && (compareWithCase ? GetChar(0u) == c
 627                                : wxToupper(GetChar(0u)) == wxToupper(c));
 628 }
 629
 630 #ifdef HAVE_STD_STRING_COMPARE
 631
 632 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
 633 //     UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
 634 //     sort strings in characters code point order by sorting the byte sequence
 635 //     in byte values order (i.e. what strcmp() and memcmp() do).
 636
 637 int wxString::compare(const wxString& str) const
 638 {
 639     return m_impl.compare(str.m_impl);
 640 }
 641
 642 int wxString::compare(size_t nStart, size_t nLen,
 643                       const wxString& str) const
 644 {
 645     size_t pos, len;
 646     PosLenToImpl(nStart, nLen, &pos, &len);
 647     return m_impl.compare(pos, len, str.m_impl);
 648 }
 649
 650 int wxString::compare(size_t nStart, size_t nLen,
 651                       const wxString& str,
 652                       size_t nStart2, size_t nLen2) const
 653 {
 654     size_t pos, len;
 655     PosLenToImpl(nStart, nLen, &pos, &len);
 656
 657     size_t pos2, len2;
 658     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 659
 660     return m_impl.compare(pos, len, str.m_impl, pos2, len2);
 661 }
 662
 663 int wxString::compare(const char* sz) const
 664 {
 665     return m_impl.compare(ImplStr(sz));
 666 }
 667
 668 int wxString::compare(const wchar_t* sz) const
 669 {
 670     return m_impl.compare(ImplStr(sz));
 671 }
 672
 673 int wxString::compare(size_t nStart, size_t nLen,
 674                       const char* sz, size_t nCount) const
 675 {
 676     size_t pos, len;
 677     PosLenToImpl(nStart, nLen, &pos, &len);
 678
 679     SubstrBufFromMB str(ImplStr(sz, nCount));
 680
 681     return m_impl.compare(pos, len, str.data, str.len);
 682 }
 683
 684 int wxString::compare(size_t nStart, size_t nLen,
 685                       const wchar_t* sz, size_t nCount) const
 686 {
 687     size_t pos, len;
 688     PosLenToImpl(nStart, nLen, &pos, &len);
 689
 690     SubstrBufFromWC str(ImplStr(sz, nCount));
 691
 692     return m_impl.compare(pos, len, str.data, str.len);
 693 }
 694
 695 #else // !HAVE_STD_STRING_COMPARE
 696
 697 static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
 698                           const wxStringCharType* s2, size_t l2)
 699 {
 700     if( l1 == l2 )
 701         return wxStringMemcmp(s1, s2, l1);
 702     else if( l1 < l2 )
 703     {
 704         int ret = wxStringMemcmp(s1, s2, l1);
 705         return ret == 0 ? -1 : ret;
 706     }
 707     else
 708     {
 709         int ret = wxStringMemcmp(s1, s2, l2);
 710         return ret == 0 ? +1 : ret;
 711     }
 712 }
 713
 714 int wxString::compare(const wxString& str) const
 715 {
 716     return ::wxDoCmp(m_impl.data(), m_impl.length(),
 717                      str.m_impl.data(), str.m_impl.length());
 718 }
 719
 720 int wxString::compare(size_t nStart, size_t nLen,
 721                       const wxString& str) const
 722 {
 723     wxASSERT(nStart <= length());
 724     size_type strLen = length() - nStart;
 725     nLen = strLen < nLen ? strLen : nLen;
 726
 727     size_t pos, len;
 728     PosLenToImpl(nStart, nLen, &pos, &len);
 729
 730     return ::wxDoCmp(m_impl.data() + pos,  len,
 731                      str.m_impl.data(), str.m_impl.length());
 732 }
 733
 734 int wxString::compare(size_t nStart, size_t nLen,
 735                       const wxString& str,
 736                       size_t nStart2, size_t nLen2) const
 737 {
 738     wxASSERT(nStart <= length());
 739     wxASSERT(nStart2 <= str.length());
 740     size_type strLen  =     length() - nStart,
 741               strLen2 = str.length() - nStart2;
 742     nLen  = strLen  < nLen  ? strLen  : nLen;
 743     nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
 744
 745     size_t pos, len;
 746     PosLenToImpl(nStart, nLen, &pos, &len);
 747     size_t pos2, len2;
 748     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 749
 750     return ::wxDoCmp(m_impl.data() + pos, len,
 751                      str.m_impl.data() + pos2, len2);
 752 }
 753
 754 int wxString::compare(const char* sz) const
 755 {
 756     SubstrBufFromMB str(ImplStr(sz, npos));
 757     if ( str.len == npos )
 758         str.len = wxStringStrlen(str.data);
 759     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 760 }
 761
 762 int wxString::compare(const wchar_t* sz) const
 763 {
 764     SubstrBufFromWC str(ImplStr(sz, npos));
 765     if ( str.len == npos )
 766         str.len = wxStringStrlen(str.data);
 767     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 768 }
 769
 770 int wxString::compare(size_t nStart, size_t nLen,
 771                       const char* sz, size_t nCount) const
 772 {
 773     wxASSERT(nStart <= length());
 774     size_type strLen = length() - nStart;
 775     nLen = strLen < nLen ? strLen : nLen;
 776
 777     size_t pos, len;
 778     PosLenToImpl(nStart, nLen, &pos, &len);
 779
 780     SubstrBufFromMB str(ImplStr(sz, nCount));
 781     if ( str.len == npos )
 782         str.len = wxStringStrlen(str.data);
 783
 784     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 785 }
 786
 787 int wxString::compare(size_t nStart, size_t nLen,
 788                       const wchar_t* sz, size_t nCount) const
 789 {
 790     wxASSERT(nStart <= length());
 791     size_type strLen = length() - nStart;
 792     nLen = strLen < nLen ? strLen : nLen;
 793
 794     size_t pos, len;
 795     PosLenToImpl(nStart, nLen, &pos, &len);
 796
 797     SubstrBufFromWC str(ImplStr(sz, nCount));
 798     if ( str.len == npos )
 799         str.len = wxStringStrlen(str.data);
 800
 801     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 802 }
 803
 804 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
 805
 806
 807 // ---------------------------------------------------------------------------
 808 // find_{first,last}_[not]_of functions
 809 // ---------------------------------------------------------------------------
 810
 811 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
 812
 813 // NB: All these functions are implemented  with the argument being wxChar*,
 814 //     i.e. widechar string in any Unicode build, even though native string
 815 //     representation is char* in the UTF-8 build. This is because we couldn't
 816 //     use memchr() to determine if a character is in a set encoded as UTF-8.
 817
 818 size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
 819 {
 820     return find_first_of(sz, nStart, wxStrlen(sz));
 821 }
 822
 823 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
 824 {
 825     return find_first_not_of(sz, nStart, wxStrlen(sz));
 826 }
 827
 828 size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
 829 {
 830     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 831
 832     size_t idx = nStart;
 833     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 834     {
 835         if ( wxTmemchr(sz, *i, n) )
 836             return idx;
 837     }
 838
 839     return npos;
 840 }
 841
 842 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
 843 {
 844     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 845
 846     size_t idx = nStart;
 847     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 848     {
 849         if ( !wxTmemchr(sz, *i, n) )
 850             return idx;
 851     }
 852
 853     return npos;
 854 }
 855
 856
 857 size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
 858 {
 859     return find_last_of(sz, nStart, wxStrlen(sz));
 860 }
 861
 862 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
 863 {
 864     return find_last_not_of(sz, nStart, wxStrlen(sz));
 865 }
 866
 867 size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
 868 {
 869     size_t len = length();
 870
 871     if ( nStart == npos )
 872     {
 873         nStart = len - 1;
 874     }
 875     else
 876     {
 877         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 878     }
 879
 880     size_t idx = nStart;
 881     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 882           i != rend(); --idx, ++i )
 883     {
 884         if ( wxTmemchr(sz, *i, n) )
 885             return idx;
 886     }
 887
 888     return npos;
 889 }
 890
 891 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
 892 {
 893     size_t len = length();
 894
 895     if ( nStart == npos )
 896     {
 897         nStart = len - 1;
 898     }
 899     else
 900     {
 901         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 902     }
 903
 904     size_t idx = nStart;
 905     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 906           i != rend(); --idx, ++i )
 907     {
 908         if ( !wxTmemchr(sz, *i, n) )
 909             return idx;
 910     }
 911
 912     return npos;
 913 }
 914
 915 size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
 916 {
 917     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 918
 919     size_t idx = nStart;
 920     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 921     {
 922         if ( *i != ch )
 923             return idx;
 924     }
 925
 926     return npos;
 927 }
 928
 929 size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
 930 {
 931     size_t len = length();
 932
 933     if ( nStart == npos )
 934     {
 935         nStart = len - 1;
 936     }
 937     else
 938     {
 939         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 940     }
 941
 942     size_t idx = nStart;
 943     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 944           i != rend(); --idx, ++i )
 945     {
 946         if ( *i != ch )
 947             return idx;
 948     }
 949
 950     return npos;
 951 }
 952
// the functions above were implemented for wchar_t* arguments in Unicode
// build and char* in ANSI build; below are implementations for the other
// version:
//
// wxOtherCharType is the character type not handled above and STRCONV converts
// a string of this type to const wxChar* using wxConvLibc. Both macros are
// only defined for the duration of this section and are #undef'ed after it.
//
// NB: each function below uses the result of STRCONV directly as an argument
//     of the forwarded call; presumably the conversion returns a temporary
//     buffer, so the pointer shouldn't be stored in a variable outliving the
//     expression (TODO confirm against wxMBConv declaration).
#if wxUSE_UNICODE
    #define wxOtherCharType char
    #define STRCONV         (const wxChar*)wxConvLibc.cMB2WC
#else
    #define wxOtherCharType wchar_t
    #define STRCONV         (const wxChar*)wxConvLibc.cWC2MB
#endif

// NOTE(review): the overloads taking the length n pass the same n both to the
// conversion (as the input length) and to the wxChar overload (as the length
// of the converted string); these could differ if the conversion doesn't map
// characters one to one -- confirm whether this is intended.

size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_first_of(STRCONV(sz), nStart); }

size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
                               size_t n) const
    { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_last_of(STRCONV(sz), nStart); }
size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
                              size_t n) const
    { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_first_not_of(STRCONV(sz), nStart); }
size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
                                   size_t n) const
    { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_last_not_of(STRCONV(sz), nStart); }
size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
                                  size_t n) const
    { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }

#undef wxOtherCharType
#undef STRCONV
 988
 989 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
 990
 991 // ===========================================================================
 992 // other common string functions
 993 // ===========================================================================
 994
 995 int wxString::CmpNoCase(const wxString& s) const
 996 {
 997     // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
 998
 999     const_iterator i1 = begin();
1000     const_iterator end1 = end();
1001     const_iterator i2 = s.begin();
1002     const_iterator end2 = s.end();
1003
1004     for ( ; i1 != end1 && i2 != end2; ++i1, ++i2 )
1005     {
1006         wxUniChar lower1 = (wxChar)wxTolower(*i1);
1007         wxUniChar lower2 = (wxChar)wxTolower(*i2);
1008         if ( lower1 != lower2 )
1009             return lower1 < lower2 ? -1 : 1;
1010     }
1011
1012     size_t len1 = length();
1013     size_t len2 = s.length();
1014
1015     if ( len1 < len2 )
1016         return -1;
1017     else if ( len1 > len2 )
1018         return 1;
1019     return 0;
1020 }
1021
1022
1023 #if wxUSE_UNICODE
1024
1025 #ifdef __MWERKS__
1026 #ifndef __SCHAR_MAX__
1027 #define __SCHAR_MAX__ 127
1028 #endif
1029 #endif
1030
1031 wxString wxString::FromAscii(const char *ascii, size_t len)
1032 {
1033     if (!ascii || len == 0)
1034        return wxEmptyString;
1035
1036     wxString res;
1037
1038     {
1039         wxStringInternalBuffer buf(res, len);
1040         wxStringCharType *dest = buf;
1041
1042         for ( ; len > 0; --len )
1043         {
1044             unsigned char c = (unsigned char)*ascii++;
1045             wxASSERT_MSG( c < 0x80,
1046                           _T("Non-ASCII value passed to FromAscii().") );
1047
1048             *dest++ = (wchar_t)c;
1049         }
1050     }
1051
1052     return res;
1053 }
1054
1055 wxString wxString::FromAscii(const char *ascii)
1056 {
1057     return FromAscii(ascii, wxStrlen(ascii));
1058 }
1059
1060 wxString wxString::FromAscii(char ascii)
1061 {
1062     // What do we do with '\0' ?
1063
1064     unsigned char c = (unsigned char)ascii;
1065
1066     wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") );
1067
1068     // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1069     return wxString(wxUniChar((wchar_t)c));
1070 }
1071
1072 const wxCharBuffer wxString::ToAscii() const
1073 {
1074     // this will allocate enough space for the terminating NUL too
1075     wxCharBuffer buffer(length());
1076     char *dest = buffer.data();
1077
1078     for ( const_iterator i = begin(); i != end(); ++i )
1079     {
1080         wxUniChar c(*i);
1081         // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1082         *dest++ = c.IsAscii() ? (char)c : '_';
1083
1084         // the output string can't have embedded NULs anyhow, so we can safely
1085         // stop at first of them even if we do have any
1086         if ( !c )
1087             break;
1088     }
1089
1090     return buffer;
1091 }
1092
1093 #endif // wxUSE_UNICODE
1094
1095 // extract string of length nCount starting at nFirst
1096 wxString wxString::Mid(size_t nFirst, size_t nCount) const
1097 {
1098     size_t nLen = length();
1099
1100     // default value of nCount is npos and means "till the end"
1101     if ( nCount == npos )
1102     {
1103         nCount = nLen - nFirst;
1104     }
1105
1106     // out-of-bounds requests return sensible things
1107     if ( nFirst + nCount > nLen )
1108     {
1109         nCount = nLen - nFirst;
1110     }
1111
1112     if ( nFirst > nLen )
1113     {
1114         // AllocCopy() will return empty string
1115         return wxEmptyString;
1116     }
1117
1118     wxString dest(*this, nFirst, nCount);
1119     if ( dest.length() != nCount )
1120     {
1121         wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1122     }
1123
1124     return dest;
1125 }
1126
1127 // check that the string starts with prefix and return the rest of the string
1128 // in the provided pointer if it is not NULL, otherwise return false
1129 bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
1130 {
1131     if ( compare(0, prefix.length(), prefix) != 0 )
1132         return false;
1133
1134     if ( rest )
1135     {
1136         // put the rest of the string into provided pointer
1137         rest->assign(*this, prefix.length(), npos);
1138     }
1139
1140     return true;
1141 }
1142
1143
1144 // check that the string ends with suffix and return the rest of it in the
1145 // provided pointer if it is not NULL, otherwise return false
1146 bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
1147 {
1148     int start = length() - suffix.length();
1149
1150     if ( start < 0 || compare(start, npos, suffix) != 0 )
1151         return false;
1152
1153     if ( rest )
1154     {
1155         // put the rest of the string into provided pointer
1156         rest->assign(*this, 0, start);
1157     }
1158
1159     return true;
1160 }
1161
1162
1163 // extract nCount last (rightmost) characters
1164 wxString wxString::Right(size_t nCount) const
1165 {
1166   if ( nCount > length() )
1167     nCount = length();
1168
1169   wxString dest(*this, length() - nCount, nCount);
1170   if ( dest.length() != nCount ) {
1171     wxFAIL_MSG( _T("out of memory in wxString::Right") );
1172   }
1173   return dest;
1174 }
1175
1176 // get all characters after the last occurence of ch
1177 // (returns the whole string if ch not found)
1178 wxString wxString::AfterLast(wxUniChar ch) const
1179 {
1180   wxString str;
1181   int iPos = Find(ch, true);
1182   if ( iPos == wxNOT_FOUND )
1183     str = *this;
1184   else
1185     str = wx_str() + iPos + 1;
1186
1187   return str;
1188 }
1189
1190 // extract nCount first (leftmost) characters
1191 wxString wxString::Left(size_t nCount) const
1192 {
1193   if ( nCount > length() )
1194     nCount = length();
1195
1196   wxString dest(*this, 0, nCount);
1197   if ( dest.length() != nCount ) {
1198     wxFAIL_MSG( _T("out of memory in wxString::Left") );
1199   }
1200   return dest;
1201 }
1202
1203 // get all characters before the first occurence of ch
1204 // (returns the whole string if ch not found)
1205 wxString wxString::BeforeFirst(wxUniChar ch) const
1206 {
1207   int iPos = Find(ch);
1208   if ( iPos == wxNOT_FOUND ) iPos = length();
1209   return wxString(*this, 0, iPos);
1210 }
1211
1212 /// get all characters before the last occurence of ch
1213 /// (returns empty string if ch not found)
1214 wxString wxString::BeforeLast(wxUniChar ch) const
1215 {
1216   wxString str;
1217   int iPos = Find(ch, true);
1218   if ( iPos != wxNOT_FOUND && iPos != 0 )
1219     str = wxString(c_str(), iPos);
1220
1221   return str;
1222 }
1223
1224 /// get all characters after the first occurence of ch
1225 /// (returns empty string if ch not found)
1226 wxString wxString::AfterFirst(wxUniChar ch) const
1227 {
1228   wxString str;
1229   int iPos = Find(ch);
1230   if ( iPos != wxNOT_FOUND )
1231     str = wx_str() + iPos + 1;
1232
1233   return str;
1234 }
1235
1236 // replace first (or all) occurences of some substring with another one
1237 size_t wxString::Replace(const wxString& strOld,
1238                          const wxString& strNew, bool bReplaceAll)
1239 {
1240     // if we tried to replace an empty string we'd enter an infinite loop below
1241     wxCHECK_MSG( !strOld.empty(), 0,
1242                  _T("wxString::Replace(): invalid parameter") );
1243
1244     wxSTRING_INVALIDATE_INDEX_CACHE();
1245
1246     size_t uiCount = 0;   // count of replacements made
1247
1248     // optimize the special common case: replacement of one character by
1249     // another one (in UTF-8 case we can only do this for ASCII characters)
1250     //
1251     // benchmarks show that this special version is around 3 times faster
1252     // (depending on the proportion of matching characters and UTF-8/wchar_t
1253     // build)
1254     if ( strOld.m_impl.length() == 1 && strNew.m_impl.length() == 1 )
1255     {
1256         const wxStringCharType chOld = strOld.m_impl[0],
1257                                chNew = strNew.m_impl[0];
1258
1259         // this loop is the simplified version of the one below
1260         for ( size_t pos = 0; ; )
1261         {
1262             pos = m_impl.find(chOld, pos);
1263             if ( pos == npos )
1264                 break;
1265
1266             m_impl[pos++] = chNew;
1267
1268             uiCount++;
1269
1270             if ( !bReplaceAll )
1271                 break;
1272         }
1273     }
1274     else // general case
1275     {
1276         const size_t uiOldLen = strOld.m_impl.length();
1277         const size_t uiNewLen = strNew.m_impl.length();
1278
1279         for ( size_t pos = 0; ; )
1280         {
1281             pos = m_impl.find(strOld.m_impl, pos);
1282             if ( pos == npos )
1283                 break;
1284
1285             // replace this occurrence of the old string with the new one
1286             m_impl.replace(pos, uiOldLen, strNew.m_impl);
1287
1288             // move up pos past the string that was replaced
1289             pos += uiNewLen;
1290
1291             // increase replace count
1292             uiCount++;
1293
1294             // stop after the first one?
1295             if ( !bReplaceAll )
1296                 break;
1297         }
1298     }
1299
1300     return uiCount;
1301 }
1302
1303 bool wxString::IsAscii() const
1304 {
1305     for ( const_iterator i = begin(); i != end(); ++i )
1306     {
1307         if ( !(*i).IsAscii() )
1308             return false;
1309     }
1310
1311     return true;
1312 }
1313
1314 bool wxString::IsWord() const
1315 {
1316     for ( const_iterator i = begin(); i != end(); ++i )
1317     {
1318         if ( !wxIsalpha(*i) )
1319             return false;
1320     }
1321
1322     return true;
1323 }
1324
1325 bool wxString::IsNumber() const
1326 {
1327     if ( empty() )
1328         return true;
1329
1330     const_iterator i = begin();
1331
1332     if ( *i == _T('-') || *i == _T('+') )
1333         ++i;
1334
1335     for ( ; i != end(); ++i )
1336     {
1337         if ( !wxIsdigit(*i) )
1338             return false;
1339     }
1340
1341     return true;
1342 }
1343
1344 wxString wxString::Strip(stripType w) const
1345 {
1346     wxString s = *this;
1347     if ( w & leading ) s.Trim(false);
1348     if ( w & trailing ) s.Trim(true);
1349     return s;
1350 }
1351
1352 // ---------------------------------------------------------------------------
1353 // case conversion
1354 // ---------------------------------------------------------------------------
1355
1356 wxString& wxString::MakeUpper()
1357 {
1358   for ( iterator it = begin(), en = end(); it != en; ++it )
1359     *it = (wxChar)wxToupper(*it);
1360
1361   return *this;
1362 }
1363
1364 wxString& wxString::MakeLower()
1365 {
1366   for ( iterator it = begin(), en = end(); it != en; ++it )
1367     *it = (wxChar)wxTolower(*it);
1368
1369   return *this;
1370 }
1371
1372 wxString& wxString::MakeCapitalized()
1373 {
1374     const iterator en = end();
1375     iterator it = begin();
1376     if ( it != en )
1377     {
1378         *it = (wxChar)wxToupper(*it);
1379         for ( ++it; it != en; ++it )
1380             *it = (wxChar)wxTolower(*it);
1381     }
1382
1383     return *this;
1384 }
1385
1386 // ---------------------------------------------------------------------------
1387 // trimming and padding
1388 // ---------------------------------------------------------------------------
1389
1390 // some compilers (VC++ 6.0 not to name them) return true for a call to
1391 // isspace('\xEA') in the C locale which seems to be broken to me, but we have
1392 // to live with this by checking that the character is a 7 bit one - even if
1393 // this may fail to detect some spaces (I don't know if Unicode doesn't have
1394 // space-like symbols somewhere except in the first 128 chars), it is arguably
1395 // still better than trimming away accented letters
1396 inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1397
1398 // trims spaces (in the sense of isspace) from left or right side
1399 wxString& wxString::Trim(bool bFromRight)
1400 {
1401     // first check if we're going to modify the string at all
1402     if ( !empty() &&
1403          (
1404           (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1405           (!bFromRight && wxSafeIsspace(GetChar(0u)))
1406          )
1407        )
1408     {
1409         if ( bFromRight )
1410         {
1411             // find last non-space character
1412             reverse_iterator psz = rbegin();
1413             while ( (psz != rend()) && wxSafeIsspace(*psz) )
1414                 ++psz;
1415
1416             // truncate at trailing space start
1417             erase(psz.base(), end());
1418         }
1419         else
1420         {
1421             // find first non-space character
1422             iterator psz = begin();
1423             while ( (psz != end()) && wxSafeIsspace(*psz) )
1424                 ++psz;
1425
1426             // fix up data and length
1427             erase(begin(), psz);
1428         }
1429     }
1430
1431     return *this;
1432 }
1433
1434 // adds nCount characters chPad to the string from either side
1435 wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1436 {
1437     wxString s(chPad, nCount);
1438
1439     if ( bFromRight )
1440         *this += s;
1441     else
1442     {
1443         s += *this;
1444         swap(s);
1445     }
1446
1447     return *this;
1448 }
1449
1450 // truncate the string
1451 wxString& wxString::Truncate(size_t uiLen)
1452 {
1453     if ( uiLen < length() )
1454     {
1455         erase(begin() + uiLen, end());
1456     }
1457     //else: nothing to do, string is already short enough
1458
1459     return *this;
1460 }
1461
1462 // ---------------------------------------------------------------------------
1463 // finding (return wxNOT_FOUND if not found and index otherwise)
1464 // ---------------------------------------------------------------------------
1465
1466 // find a character
1467 int wxString::Find(wxUniChar ch, bool bFromEnd) const
1468 {
1469     size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1470
1471     return (idx == npos) ? wxNOT_FOUND : (int)idx;
1472 }
1473
1474 // ----------------------------------------------------------------------------
1475 // conversion to numbers
1476 // ----------------------------------------------------------------------------
1477
1478 // The implementation of all the functions below is exactly the same so factor
1479 // it out. Note that number extraction works correctly on UTF-8 strings, so
1480 // we can use wxStringCharType and wx_str() for maximum efficiency.
1481
// DO_IF_NOT_WINCE(x) expands to x except when __WXWINCE__ is defined, in
// which case it expands to nothing: it is used to wrap the code using errno
// (note that <errno.h> is not included at the top of this file under WinCE).
#ifndef __WXWINCE__
    #define DO_IF_NOT_WINCE(x) x
#else
    #define DO_IF_NOT_WINCE(x)
#endif

// WX_STRING_TO_INT_TYPE expands to the entire body of a conversion function
// and so must be the only statement in it, it uses "return" itself.
//
//  out     the pointer where the result is stored, checked for being non-NULL
//  base    the numeric base, asserted to be either 0 or in 2..36 range
//  func    the strtol()-like function doing the conversion, called with the
//          start of the string, the address of the end pointer and the base
//  T       the type of the value returned by func and stored in *out
//
// *out is only modified, and true returned, if the conversion succeeds.
#define WX_STRING_TO_INT_TYPE(out, base, func, T)                           \
    wxCHECK_MSG( out, false, _T("NULL output pointer") );                   \
    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );  \
                                                                            \
    DO_IF_NOT_WINCE( errno = 0; )                                           \
                                                                            \
    const wxStringCharType *start = wx_str();                               \
    wxStringCharType *end;                                                  \
    T val = func(start, &end, base);                                        \
                                                                            \
    /* return true only if scan was stopped by the terminating NUL and */   \
    /* if the string was not empty to start with and no under/overflow */   \
    /* occurred: */                                                         \
    if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )         \
        return false;                                                       \
    *out = val;                                                             \
    return true
1505
// Convert the string to a long using wxStrtol() with the given base and
// store the result in *pVal.
//
// Returns true only if the entire non-empty string was consumed by the
// conversion and, except under WinCE, errno was not set to ERANGE by it;
// *pVal is not modified otherwise, see WX_STRING_TO_INT_TYPE.
bool wxString::ToLong(long *pVal, int base) const
{
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtol, long);
}
1510
// Convert the string to an unsigned long using wxStrtoul() with the given
// base and store the result in *pVal.
//
// Returns true only if the entire non-empty string was consumed by the
// conversion and, except under WinCE, errno was not set to ERANGE by it;
// *pVal is not modified otherwise, see WX_STRING_TO_INT_TYPE.
bool wxString::ToULong(unsigned long *pVal, int base) const
{
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoul, unsigned long);
}
1515
// Convert the string to a wxLongLong_t using wxStrtoll() with the given base
// and store the result in *pVal.
//
// Returns true only if the entire non-empty string was consumed by the
// conversion and, except under WinCE, errno was not set to ERANGE by it;
// *pVal is not modified otherwise, see WX_STRING_TO_INT_TYPE.
bool wxString::ToLongLong(wxLongLong_t *pVal, int base) const
{
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoll, wxLongLong_t);
}
1520
// Convert the string to a wxULongLong_t using wxStrtoull() with the given
// base and store the result in *pVal.
//
// Returns true only if the entire non-empty string was consumed by the
// conversion and, except under WinCE, errno was not set to ERANGE by it;
// *pVal is not modified otherwise, see WX_STRING_TO_INT_TYPE.
bool wxString::ToULongLong(wxULongLong_t *pVal, int base) const
{
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoull, wxULongLong_t);
}
1525
1526 bool wxString::ToDouble(double *pVal) const
1527 {
1528     wxCHECK_MSG( pVal, false, _T("NULL output pointer") );
1529
1530     DO_IF_NOT_WINCE( errno = 0; )
1531
1532     const wxChar *start = c_str();
1533     wxChar *end;
1534     double val = wxStrtod(start, &end);
1535
1536     // return true only if scan was stopped by the terminating NUL and if the
1537     // string was not empty to start with and no under/overflow occurred
1538     if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )
1539         return false;
1540
1541     *pVal = val;
1542
1543     return true;
1544 }
1545
1546 // ---------------------------------------------------------------------------
1547 // formatted output
1548 // ---------------------------------------------------------------------------
1549
1550 #if !wxUSE_UTF8_LOCALE_ONLY
1551 /* static */
1552 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1553 wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
1554 #else
1555 wxString wxString::DoFormatWchar(const wxChar *format, ...)
1556 #endif
1557 {
1558     va_list argptr;
1559     va_start(argptr, format);
1560
1561     wxString s;
1562     s.PrintfV(format, argptr);
1563
1564     va_end(argptr);
1565
1566     return s;
1567 }
1568 #endif // !wxUSE_UTF8_LOCALE_ONLY
1569
#if wxUSE_UNICODE_UTF8
// Return the string built by formatting the variable arguments according to
// the given char format string using PrintfV().
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
1585
1586 /* static */
1587 wxString wxString::FormatV(const wxString& format, va_list argptr)
1588 {
1589     wxString s;
1590     s.PrintfV(format, argptr);
1591     return s;
1592 }
1593
#if !wxUSE_UTF8_LOCALE_ONLY
// Replace the contents of the string with the result of formatting the
// variable arguments according to the given wxChar format string and return
// the value returned by PrintfV().
#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
#else
int wxString::DoPrintfWchar(const wxChar *format, ...)
#endif
{
    va_list argptr;
    va_start(argptr, format);

#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
    // get a pointer to the wxString instance by downcasting the mixin base
    // class pointer
    //
    // NOTE(review): this comment used to say that dynamic_cast<> had to be
    // used here as the only cast working safely for downcasting when multiple
    // inheritance is used, but the code uses static_cast<> -- confirm which
    // one is intended
    wxString *str = static_cast<wxString*>(this);
#else
    wxString *str = this;
#endif

    int iLen = str->PrintfV(format, argptr);

    va_end(argptr);

    return iLen;
}
#endif // !wxUSE_UTF8_LOCALE_ONLY
1620
#if wxUSE_UNICODE_UTF8
// Replace the contents of the string with the result of formatting the
// variable arguments according to the given char format string and return
// the value returned by PrintfV().
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);

    const int rc = PrintfV(format, args);

    va_end(args);

    return rc;
}
#endif // wxUSE_UNICODE_UTF8
1634
1635 /*
    Uses wxVsnprintf and places the result into this string.
1637
1638     In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1639     it is vswprintf.  Due to a discrepancy between vsnprintf and vswprintf in
1640     the ISO C99 (and thus SUSv3) standard the return value for the case of
1641     an undersized buffer is inconsistent.  For conforming vsnprintf
1642     implementations the function must return the number of characters that
1643     would have been printed had the buffer been large enough.  For conforming
1644     vswprintf implementations the function must return a negative number
1645     and set errno.
1646
1647     What vswprintf sets errno to is undefined but Darwin seems to set it to
1648     EOVERFLOW.  The only expected errno are EILSEQ and EINVAL.  Both of
1649     those are defined in the standard and backed up by several conformance
1650     statements.  Note that ENOMEM mentioned in the manual page does not
1651     apply to swprintf, only wprintf and fwprintf.
1652
1653     Official manual page:
1654     http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1655
1656     Some conformance statements (AIX, Solaris):
1657     http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1658     http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1659
1660     Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1661     EILSEQ and EINVAL are specifically defined to mean the error is other than
1662     an undersized buffer and no other errno are defined we treat those two
1663     as meaning hard errors and everything else gets the old behavior which
1664     is to keep looping and increasing buffer size until the function succeeds.
1665
1666     In practice it's impossible to determine before compilation which behavior
1667     may be used.  The vswprintf function may have vsnprintf-like behavior or
1668     vice-versa.  Behavior detected on one release can theoretically change
1669     with an updated release.  Not to mention that configure testing for it
1670     would require the test to be run on the host system, not the build system
1671     which makes cross compilation difficult. Therefore, we make no assumptions
1672     about behavior and try our best to handle every known case, including the
1673     case where wxVsnprintf returns a negative number and fails to set errno.
1674
1675     There is yet one more non-standard implementation and that is our own.
1676     Fortunately, that can be detected at compile-time.
1677
1678     On top of all that, ISO C99 explicitly defines snprintf to write a null
    character to the last position of the specified buffer.  That would be
    at the given buffer size minus 1.  It is supposed to do this even if it
1681     turns out that the buffer is sized too small.
1682
1683     Darwin (tested on 10.5) follows the C99 behavior exactly.
1684
1685     Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1686     errno even when it fails.  However, it only seems to ever fail due
1687     to an undersized buffer.
1688 */
1689 #if wxUSE_UNICODE_UTF8
1690 template<typename BufferType>
1691 #else
1692 // we only need one version in non-UTF8 builds and at least two Windows
1693 // compilers have problems with this function template, so use just one
1694 // normal function here
1695 #endif
1696 static int DoStringPrintfV(wxString& str,
1697                            const wxString& format, va_list argptr)
1698 {
1699     int size = 1024;
1700
1701     for ( ;; )
1702     {
1703 #if wxUSE_UNICODE_UTF8
1704         BufferType tmp(str, size + 1);
1705         typename BufferType::CharType *buf = tmp;
1706 #else
1707         wxStringBuffer tmp(str, size + 1);
1708         wxChar *buf = tmp;
1709 #endif
1710
1711         if ( !buf )
1712         {
1713             // out of memory
1714
1715             // in UTF-8 build, leaving uninitialized junk in the buffer
1716             // could result in invalid non-empty UTF-8 string, so just
1717             // reset the string to empty on failure:
1718             buf[0] = '\0';
1719             return -1;
1720         }
1721
1722         // wxVsnprintf() may modify the original arg pointer, so pass it
1723         // only a copy
1724         va_list argptrcopy;
1725         wxVaCopy(argptrcopy, argptr);
1726
1727 #ifndef __WXWINCE__
1728         // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1729         errno = 0;
1730 #endif
1731         int len = wxVsnprintf(buf, size, format, argptrcopy);
1732         va_end(argptrcopy);
1733
1734         // some implementations of vsnprintf() don't NUL terminate
1735         // the string if there is not enough space for it so
1736         // always do it manually
1737         // FIXME: This really seems to be the wrong and would be an off-by-one
1738         // bug except the code above allocates an extra character.
1739         buf[size] = _T('\0');
1740
1741         // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1742         // total number of characters which would have been written if the
1743         // buffer were large enough (newer standards such as Unix98)
1744         if ( len < 0 )
1745         {
1746             // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1747             //     wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1748             //     is true if *both* of them use our own implementation,
1749             //     otherwise we can't be sure
1750 #if wxUSE_WXVSNPRINTF
1751             // we know that our own implementation of wxVsnprintf() returns -1
1752             // only for a format error - thus there's something wrong with
1753             // the user's format string
1754             buf[0] = '\0';
1755             return -1;
1756 #else // possibly using system version
1757             // assume it only returns error if there is not enough space, but
1758             // as we don't know how much we need, double the current size of
1759             // the buffer
1760 #ifndef __WXWINCE__
1761             if( (errno == EILSEQ) || (errno == EINVAL) )
1762             // If errno was set to one of the two well-known hard errors
1763             // then fail immediately to avoid an infinite loop.
1764                 return -1;
1765             else
1766 #endif // __WXWINCE__
1767             // still not enough, as we don't know how much we need, double the
1768             // current size of the buffer
1769                 size *= 2;
1770 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1771         }
1772         else if ( len >= size )
1773         {
1774 #if wxUSE_WXVSNPRINTF
1775             // we know that our own implementation of wxVsnprintf() returns
1776             // size+1 when there's not enough space but that's not the size
1777             // of the required buffer!
1778             size *= 2;      // so we just double the current size of the buffer
1779 #else
1780             // some vsnprintf() implementations NUL-terminate the buffer and
1781             // some don't in len == size case, to be safe always add 1
1782             // FIXME: I don't quite understand this comment.  The vsnprintf
1783             // function is specifically defined to return the number of
1784             // characters printed not including the null terminator.
1785             // So OF COURSE you need to add 1 to get the right buffer size.
1786             // The following line is definitely correct, no question.
1787             size = len + 1;
1788 #endif
1789         }
1790         else // ok, there was enough space
1791         {
1792             break;
1793         }
1794     }
1795
1796     // we could have overshot
1797     str.Shrink();
1798
1799     return str.length();
1800 }
1801
// Replace the contents of the string with the result of formatting the
// arguments in argptr according to format and return the value returned by
// DoStringPrintfV(), which does all the real work.
//
// This function only selects the buffer type to be used by DoStringPrintfV()
// depending on the build configuration (and, in some builds, on whether the
// current locale uses UTF-8).
int wxString::PrintfV(const wxString& format, va_list argptr)
{
#if wxUSE_UNICODE_UTF8
    // the buffer type giving access to the string contents as char*
    #if wxUSE_STL_BASED_WXSTRING
        typedef wxStringTypeBuffer<char> Utf8Buffer;
    #else
        typedef wxStringInternalBuffer Utf8Buffer;
    #endif
#endif

#if wxUSE_UTF8_LOCALE_ONLY
    // only the char* version exists in this build
    return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
#else
    #if wxUSE_UNICODE_UTF8
    // choose between the char* and wxChar* versions during run-time
    if ( wxLocaleIsUtf8 )
        return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
    else
        // wxChar* version
        return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
    #else
        // DoStringPrintfV() is not a template in non-UTF8 builds
        return DoStringPrintfV(*this, format, argptr);
    #endif // UTF8/WCHAR
#endif
}
1826
1827 // ----------------------------------------------------------------------------
1828 // misc other operations
1829 // ----------------------------------------------------------------------------
1830
1831 // returns true if the string matches the pattern which may contain '*' and
1832 // '?' metacharacters (as usual, '?' matches any character and '*' any number
1833 // of them)
1834 bool wxString::Matches(const wxString& mask) const
1835 {
1836     // I disable this code as it doesn't seem to be faster (in fact, it seems
1837     // to be much slower) than the old, hand-written code below and using it
1838     // here requires always linking with libregex even if the user code doesn't
1839     // use it
1840 #if 0 // wxUSE_REGEX
1841     // first translate the shell-like mask into a regex
1842     wxString pattern;
1843     pattern.reserve(wxStrlen(pszMask));
1844
1845     pattern += _T('^');
1846     while ( *pszMask )
1847     {
1848         switch ( *pszMask )
1849         {
1850             case _T('?'):
1851                 pattern += _T('.');
1852                 break;
1853
1854             case _T('*'):
1855                 pattern += _T(".*");
1856                 break;
1857
1858             case _T('^'):
1859             case _T('.'):
1860             case _T('$'):
1861             case _T('('):
1862             case _T(')'):
1863             case _T('|'):
1864             case _T('+'):
1865             case _T('\\'):
1866                 // these characters are special in a RE, quote them
1867                 // (however note that we don't quote '[' and ']' to allow
1868                 // using them for Unix shell like matching)
1869                 pattern += _T('\\');
1870                 // fall through
1871
1872             default:
1873                 pattern += *pszMask;
1874         }
1875
1876         pszMask++;
1877     }
1878     pattern += _T('$');
1879
1880     // and now use it
1881     return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
1882 #else // !wxUSE_REGEX
1883   // TODO: this is, of course, awfully inefficient...
1884
1885   // FIXME-UTF8: implement using iterators, remove #if
1886 #if wxUSE_UNICODE_UTF8
1887   wxWCharBuffer maskBuf = mask.wc_str();
1888   wxWCharBuffer txtBuf = wc_str();
1889   const wxChar *pszMask = maskBuf.data();
1890   const wxChar *pszTxt = txtBuf.data();
1891 #else
1892   const wxChar *pszMask = mask.wx_str();
1893   // the char currently being checked
1894   const wxChar *pszTxt = wx_str();
1895 #endif
1896
1897   // the last location where '*' matched
1898   const wxChar *pszLastStarInText = NULL;
1899   const wxChar *pszLastStarInMask = NULL;
1900
1901 match:
1902   for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
1903     switch ( *pszMask ) {
1904       case wxT('?'):
1905         if ( *pszTxt == wxT('\0') )
1906           return false;
1907
1908         // pszTxt and pszMask will be incremented in the loop statement
1909
1910         break;
1911
1912       case wxT('*'):
1913         {
1914           // remember where we started to be able to backtrack later
1915           pszLastStarInText = pszTxt;
1916           pszLastStarInMask = pszMask;
1917
1918           // ignore special chars immediately following this one
1919           // (should this be an error?)
1920           while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
1921             pszMask++;
1922
1923           // if there is nothing more, match
1924           if ( *pszMask == wxT('\0') )
1925             return true;
1926
1927           // are there any other metacharacters in the mask?
1928           size_t uiLenMask;
1929           const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));
1930
1931           if ( pEndMask != NULL ) {
1932             // we have to match the string between two metachars
1933             uiLenMask = pEndMask - pszMask;
1934           }
1935           else {
1936             // we have to match the remainder of the string
1937             uiLenMask = wxStrlen(pszMask);
1938           }
1939
1940           wxString strToMatch(pszMask, uiLenMask);
1941           const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
1942           if ( pMatch == NULL )
1943             return false;
1944
1945           // -1 to compensate "++" in the loop
1946           pszTxt = pMatch + uiLenMask - 1;
1947           pszMask += uiLenMask - 1;
1948         }
1949         break;
1950
1951       default:
1952         if ( *pszMask != *pszTxt )
1953           return false;
1954         break;
1955     }
1956   }
1957
1958   // match only if nothing left
1959   if ( *pszTxt == wxT('\0') )
1960     return true;
1961
1962   // if we failed to match, backtrack if we can
1963   if ( pszLastStarInText ) {
1964     pszTxt = pszLastStarInText + 1;
1965     pszMask = pszLastStarInMask;
1966
1967     pszLastStarInText = NULL;
1968
1969     // don't bother resetting pszLastStarInMask, it's unnecessary
1970
1971     goto match;
1972   }
1973
1974   return false;
1975 #endif // wxUSE_REGEX/!wxUSE_REGEX
1976 }
1977
1978 // Count the number of chars
1979 int wxString::Freq(wxUniChar ch) const
1980 {
1981     int count = 0;
1982     for ( const_iterator i = begin(); i != end(); ++i )
1983     {
1984         if ( *i == ch )
1985             count ++;
1986     }
1987     return count;
1988 }
1989
1990 // ----------------------------------------------------------------------------
1991 // wxUTF8StringBuffer
1992 // ----------------------------------------------------------------------------
1993
1994 #if wxUSE_UNICODE_WCHAR
1995 wxUTF8StringBuffer::~wxUTF8StringBuffer()
1996 {
1997     wxMBConvStrictUTF8 conv;
1998     size_t wlen = conv.ToWChar(NULL, 0, m_buf);
1999     wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
2000
2001     wxStringInternalBuffer wbuf(m_str, wlen);
2002     conv.ToWChar(wbuf, wlen, m_buf);
2003 }
2004
2005 wxUTF8StringBufferLength::~wxUTF8StringBufferLength()
2006 {
2007     wxCHECK_RET(m_lenSet, "length not set");
2008
2009     wxMBConvStrictUTF8 conv;
2010     size_t wlen = conv.ToWChar(NULL, 0, m_buf, m_len);
2011     wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
2012
2013     wxStringInternalBufferLength wbuf(m_str, wlen);
2014     conv.ToWChar(wbuf, wlen, m_buf, m_len);
2015     wbuf.SetLength(wlen);
2016 }
2017 #endif // wxUSE_UNICODE_WCHAR