src/common/string.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/string.cpp
   3 // Purpose:     wxString class
   4 // Author:      Vadim Zeitlin, Ryan Norton
   5 // Modified by:
   6 // Created:     29/01/98
   7 // RCS-ID:      $Id$
   8 // Copyright:   (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
   9 //              (c) 2004 Ryan Norton <wxprojects@comcast.net>
  10 // Licence:     wxWindows licence
  11 /////////////////////////////////////////////////////////////////////////////
  12
  13 // ===========================================================================
  14 // headers, declarations, constants
  15 // ===========================================================================
  16
  17 // For compilers that support precompilation, includes "wx.h".
  18 #include "wx/wxprec.h"
  19
  20 #ifdef __BORLANDC__
  21     #pragma hdrstop
  22 #endif
  23
  24 #ifndef WX_PRECOMP
  25     #include "wx/string.h"
  26     #include "wx/wxcrtvararg.h"
  27 #endif
  28
  29 #include <ctype.h>
  30
  31 #ifndef __WXWINCE__
  32     #include <errno.h>
  33 #endif
  34
  35 #include <string.h>
  36 #include <stdlib.h>
  37
  38 #include "wx/hashmap.h"
  39
  40 // string handling functions used by wxString:
  41 #if wxUSE_UNICODE_UTF8
  42     #define wxStringMemcpy   memcpy
  43     #define wxStringMemcmp   memcmp
  44     #define wxStringMemchr   memchr
  45     #define wxStringStrlen   strlen
  46 #else
  47     #define wxStringMemcpy   wxTmemcpy
  48     #define wxStringMemcmp   wxTmemcmp
  49     #define wxStringMemchr   wxTmemchr
  50     #define wxStringStrlen   wxStrlen
  51 #endif
  52
  53
  54 // ---------------------------------------------------------------------------
  55 // static class variables definition
  56 // ---------------------------------------------------------------------------
  57
  58 //According to STL _must_ be a -1 size_t
  59 const size_t wxString::npos = (size_t) -1;
  60
  61 // ----------------------------------------------------------------------------
  62 // global functions
  63 // ----------------------------------------------------------------------------
  64
  65 #if wxUSE_STD_IOSTREAM
  66
  67 #include <iostream>
  68
  69 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
  70 {
  71 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
  72     return os << (const char *)str.AsCharBuf();
  73 #else
  74     return os << str.AsInternal();
  75 #endif
  76 }
  77
  78 wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
  79 {
  80     return os << str.c_str();
  81 }
  82
  83 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str)
  84 {
  85     return os << str.data();
  86 }
  87
  88 #ifndef __BORLANDC__
  89 wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)
  90 {
  91     return os << str.data();
  92 }
  93 #endif
  94
  95 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
  96
  97 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxString& str)
  98 {
  99     return wos << str.wc_str();
 100 }
 101
 102 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxCStrData& str)
 103 {
 104     return wos << str.AsWChar();
 105 }
 106
 107 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxWCharBuffer& str)
 108 {
 109     return wos << str.data();
 110 }
 111
 112 #endif  // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
 113
 114 #endif // wxUSE_STD_IOSTREAM
 115
 116 // ===========================================================================
 117 // wxString class core
 118 // ===========================================================================
 119
 120 #if wxUSE_UNICODE_UTF8
 121
 122 void wxString::PosLenToImpl(size_t pos, size_t len,
 123                             size_t *implPos, size_t *implLen) const
 124 {
 125     if ( pos == npos )
 126         *implPos = npos;
 127     else
 128     {
 129         const_iterator i = begin() + pos;
 130         *implPos = wxStringImpl::const_iterator(i.impl()) - m_impl.begin();
 131         if ( len == npos )
 132             *implLen = npos;
 133         else
 134         {
 135             // too large length is interpreted as "to the end of the string"
 136             // FIXME-UTF8: verify this is the case in std::string, assert
 137             // otherwise
 138             if ( pos + len > length() )
 139                 len = length() - pos;
 140
 141             *implLen = (i + len).impl() - i.impl();
 142         }
 143     }
 144 }
 145
 146 #endif // wxUSE_UNICODE_UTF8
 147
 148 // ----------------------------------------------------------------------------
 149 // wxCStrData converted strings caching
 150 // ----------------------------------------------------------------------------
 151
 152 // FIXME-UTF8: temporarily disabled because it doesn't work with global
 153 //             string objects; re-enable after fixing this bug and benchmarking
 154 //             performance to see if using a hash is a good idea at all
 155 #if 0
 156
 157 // For backward compatibility reasons, it must be possible to assign the value
 158 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
 159 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
 160 // because the memory would be freed immediately, but it has to be valid as long
 161 // as the string is not modified, so that code like this still works:
 162 //
 163 // const wxChar *s = str.c_str();
 164 // while ( s ) { ... }
 165
 166 // FIXME-UTF8: not thread safe!
 167 // FIXME-UTF8: we currently clear the cached conversion only when the string is
 168 //             destroyed, but we should do it when the string is modified, to
 169 //             keep memory usage down
 170 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
 171 //             invalidated the cache on every change, we could keep the previous
 172 //             conversion
 173 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
 174 //             to use mb_str() or wc_str() instead of (const [w]char*)c_str()
 175
 176 template<typename T>
 177 static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
 178 {
 179     typename T::iterator i = hash.find(wxConstCast(s, wxString));
 180     if ( i != hash.end() )
 181     {
 182         free(i->second);
 183         hash.erase(i);
 184     }
 185 }
 186
 187 #if wxUSE_UNICODE
 188 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
 189 //     so we have to use wxString* here and const-cast when used
 190 WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
 191                     wxStringCharConversionCache);
 192 static wxStringCharConversionCache gs_stringsCharCache;
 193
 194 const char* wxCStrData::AsChar() const
 195 {
 196     // remove previously cache value, if any (see FIXMEs above):
 197     DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
 198
 199     // convert the string and keep it:
 200     const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
 201         m_str->mb_str().release();
 202
 203     return s + m_offset;
 204 }
 205 #endif // wxUSE_UNICODE
 206
 207 #if !wxUSE_UNICODE_WCHAR
 208 WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
 209                     wxStringWCharConversionCache);
 210 static wxStringWCharConversionCache gs_stringsWCharCache;
 211
 212 const wchar_t* wxCStrData::AsWChar() const
 213 {
 214     // remove previously cache value, if any (see FIXMEs above):
 215     DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
 216
 217     // convert the string and keep it:
 218     const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
 219         m_str->wc_str().release();
 220
 221     return s + m_offset;
 222 }
 223 #endif // !wxUSE_UNICODE_WCHAR
 224
 225 wxString::~wxString()
 226 {
 227 #if wxUSE_UNICODE
 228     // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
 229     DeleteStringFromConversionCache(gs_stringsCharCache, this);
 230 #endif
 231 #if !wxUSE_UNICODE_WCHAR
 232     DeleteStringFromConversionCache(gs_stringsWCharCache, this);
 233 #endif
 234 }
 235 #endif
 236
 237 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 238 const char* wxCStrData::AsChar() const
 239 {
 240 #if wxUSE_UNICODE_UTF8
 241     if ( wxLocaleIsUtf8 )
 242         return AsInternal();
 243 #endif
 244     // under non-UTF8 locales, we have to convert the internal UTF-8
 245     // representation using wxConvLibc and cache the result
 246
 247     wxString *str = wxConstCast(m_str, wxString);
 248
 249     // convert the string:
 250     //
 251     // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
 252     //             have it) but it's unfortunately not obvious to implement
 253     //             because we don't know how big buffer do we need for the
 254     //             given string length (in case of multibyte encodings, e.g.
 255     //             ISO-2022-JP or UTF-8 when internal representation is wchar_t)
 256     //
 257     //             One idea would be to store more than just m_convertedToChar
 258     //             in wxString: then we could record the length of the string
 259     //             which was converted the last time and try to reuse the same
 260     //             buffer if the current length is not greater than it (this
 261     //             could still fail because string could have been modified in
 262     //             place but it would work most of the time, so we'd do it and
 263     //             only allocate the new buffer if in-place conversion returned
 264     //             an error). We could also store a bit saying if the string
 265     //             was modified since the last conversion (and update it in all
 266     //             operation modifying the string, of course) to avoid unneeded
 267     //             consequential conversions. But both of these ideas require
 268     //             adding more fields to wxString and require profiling results
 269     //             to be sure that we really gain enough from them to justify
 270     //             doing it.
 271     wxCharBuffer buf(str->mb_str());
 272
 273     // if it failed, return empty string and not NULL to avoid crashes in code
 274     // written with either wxWidgets 2 wxString or std::string behaviour in
 275     // mind: neither of them ever returns NULL and so we shouldn't neither
 276     if ( !buf )
 277         return "";
 278
 279     if ( str->m_convertedToChar &&
 280          strlen(buf) == strlen(str->m_convertedToChar) )
 281     {
 282         // keep the same buffer for as long as possible, so that several calls
 283         // to c_str() in a row still work:
 284         strcpy(str->m_convertedToChar, buf);
 285     }
 286     else
 287     {
 288         str->m_convertedToChar = buf.release();
 289     }
 290
 291     // and keep it:
 292     return str->m_convertedToChar + m_offset;
 293 }
 294 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 295
 296 #if !wxUSE_UNICODE_WCHAR
 297 const wchar_t* wxCStrData::AsWChar() const
 298 {
 299     wxString *str = wxConstCast(m_str, wxString);
 300
 301     // convert the string:
 302     wxWCharBuffer buf(str->wc_str());
 303
 304     // notice that here, unlike above in AsChar(), conversion can't fail as our
 305     // internal UTF-8 is always well-formed -- or the string was corrupted and
 306     // all bets are off anyhow
 307
 308     // FIXME-UTF8: do the conversion in-place in the existing buffer
 309     if ( str->m_convertedToWChar &&
 310          wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
 311     {
 312         // keep the same buffer for as long as possible, so that several calls
 313         // to c_str() in a row still work:
 314         memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
 315     }
 316     else
 317     {
 318         str->m_convertedToWChar = buf.release();
 319     }
 320
 321     // and keep it:
 322     return str->m_convertedToWChar + m_offset;
 323 }
 324 #endif // !wxUSE_UNICODE_WCHAR
 325
 326 // ===========================================================================
 327 // wxString class core
 328 // ===========================================================================
 329
 330 // ---------------------------------------------------------------------------
 331 // construction and conversion
 332 // ---------------------------------------------------------------------------
 333
 334 #if wxUSE_UNICODE_WCHAR
 335 /* static */
 336 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 337                                                const wxMBConv& conv)
 338 {
 339     // anything to do?
 340     if ( !psz || nLength == 0 )
 341         return SubstrBufFromMB(L"", 0);
 342
 343     if ( nLength == npos )
 344         nLength = wxNO_LEN;
 345
 346     size_t wcLen;
 347     wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 348     if ( !wcLen )
 349         return SubstrBufFromMB(_T(""), 0);
 350     else
 351         return SubstrBufFromMB(wcBuf, wcLen);
 352 }
 353 #endif // wxUSE_UNICODE_WCHAR
 354
 355 #if wxUSE_UNICODE_UTF8
 356 /* static */
 357 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 358                                                const wxMBConv& conv)
 359 {
 360     // anything to do?
 361     if ( !psz || nLength == 0 )
 362         return SubstrBufFromMB("", 0);
 363
 364     // if psz is already in UTF-8, we don't have to do the roundtrip to
 365     // wchar_t* and back:
 366     if ( conv.IsUTF8() )
 367     {
 368         // we need to validate the input because UTF8 iterators assume valid
 369         // UTF-8 sequence and psz may be invalid:
 370         if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
 371         {
 372             // we must pass the real string length to SubstrBufFromMB ctor
 373             if ( nLength == npos )
 374                 nLength = psz ? strlen(psz) : 0;
 375             return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz), nLength);
 376         }
 377         // else: do the roundtrip through wchar_t*
 378     }
 379
 380     if ( nLength == npos )
 381         nLength = wxNO_LEN;
 382
 383     // first convert to wide string:
 384     size_t wcLen;
 385     wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 386     if ( !wcLen )
 387         return SubstrBufFromMB("", 0);
 388
 389     // and then to UTF-8:
 390     SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
 391     // widechar -> UTF-8 conversion isn't supposed to ever fail:
 392     wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
 393
 394     return buf;
 395 }
 396 #endif // wxUSE_UNICODE_UTF8
 397
 398 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 399 /* static */
 400 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
 401                                                const wxMBConv& conv)
 402 {
 403     // anything to do?
 404     if ( !pwz || nLength == 0 )
 405         return SubstrBufFromWC("", 0);
 406
 407     if ( nLength == npos )
 408         nLength = wxNO_LEN;
 409
 410     size_t mbLen;
 411     wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
 412     if ( !mbLen )
 413         return SubstrBufFromWC("", 0);
 414     else
 415         return SubstrBufFromWC(mbBuf, mbLen);
 416 }
 417 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 418
 419
 420 #if wxUSE_UNICODE_WCHAR
 421
 422 //Convert wxString in Unicode mode to a multi-byte string
 423 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
 424 {
 425     return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL);
 426 }
 427
 428 #elif wxUSE_UNICODE_UTF8
 429
 430 const wxWCharBuffer wxString::wc_str() const
 431 {
 432     return wxMBConvStrictUTF8().cMB2WC
 433                                 (
 434                                     m_impl.c_str(),
 435                                     m_impl.length() + 1, // size, not length
 436                                     NULL
 437                                 );
 438 }
 439
 440 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
 441 {
 442     if ( conv.IsUTF8() )
 443         return wxCharBuffer::CreateNonOwned(m_impl.c_str());
 444
 445     // FIXME-UTF8: use wc_str() here once we have buffers with length
 446
 447     size_t wcLen;
 448     wxWCharBuffer wcBuf(wxMBConvStrictUTF8().cMB2WC
 449                                              (
 450                                                 m_impl.c_str(),
 451                                                 m_impl.length() + 1, // size
 452                                                 &wcLen
 453                                              ));
 454     if ( !wcLen )
 455         return wxCharBuffer("");
 456
 457     return conv.cWC2MB(wcBuf, wcLen+1, NULL);
 458 }
 459
 460 #else // ANSI
 461
 462 //Converts this string to a wide character string if unicode
 463 //mode is not enabled and wxUSE_WCHAR_T is enabled
 464 const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
 465 {
 466     return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL);
 467 }
 468
 469 #endif // Unicode/ANSI
 470
 471 // shrink to minimal size (releasing extra memory)
 472 bool wxString::Shrink()
 473 {
 474   wxString tmp(begin(), end());
 475   swap(tmp);
 476   return tmp.length() == length();
 477 }
 478
 479 // deprecated compatibility code:
 480 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
 481 wxStringCharType *wxString::GetWriteBuf(size_t nLen)
 482 {
 483     return DoGetWriteBuf(nLen);
 484 }
 485
 486 void wxString::UngetWriteBuf()
 487 {
 488     DoUngetWriteBuf();
 489 }
 490
 491 void wxString::UngetWriteBuf(size_t nLen)
 492 {
 493     DoUngetWriteBuf(nLen);
 494 }
 495 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
 496
 497
 498 // ---------------------------------------------------------------------------
 499 // data access
 500 // ---------------------------------------------------------------------------
 501
 502 // all functions are inline in string.h
 503
 504 // ---------------------------------------------------------------------------
 505 // concatenation operators
 506 // ---------------------------------------------------------------------------
 507
 508 /*
 509  * concatenation functions come in 5 flavours:
 510  *  string + string
 511  *  char   + string      and      string + char
 512  *  C str  + string      and      string + C str
 513  */
 514
 515 wxString operator+(const wxString& str1, const wxString& str2)
 516 {
 517 #if !wxUSE_STL_BASED_WXSTRING
 518     wxASSERT( str1.IsValid() );
 519     wxASSERT( str2.IsValid() );
 520 #endif
 521
 522     wxString s = str1;
 523     s += str2;
 524
 525     return s;
 526 }
 527
 528 wxString operator+(const wxString& str, wxUniChar ch)
 529 {
 530 #if !wxUSE_STL_BASED_WXSTRING
 531     wxASSERT( str.IsValid() );
 532 #endif
 533
 534     wxString s = str;
 535     s += ch;
 536
 537     return s;
 538 }
 539
 540 wxString operator+(wxUniChar ch, const wxString& str)
 541 {
 542 #if !wxUSE_STL_BASED_WXSTRING
 543     wxASSERT( str.IsValid() );
 544 #endif
 545
 546     wxString s = ch;
 547     s += str;
 548
 549     return s;
 550 }
 551
 552 wxString operator+(const wxString& str, const char *psz)
 553 {
 554 #if !wxUSE_STL_BASED_WXSTRING
 555     wxASSERT( str.IsValid() );
 556 #endif
 557
 558     wxString s;
 559     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 560         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 561     }
 562     s += str;
 563     s += psz;
 564
 565     return s;
 566 }
 567
 568 wxString operator+(const wxString& str, const wchar_t *pwz)
 569 {
 570 #if !wxUSE_STL_BASED_WXSTRING
 571     wxASSERT( str.IsValid() );
 572 #endif
 573
 574     wxString s;
 575     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 576         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 577     }
 578     s += str;
 579     s += pwz;
 580
 581     return s;
 582 }
 583
 584 wxString operator+(const char *psz, const wxString& str)
 585 {
 586 #if !wxUSE_STL_BASED_WXSTRING
 587     wxASSERT( str.IsValid() );
 588 #endif
 589
 590     wxString s;
 591     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 592         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 593     }
 594     s = psz;
 595     s += str;
 596
 597     return s;
 598 }
 599
 600 wxString operator+(const wchar_t *pwz, const wxString& str)
 601 {
 602 #if !wxUSE_STL_BASED_WXSTRING
 603     wxASSERT( str.IsValid() );
 604 #endif
 605
 606     wxString s;
 607     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 608         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 609     }
 610     s = pwz;
 611     s += str;
 612
 613     return s;
 614 }
 615
 616 // ---------------------------------------------------------------------------
 617 // string comparison
 618 // ---------------------------------------------------------------------------
 619
 620 bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
 621 {
 622     return (length() == 1) && (compareWithCase ? GetChar(0u) == c
 623                                : wxToupper(GetChar(0u)) == wxToupper(c));
 624 }
 625
 626 #ifdef HAVE_STD_STRING_COMPARE
 627
 628 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
 629 //     UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
 630 //     sort strings in characters code point order by sorting the byte sequence
 631 //     in byte values order (i.e. what strcmp() and memcmp() do).
 632
 633 int wxString::compare(const wxString& str) const
 634 {
 635     return m_impl.compare(str.m_impl);
 636 }
 637
 638 int wxString::compare(size_t nStart, size_t nLen,
 639                       const wxString& str) const
 640 {
 641     size_t pos, len;
 642     PosLenToImpl(nStart, nLen, &pos, &len);
 643     return m_impl.compare(pos, len, str.m_impl);
 644 }
 645
 646 int wxString::compare(size_t nStart, size_t nLen,
 647                       const wxString& str,
 648                       size_t nStart2, size_t nLen2) const
 649 {
 650     size_t pos, len;
 651     PosLenToImpl(nStart, nLen, &pos, &len);
 652
 653     size_t pos2, len2;
 654     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 655
 656     return m_impl.compare(pos, len, str.m_impl, pos2, len2);
 657 }
 658
 659 int wxString::compare(const char* sz) const
 660 {
 661     return m_impl.compare(ImplStr(sz));
 662 }
 663
 664 int wxString::compare(const wchar_t* sz) const
 665 {
 666     return m_impl.compare(ImplStr(sz));
 667 }
 668
 669 int wxString::compare(size_t nStart, size_t nLen,
 670                       const char* sz, size_t nCount) const
 671 {
 672     size_t pos, len;
 673     PosLenToImpl(nStart, nLen, &pos, &len);
 674
 675     SubstrBufFromMB str(ImplStr(sz, nCount));
 676
 677     return m_impl.compare(pos, len, str.data, str.len);
 678 }
 679
 680 int wxString::compare(size_t nStart, size_t nLen,
 681                       const wchar_t* sz, size_t nCount) const
 682 {
 683     size_t pos, len;
 684     PosLenToImpl(nStart, nLen, &pos, &len);
 685
 686     SubstrBufFromWC str(ImplStr(sz, nCount));
 687
 688     return m_impl.compare(pos, len, str.data, str.len);
 689 }
 690
 691 #else // !HAVE_STD_STRING_COMPARE
 692
 693 static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
 694                           const wxStringCharType* s2, size_t l2)
 695 {
 696     if( l1 == l2 )
 697         return wxStringMemcmp(s1, s2, l1);
 698     else if( l1 < l2 )
 699     {
 700         int ret = wxStringMemcmp(s1, s2, l1);
 701         return ret == 0 ? -1 : ret;
 702     }
 703     else
 704     {
 705         int ret = wxStringMemcmp(s1, s2, l2);
 706         return ret == 0 ? +1 : ret;
 707     }
 708 }
 709
 710 int wxString::compare(const wxString& str) const
 711 {
 712     return ::wxDoCmp(m_impl.data(), m_impl.length(),
 713                      str.m_impl.data(), str.m_impl.length());
 714 }
 715
 716 int wxString::compare(size_t nStart, size_t nLen,
 717                       const wxString& str) const
 718 {
 719     wxASSERT(nStart <= length());
 720     size_type strLen = length() - nStart;
 721     nLen = strLen < nLen ? strLen : nLen;
 722
 723     size_t pos, len;
 724     PosLenToImpl(nStart, nLen, &pos, &len);
 725
 726     return ::wxDoCmp(m_impl.data() + pos,  len,
 727                      str.m_impl.data(), str.m_impl.length());
 728 }
 729
 730 int wxString::compare(size_t nStart, size_t nLen,
 731                       const wxString& str,
 732                       size_t nStart2, size_t nLen2) const
 733 {
 734     wxASSERT(nStart <= length());
 735     wxASSERT(nStart2 <= str.length());
 736     size_type strLen  =     length() - nStart,
 737               strLen2 = str.length() - nStart2;
 738     nLen  = strLen  < nLen  ? strLen  : nLen;
 739     nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
 740
 741     size_t pos, len;
 742     PosLenToImpl(nStart, nLen, &pos, &len);
 743     size_t pos2, len2;
 744     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 745
 746     return ::wxDoCmp(m_impl.data() + pos, len,
 747                      str.m_impl.data() + pos2, len2);
 748 }
 749
 750 int wxString::compare(const char* sz) const
 751 {
 752     SubstrBufFromMB str(ImplStr(sz, npos));
 753     if ( str.len == npos )
 754         str.len = wxStringStrlen(str.data);
 755     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 756 }
 757
 758 int wxString::compare(const wchar_t* sz) const
 759 {
 760     SubstrBufFromWC str(ImplStr(sz, npos));
 761     if ( str.len == npos )
 762         str.len = wxStringStrlen(str.data);
 763     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 764 }
 765
 766 int wxString::compare(size_t nStart, size_t nLen,
 767                       const char* sz, size_t nCount) const
 768 {
 769     wxASSERT(nStart <= length());
 770     size_type strLen = length() - nStart;
 771     nLen = strLen < nLen ? strLen : nLen;
 772
 773     size_t pos, len;
 774     PosLenToImpl(nStart, nLen, &pos, &len);
 775
 776     SubstrBufFromMB str(ImplStr(sz, nCount));
 777     if ( str.len == npos )
 778         str.len = wxStringStrlen(str.data);
 779
 780     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 781 }
 782
 783 int wxString::compare(size_t nStart, size_t nLen,
 784                       const wchar_t* sz, size_t nCount) const
 785 {
 786     wxASSERT(nStart <= length());
 787     size_type strLen = length() - nStart;
 788     nLen = strLen < nLen ? strLen : nLen;
 789
 790     size_t pos, len;
 791     PosLenToImpl(nStart, nLen, &pos, &len);
 792
 793     SubstrBufFromWC str(ImplStr(sz, nCount));
 794     if ( str.len == npos )
 795         str.len = wxStringStrlen(str.data);
 796
 797     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 798 }
 799
 800 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
 801
 802
 803 // ---------------------------------------------------------------------------
 804 // find_{first,last}_[not]_of functions
 805 // ---------------------------------------------------------------------------
 806
 807 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
 808
  809 // NB: All these functions are implemented with the argument being wxChar*,
 810 //     i.e. widechar string in any Unicode build, even though native string
 811 //     representation is char* in the UTF-8 build. This is because we couldn't
 812 //     use memchr() to determine if a character is in a set encoded as UTF-8.
 813
 814 size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
 815 {
 816     return find_first_of(sz, nStart, wxStrlen(sz));
 817 }
 818
 819 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
 820 {
 821     return find_first_not_of(sz, nStart, wxStrlen(sz));
 822 }
 823
 824 size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
 825 {
 826     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 827
 828     size_t idx = nStart;
 829     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 830     {
 831         if ( wxTmemchr(sz, *i, n) )
 832             return idx;
 833     }
 834
 835     return npos;
 836 }
 837
 838 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
 839 {
 840     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 841
 842     size_t idx = nStart;
 843     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 844     {
 845         if ( !wxTmemchr(sz, *i, n) )
 846             return idx;
 847     }
 848
 849     return npos;
 850 }
 851
 852
 853 size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
 854 {
 855     return find_last_of(sz, nStart, wxStrlen(sz));
 856 }
 857
 858 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
 859 {
 860     return find_last_not_of(sz, nStart, wxStrlen(sz));
 861 }
 862
 863 size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
 864 {
 865     size_t len = length();
 866
 867     if ( nStart == npos )
 868     {
 869         nStart = len - 1;
 870     }
 871     else
 872     {
 873         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 874     }
 875
 876     size_t idx = nStart;
 877     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 878           i != rend(); --idx, ++i )
 879     {
 880         if ( wxTmemchr(sz, *i, n) )
 881             return idx;
 882     }
 883
 884     return npos;
 885 }
 886
 887 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
 888 {
 889     size_t len = length();
 890
 891     if ( nStart == npos )
 892     {
 893         nStart = len - 1;
 894     }
 895     else
 896     {
 897         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 898     }
 899
 900     size_t idx = nStart;
 901     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 902           i != rend(); --idx, ++i )
 903     {
 904         if ( !wxTmemchr(sz, *i, n) )
 905             return idx;
 906     }
 907
 908     return npos;
 909 }
 910
 911 size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
 912 {
 913     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 914
 915     size_t idx = nStart;
 916     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 917     {
 918         if ( *i != ch )
 919             return idx;
 920     }
 921
 922     return npos;
 923 }
 924
 925 size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
 926 {
 927     size_t len = length();
 928
 929     if ( nStart == npos )
 930     {
 931         nStart = len - 1;
 932     }
 933     else
 934     {
 935         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 936     }
 937
 938     size_t idx = nStart;
 939     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 940           i != rend(); --idx, ++i )
 941     {
 942         if ( *i != ch )
 943             return idx;
 944     }
 945
 946     return npos;
 947 }
 948
// the functions above were implemented for wchar_t* arguments in Unicode
// build and char* in ANSI build; below are implementations for the other
// version:
//
// wxOtherCharType is the character type NOT used by wxChar in this build and
// STRCONV converts a string of that type using wxConvLibc, casting the result
// to const wxChar*. Both macros are local to this section and are #undef'ed
// at its end.
//
// NOTE(review): the pointer produced by STRCONV presumably refers to a
// temporary conversion buffer which only lives until the end of the calling
// expression -- confirm before storing it anywhere.
#if wxUSE_UNICODE
    #define wxOtherCharType char
    #define STRCONV         (const wxChar*)wxConvLibc.cMB2WC
#else
    #define wxOtherCharType wchar_t
    #define STRCONV         (const wxChar*)wxConvLibc.cWC2MB
#endif

// Each overload below simply converts sz and forwards to the wxChar overload
// of the same name.
//
// NOTE(review): in the overloads taking n, the same n is used both as the
// amount of input to convert and as the length of the converted string; these
// may differ if the conversion changes the number of characters -- confirm.
size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_first_of(STRCONV(sz), nStart); }

size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
                               size_t n) const
    { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_last_of(STRCONV(sz), nStart); }
size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
                              size_t n) const
    { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_first_not_of(STRCONV(sz), nStart); }
size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
                                   size_t n) const
    { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_last_not_of(STRCONV(sz), nStart); }
size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
                                  size_t n) const
    { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }

// the helper macros are not needed (and must not leak) beyond this point
#undef wxOtherCharType
#undef STRCONV
 984
 985 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
 986
 987 // ===========================================================================
 988 // other common string functions
 989 // ===========================================================================
 990
 991 int wxString::CmpNoCase(const wxString& s) const
 992 {
 993     // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
 994
 995     const_iterator i1 = begin();
 996     const_iterator end1 = end();
 997     const_iterator i2 = s.begin();
 998     const_iterator end2 = s.end();
 999
1000     for ( ; i1 != end1 && i2 != end2; ++i1, ++i2 )
1001     {
1002         wxUniChar lower1 = (wxChar)wxTolower(*i1);
1003         wxUniChar lower2 = (wxChar)wxTolower(*i2);
1004         if ( lower1 != lower2 )
1005             return lower1 < lower2 ? -1 : 1;
1006     }
1007
1008     size_t len1 = length();
1009     size_t len2 = s.length();
1010
1011     if ( len1 < len2 )
1012         return -1;
1013     else if ( len1 > len2 )
1014         return 1;
1015     return 0;
1016 }
1017
1018
1019 #if wxUSE_UNICODE
1020
// When building with a compiler defining __MWERKS__, provide a fallback
// definition of __SCHAR_MAX__ (127) if it is not already defined.
// NOTE(review): nothing in this part of the file uses __SCHAR_MAX__ directly,
// presumably a header included under this compiler relies on it -- confirm.
#ifdef __MWERKS__
#ifndef __SCHAR_MAX__
#define __SCHAR_MAX__ 127
#endif
#endif
1026
1027 wxString wxString::FromAscii(const char *ascii, size_t len)
1028 {
1029     if (!ascii || len == 0)
1030        return wxEmptyString;
1031
1032     wxString res;
1033
1034     {
1035         wxStringInternalBuffer buf(res, len);
1036         wxStringCharType *dest = buf;
1037
1038         for ( ; len > 0; --len )
1039         {
1040             unsigned char c = (unsigned char)*ascii++;
1041             wxASSERT_MSG( c < 0x80,
1042                           _T("Non-ASCII value passed to FromAscii().") );
1043
1044             *dest++ = (wchar_t)c;
1045         }
1046     }
1047
1048     return res;
1049 }
1050
1051 wxString wxString::FromAscii(const char *ascii)
1052 {
1053     return FromAscii(ascii, wxStrlen(ascii));
1054 }
1055
1056 wxString wxString::FromAscii(char ascii)
1057 {
1058     // What do we do with '\0' ?
1059
1060     unsigned char c = (unsigned char)ascii;
1061
1062     wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") );
1063
1064     // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1065     return wxString(wxUniChar((wchar_t)c));
1066 }
1067
1068 const wxCharBuffer wxString::ToAscii() const
1069 {
1070     // this will allocate enough space for the terminating NUL too
1071     wxCharBuffer buffer(length());
1072     char *dest = buffer.data();
1073
1074     for ( const_iterator i = begin(); i != end(); ++i )
1075     {
1076         wxUniChar c(*i);
1077         // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1078         *dest++ = c.IsAscii() ? (char)c : '_';
1079
1080         // the output string can't have embedded NULs anyhow, so we can safely
1081         // stop at first of them even if we do have any
1082         if ( !c )
1083             break;
1084     }
1085
1086     return buffer;
1087 }
1088
1089 #endif // wxUSE_UNICODE
1090
1091 // extract string of length nCount starting at nFirst
1092 wxString wxString::Mid(size_t nFirst, size_t nCount) const
1093 {
1094     size_t nLen = length();
1095
1096     // default value of nCount is npos and means "till the end"
1097     if ( nCount == npos )
1098     {
1099         nCount = nLen - nFirst;
1100     }
1101
1102     // out-of-bounds requests return sensible things
1103     if ( nFirst + nCount > nLen )
1104     {
1105         nCount = nLen - nFirst;
1106     }
1107
1108     if ( nFirst > nLen )
1109     {
1110         // AllocCopy() will return empty string
1111         return wxEmptyString;
1112     }
1113
1114     wxString dest(*this, nFirst, nCount);
1115     if ( dest.length() != nCount )
1116     {
1117         wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1118     }
1119
1120     return dest;
1121 }
1122
1123 // check that the string starts with prefix and return the rest of the string
1124 // in the provided pointer if it is not NULL, otherwise return false
1125 bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
1126 {
1127     if ( compare(0, prefix.length(), prefix) != 0 )
1128         return false;
1129
1130     if ( rest )
1131     {
1132         // put the rest of the string into provided pointer
1133         rest->assign(*this, prefix.length(), npos);
1134     }
1135
1136     return true;
1137 }
1138
1139
1140 // check that the string ends with suffix and return the rest of it in the
1141 // provided pointer if it is not NULL, otherwise return false
1142 bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
1143 {
1144     int start = length() - suffix.length();
1145
1146     if ( start < 0 || compare(start, npos, suffix) != 0 )
1147         return false;
1148
1149     if ( rest )
1150     {
1151         // put the rest of the string into provided pointer
1152         rest->assign(*this, 0, start);
1153     }
1154
1155     return true;
1156 }
1157
1158
1159 // extract nCount last (rightmost) characters
1160 wxString wxString::Right(size_t nCount) const
1161 {
1162   if ( nCount > length() )
1163     nCount = length();
1164
1165   wxString dest(*this, length() - nCount, nCount);
1166   if ( dest.length() != nCount ) {
1167     wxFAIL_MSG( _T("out of memory in wxString::Right") );
1168   }
1169   return dest;
1170 }
1171
1172 // get all characters after the last occurence of ch
1173 // (returns the whole string if ch not found)
1174 wxString wxString::AfterLast(wxUniChar ch) const
1175 {
1176   wxString str;
1177   int iPos = Find(ch, true);
1178   if ( iPos == wxNOT_FOUND )
1179     str = *this;
1180   else
1181     str = wx_str() + iPos + 1;
1182
1183   return str;
1184 }
1185
1186 // extract nCount first (leftmost) characters
1187 wxString wxString::Left(size_t nCount) const
1188 {
1189   if ( nCount > length() )
1190     nCount = length();
1191
1192   wxString dest(*this, 0, nCount);
1193   if ( dest.length() != nCount ) {
1194     wxFAIL_MSG( _T("out of memory in wxString::Left") );
1195   }
1196   return dest;
1197 }
1198
1199 // get all characters before the first occurence of ch
1200 // (returns the whole string if ch not found)
1201 wxString wxString::BeforeFirst(wxUniChar ch) const
1202 {
1203   int iPos = Find(ch);
1204   if ( iPos == wxNOT_FOUND ) iPos = length();
1205   return wxString(*this, 0, iPos);
1206 }
1207
1208 /// get all characters before the last occurence of ch
1209 /// (returns empty string if ch not found)
1210 wxString wxString::BeforeLast(wxUniChar ch) const
1211 {
1212   wxString str;
1213   int iPos = Find(ch, true);
1214   if ( iPos != wxNOT_FOUND && iPos != 0 )
1215     str = wxString(c_str(), iPos);
1216
1217   return str;
1218 }
1219
1220 /// get all characters after the first occurence of ch
1221 /// (returns empty string if ch not found)
1222 wxString wxString::AfterFirst(wxUniChar ch) const
1223 {
1224   wxString str;
1225   int iPos = Find(ch);
1226   if ( iPos != wxNOT_FOUND )
1227     str = wx_str() + iPos + 1;
1228
1229   return str;
1230 }
1231
1232 // replace first (or all) occurences of some substring with another one
1233 size_t wxString::Replace(const wxString& strOld,
1234                          const wxString& strNew, bool bReplaceAll)
1235 {
1236     // if we tried to replace an empty string we'd enter an infinite loop below
1237     wxCHECK_MSG( !strOld.empty(), 0,
1238                  _T("wxString::Replace(): invalid parameter") );
1239
1240     size_t uiCount = 0;   // count of replacements made
1241
1242     // optimize the special common case: replacement of one character by
1243     // another one (in UTF-8 case we can only do this for ASCII characters)
1244     //
1245     // benchmarks show that this special version is around 3 times faster
1246     // (depending on the proportion of matching characters and UTF-8/wchar_t
1247     // build)
1248     if ( strOld.m_impl.length() == 1 && strNew.m_impl.length() == 1 )
1249     {
1250         const wxStringCharType chOld = strOld.m_impl[0],
1251                                chNew = strNew.m_impl[0];
1252
1253         // this loop is the simplified version of the one below
1254         for ( size_t pos = 0; ; )
1255         {
1256             pos = m_impl.find(chOld, pos);
1257             if ( pos == npos )
1258                 break;
1259
1260             m_impl[pos++] = chNew;
1261
1262             uiCount++;
1263
1264             if ( !bReplaceAll )
1265                 break;
1266         }
1267     }
1268     else // general case
1269     {
1270         const size_t uiOldLen = strOld.m_impl.length();
1271         const size_t uiNewLen = strNew.m_impl.length();
1272
1273         for ( size_t pos = 0; ; )
1274         {
1275             pos = m_impl.find(strOld.m_impl, pos);
1276             if ( pos == npos )
1277                 break;
1278
1279             // replace this occurrence of the old string with the new one
1280             m_impl.replace(pos, uiOldLen, strNew.m_impl);
1281
1282             // move up pos past the string that was replaced
1283             pos += uiNewLen;
1284
1285             // increase replace count
1286             uiCount++;
1287
1288             // stop after the first one?
1289             if ( !bReplaceAll )
1290                 break;
1291         }
1292     }
1293
1294     return uiCount;
1295 }
1296
1297 bool wxString::IsAscii() const
1298 {
1299     for ( const_iterator i = begin(); i != end(); ++i )
1300     {
1301         if ( !(*i).IsAscii() )
1302             return false;
1303     }
1304
1305     return true;
1306 }
1307
1308 bool wxString::IsWord() const
1309 {
1310     for ( const_iterator i = begin(); i != end(); ++i )
1311     {
1312         if ( !wxIsalpha(*i) )
1313             return false;
1314     }
1315
1316     return true;
1317 }
1318
1319 bool wxString::IsNumber() const
1320 {
1321     if ( empty() )
1322         return true;
1323
1324     const_iterator i = begin();
1325
1326     if ( *i == _T('-') || *i == _T('+') )
1327         ++i;
1328
1329     for ( ; i != end(); ++i )
1330     {
1331         if ( !wxIsdigit(*i) )
1332             return false;
1333     }
1334
1335     return true;
1336 }
1337
1338 wxString wxString::Strip(stripType w) const
1339 {
1340     wxString s = *this;
1341     if ( w & leading ) s.Trim(false);
1342     if ( w & trailing ) s.Trim(true);
1343     return s;
1344 }
1345
1346 // ---------------------------------------------------------------------------
1347 // case conversion
1348 // ---------------------------------------------------------------------------
1349
1350 wxString& wxString::MakeUpper()
1351 {
1352   for ( iterator it = begin(), en = end(); it != en; ++it )
1353     *it = (wxChar)wxToupper(*it);
1354
1355   return *this;
1356 }
1357
1358 wxString& wxString::MakeLower()
1359 {
1360   for ( iterator it = begin(), en = end(); it != en; ++it )
1361     *it = (wxChar)wxTolower(*it);
1362
1363   return *this;
1364 }
1365
1366 wxString& wxString::MakeCapitalized()
1367 {
1368     const iterator en = end();
1369     iterator it = begin();
1370     if ( it != en )
1371     {
1372         *it = (wxChar)wxToupper(*it);
1373         for ( ++it; it != en; ++it )
1374             *it = (wxChar)wxTolower(*it);
1375     }
1376
1377     return *this;
1378 }
1379
1380 // ---------------------------------------------------------------------------
1381 // trimming and padding
1382 // ---------------------------------------------------------------------------
1383
1384 // some compilers (VC++ 6.0 not to name them) return true for a call to
1385 // isspace('\xEA') in the C locale which seems to be broken to me, but we have
1386 // to live with this by checking that the character is a 7 bit one - even if
1387 // this may fail to detect some spaces (I don't know if Unicode doesn't have
1388 // space-like symbols somewhere except in the first 128 chars), it is arguably
1389 // still better than trimming away accented letters
1390 inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1391
1392 // trims spaces (in the sense of isspace) from left or right side
1393 wxString& wxString::Trim(bool bFromRight)
1394 {
1395     // first check if we're going to modify the string at all
1396     if ( !empty() &&
1397          (
1398           (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1399           (!bFromRight && wxSafeIsspace(GetChar(0u)))
1400          )
1401        )
1402     {
1403         if ( bFromRight )
1404         {
1405             // find last non-space character
1406             reverse_iterator psz = rbegin();
1407             while ( (psz != rend()) && wxSafeIsspace(*psz) )
1408                 ++psz;
1409
1410             // truncate at trailing space start
1411             erase(psz.base(), end());
1412         }
1413         else
1414         {
1415             // find first non-space character
1416             iterator psz = begin();
1417             while ( (psz != end()) && wxSafeIsspace(*psz) )
1418                 ++psz;
1419
1420             // fix up data and length
1421             erase(begin(), psz);
1422         }
1423     }
1424
1425     return *this;
1426 }
1427
1428 // adds nCount characters chPad to the string from either side
1429 wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1430 {
1431     wxString s(chPad, nCount);
1432
1433     if ( bFromRight )
1434         *this += s;
1435     else
1436     {
1437         s += *this;
1438         swap(s);
1439     }
1440
1441     return *this;
1442 }
1443
1444 // truncate the string
1445 wxString& wxString::Truncate(size_t uiLen)
1446 {
1447     if ( uiLen < length() )
1448     {
1449         erase(begin() + uiLen, end());
1450     }
1451     //else: nothing to do, string is already short enough
1452
1453     return *this;
1454 }
1455
1456 // ---------------------------------------------------------------------------
1457 // finding (return wxNOT_FOUND if not found and index otherwise)
1458 // ---------------------------------------------------------------------------
1459
1460 // find a character
1461 int wxString::Find(wxUniChar ch, bool bFromEnd) const
1462 {
1463     size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1464
1465     return (idx == npos) ? wxNOT_FOUND : (int)idx;
1466 }
1467
1468 // ----------------------------------------------------------------------------
1469 // conversion to numbers
1470 // ----------------------------------------------------------------------------
1471
1472 // The implementation of all the functions below is exactly the same so factor
1473 // it out. Note that number extraction works correctly on UTF-8 strings, so
1474 // we can use wxStringCharType and wx_str() for maximum efficiency.
1475
// DO_IF_NOT_WINCE(x) expands to x everywhere except when __WXWINCE__ is
// defined, where it expands to nothing; it is used to wrap the code using
// errno (<errno.h> is not included when __WXWINCE__ is defined).
#ifndef __WXWINCE__
    #define DO_IF_NOT_WINCE(x) x
#else
    #define DO_IF_NOT_WINCE(x)
#endif

// Common body of the string to integer conversion functions below.
//
//  out     pointer receiving the result, must not be NULL (false is returned
//          if it is)
//  base    numeric base: either 0 or in 2..36 range, asserted in debug build
//  func    strtol()-like function called as func(start, &end, base)
//  T       type of the value returned by func and stored in *out
//
// The macro expands to statements ending in "return true" without the final
// semicolon, so it must be the last thing in the function using it. *out is
// only written to if the conversion succeeded.
#define WX_STRING_TO_INT_TYPE(out, base, func, T)                           \
    wxCHECK_MSG( out, false, _T("NULL output pointer") );                   \
    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );  \
                                                                            \
    DO_IF_NOT_WINCE( errno = 0; )                                           \
                                                                            \
    const wxStringCharType *start = wx_str();                               \
    wxStringCharType *end;                                                  \
    T val = func(start, &end, base);                                        \
                                                                            \
    /* return true only if scan was stopped by the terminating NUL and */   \
    /* if the string was not empty to start with and no under/overflow */   \
    /* occurred: */                                                         \
    if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )         \
        return false;                                                       \
    *out = val;                                                             \
    return true
1499
// Converts the string to a long using wxStrtol() in the given base.
//
// Returns true and stores the result in *pVal only if the whole string was
// consumed, it was not empty and (except under WinCE) errno was not set to
// ERANGE; otherwise returns false leaving *pVal unchanged. pVal must not be
// NULL.
bool wxString::ToLong(long *pVal, int base) const
{
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtol, long);
}
1504
// Converts the string to an unsigned long using wxStrtoul() in the given
// base.
//
// Returns true and stores the result in *pVal only if the whole string was
// consumed, it was not empty and (except under WinCE) errno was not set to
// ERANGE; otherwise returns false leaving *pVal unchanged. pVal must not be
// NULL.
bool wxString::ToULong(unsigned long *pVal, int base) const
{
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoul, unsigned long);
}
1509
// Converts the string to a wxLongLong_t using wxStrtoll() in the given base.
//
// Returns true and stores the result in *pVal only if the whole string was
// consumed, it was not empty and (except under WinCE) errno was not set to
// ERANGE; otherwise returns false leaving *pVal unchanged. pVal must not be
// NULL.
bool wxString::ToLongLong(wxLongLong_t *pVal, int base) const
{
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoll, wxLongLong_t);
}
1514
// Converts the string to a wxULongLong_t using wxStrtoull() in the given
// base.
//
// Returns true and stores the result in *pVal only if the whole string was
// consumed, it was not empty and (except under WinCE) errno was not set to
// ERANGE; otherwise returns false leaving *pVal unchanged. pVal must not be
// NULL.
bool wxString::ToULongLong(wxULongLong_t *pVal, int base) const
{
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoull, wxULongLong_t);
}
1519
1520 bool wxString::ToDouble(double *pVal) const
1521 {
1522     wxCHECK_MSG( pVal, false, _T("NULL output pointer") );
1523
1524     DO_IF_NOT_WINCE( errno = 0; )
1525
1526     const wxChar *start = c_str();
1527     wxChar *end;
1528     double val = wxStrtod(start, &end);
1529
1530     // return true only if scan was stopped by the terminating NUL and if the
1531     // string was not empty to start with and no under/overflow occurred
1532     if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )
1533         return false;
1534
1535     *pVal = val;
1536
1537     return true;
1538 }
1539
1540 // ---------------------------------------------------------------------------
1541 // formatted output
1542 // ---------------------------------------------------------------------------
1543
1544 #if !wxUSE_UTF8_LOCALE_ONLY
1545 /* static */
1546 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1547 wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
1548 #else
1549 wxString wxString::DoFormatWchar(const wxChar *format, ...)
1550 #endif
1551 {
1552     va_list argptr;
1553     va_start(argptr, format);
1554
1555     wxString s;
1556     s.PrintfV(format, argptr);
1557
1558     va_end(argptr);
1559
1560     return s;
1561 }
1562 #endif // !wxUSE_UTF8_LOCALE_ONLY
1563
#if wxUSE_UNICODE_UTF8
// Returns a new string containing the variable arguments formatted according
// to the char format string, using PrintfV().
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
1579
1580 /* static */
1581 wxString wxString::FormatV(const wxString& format, va_list argptr)
1582 {
1583     wxString s;
1584     s.PrintfV(format, argptr);
1585     return s;
1586 }
1587
#if !wxUSE_UTF8_LOCALE_ONLY
// Replaces the contents of the string with the variable arguments formatted
// according to the wxChar format string and returns whatever PrintfV()
// returned.
//
// This is a member of wxStringPrintfMixinBase instead of wxString itself if
// wxNEEDS_WXSTRING_PRINTF_MIXIN is defined.
#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
#else
int wxString::DoPrintfWchar(const wxChar *format, ...)
#endif
{
    va_list argptr;
    va_start(argptr, format);

#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
    // get a pointer to the wxString instance by downcasting from the mixin
    // base class
    //
    // NOTE(review): this comment used to say that dynamic_cast<> had to be
    // used here as the only cast working safely for downcasting when
    // multiple inheritance is used, but the code uses static_cast<> --
    // confirm which one is intended
    wxString *str = static_cast<wxString*>(this);
#else
    wxString *str = this;
#endif

    int iLen = str->PrintfV(format, argptr);

    va_end(argptr);

    return iLen;
}
#endif // !wxUSE_UTF8_LOCALE_ONLY
1614
#if wxUSE_UNICODE_UTF8
// Replaces the contents of the string with the variable arguments formatted
// according to the char format string and returns whatever PrintfV()
// returned.
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);

    const int written = PrintfV(format, args);

    va_end(args);

    return written;
}
#endif // wxUSE_UNICODE_UTF8
1628
1629 /*
    Uses wxVsnprintf and places the result into this string.
1631
1632     In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1633     it is vswprintf.  Due to a discrepancy between vsnprintf and vswprintf in
1634     the ISO C99 (and thus SUSv3) standard the return value for the case of
1635     an undersized buffer is inconsistent.  For conforming vsnprintf
1636     implementations the function must return the number of characters that
1637     would have been printed had the buffer been large enough.  For conforming
1638     vswprintf implementations the function must return a negative number
1639     and set errno.
1640
1641     What vswprintf sets errno to is undefined but Darwin seems to set it to
1642     EOVERFLOW.  The only expected errno are EILSEQ and EINVAL.  Both of
1643     those are defined in the standard and backed up by several conformance
1644     statements.  Note that ENOMEM mentioned in the manual page does not
1645     apply to swprintf, only wprintf and fwprintf.
1646
1647     Official manual page:
1648     http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1649
1650     Some conformance statements (AIX, Solaris):
1651     http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1652     http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1653
1654     Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1655     EILSEQ and EINVAL are specifically defined to mean the error is other than
1656     an undersized buffer and no other errno are defined we treat those two
1657     as meaning hard errors and everything else gets the old behavior which
1658     is to keep looping and increasing buffer size until the function succeeds.
1659
1660     In practice it's impossible to determine before compilation which behavior
1661     may be used.  The vswprintf function may have vsnprintf-like behavior or
1662     vice-versa.  Behavior detected on one release can theoretically change
1663     with an updated release.  Not to mention that configure testing for it
1664     would require the test to be run on the host system, not the build system
1665     which makes cross compilation difficult. Therefore, we make no assumptions
1666     about behavior and try our best to handle every known case, including the
1667     case where wxVsnprintf returns a negative number and fails to set errno.
1668
1669     There is yet one more non-standard implementation and that is our own.
1670     Fortunately, that can be detected at compile-time.
1671
1672     On top of all that, ISO C99 explicitly defines snprintf to write a null
    character to the last position of the specified buffer.  That would be at
    the given buffer size minus 1.  It is supposed to do this even if it
1675     turns out that the buffer is sized too small.
1676
1677     Darwin (tested on 10.5) follows the C99 behavior exactly.
1678
1679     Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1680     errno even when it fails.  However, it only seems to ever fail due
1681     to an undersized buffer.
1682 */
1683 #if wxUSE_UNICODE_UTF8
1684 template<typename BufferType>
1685 #else
1686 // we only need one version in non-UTF8 builds and at least two Windows
1687 // compilers have problems with this function template, so use just one
1688 // normal function here
1689 #endif
1690 static int DoStringPrintfV(wxString& str,
1691                            const wxString& format, va_list argptr)
1692 {
1693     int size = 1024;
1694
1695     for ( ;; )
1696     {
1697 #if wxUSE_UNICODE_UTF8
1698         BufferType tmp(str, size + 1);
1699         typename BufferType::CharType *buf = tmp;
1700 #else
1701         wxStringBuffer tmp(str, size + 1);
1702         wxChar *buf = tmp;
1703 #endif
1704
1705         if ( !buf )
1706         {
1707             // out of memory
1708
1709             // in UTF-8 build, leaving uninitialized junk in the buffer
1710             // could result in invalid non-empty UTF-8 string, so just
1711             // reset the string to empty on failure:
1712             buf[0] = '\0';
1713             return -1;
1714         }
1715
1716         // wxVsnprintf() may modify the original arg pointer, so pass it
1717         // only a copy
1718         va_list argptrcopy;
1719         wxVaCopy(argptrcopy, argptr);
1720
1721 #ifndef __WXWINCE__
1722         // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1723         errno = 0;
1724 #endif
1725         int len = wxVsnprintf(buf, size, format, argptrcopy);
1726         va_end(argptrcopy);
1727
1728         // some implementations of vsnprintf() don't NUL terminate
1729         // the string if there is not enough space for it so
1730         // always do it manually
1731         // FIXME: This really seems to be the wrong and would be an off-by-one
1732         // bug except the code above allocates an extra character.
1733         buf[size] = _T('\0');
1734
1735         // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1736         // total number of characters which would have been written if the
1737         // buffer were large enough (newer standards such as Unix98)
1738         if ( len < 0 )
1739         {
1740             // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1741             //     wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1742             //     is true if *both* of them use our own implementation,
1743             //     otherwise we can't be sure
1744 #if wxUSE_WXVSNPRINTF
1745             // we know that our own implementation of wxVsnprintf() returns -1
1746             // only for a format error - thus there's something wrong with
1747             // the user's format string
1748             buf[0] = '\0';
1749             return -1;
1750 #else // possibly using system version
1751             // assume it only returns error if there is not enough space, but
1752             // as we don't know how much we need, double the current size of
1753             // the buffer
1754 #ifndef __WXWINCE__
1755             if( (errno == EILSEQ) || (errno == EINVAL) )
1756             // If errno was set to one of the two well-known hard errors
1757             // then fail immediately to avoid an infinite loop.
1758                 return -1;
1759             else
1760 #endif // __WXWINCE__
1761             // still not enough, as we don't know how much we need, double the
1762             // current size of the buffer
1763                 size *= 2;
1764 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1765         }
1766         else if ( len >= size )
1767         {
1768 #if wxUSE_WXVSNPRINTF
1769             // we know that our own implementation of wxVsnprintf() returns
1770             // size+1 when there's not enough space but that's not the size
1771             // of the required buffer!
1772             size *= 2;      // so we just double the current size of the buffer
1773 #else
1774             // some vsnprintf() implementations NUL-terminate the buffer and
1775             // some don't in len == size case, to be safe always add 1
1776             // FIXME: I don't quite understand this comment.  The vsnprintf
1777             // function is specifically defined to return the number of
1778             // characters printed not including the null terminator.
1779             // So OF COURSE you need to add 1 to get the right buffer size.
1780             // The following line is definitely correct, no question.
1781             size = len + 1;
1782 #endif
1783         }
1784         else // ok, there was enough space
1785         {
1786             break;
1787         }
1788     }
1789
1790     // we could have overshot
1791     str.Shrink();
1792
1793     return str.length();
1794 }
1795
// Replaces the contents of this string with the arguments in argptr
// formatted according to format and returns the value returned by
// DoStringPrintfV(), i.e. the length of the string or -1 on error.
//
// This function only selects the buffer type to use with DoStringPrintfV()
// depending on the build: the UTF-8 buffer is always used if
// wxUSE_UTF8_LOCALE_ONLY and, in other UTF-8 builds, if wxLocaleIsUtf8 is
// true at run-time, otherwise the wxChar-based wxStringBuffer is used.
int wxString::PrintfV(const wxString& format, va_list argptr)
{
#if wxUSE_UNICODE_UTF8
    // buffer giving access to the string contents as char
    #if wxUSE_STL_BASED_WXSTRING
        typedef wxStringTypeBuffer<char> Utf8Buffer;
    #else
        typedef wxStringInternalBuffer Utf8Buffer;
    #endif
#endif

#if wxUSE_UTF8_LOCALE_ONLY
    return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
#else
    #if wxUSE_UNICODE_UTF8
    if ( wxLocaleIsUtf8 )
        return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
    else
        // wxChar* version
        return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
    #else
        // DoStringPrintfV() is not a template in non-UTF-8 builds
        return DoStringPrintfV(*this, format, argptr);
    #endif // UTF8/WCHAR
#endif
}
1820
1821 // ----------------------------------------------------------------------------
1822 // misc other operations
1823 // ----------------------------------------------------------------------------
1824
1825 // returns true if the string matches the pattern which may contain '*' and
1826 // '?' metacharacters (as usual, '?' matches any character and '*' any number
1827 // of them)
1828 bool wxString::Matches(const wxString& mask) const
1829 {
1830     // I disable this code as it doesn't seem to be faster (in fact, it seems
1831     // to be much slower) than the old, hand-written code below and using it
1832     // here requires always linking with libregex even if the user code doesn't
1833     // use it
1834 #if 0 // wxUSE_REGEX
1835     // first translate the shell-like mask into a regex
1836     wxString pattern;
1837     pattern.reserve(wxStrlen(pszMask));
1838
1839     pattern += _T('^');
1840     while ( *pszMask )
1841     {
1842         switch ( *pszMask )
1843         {
1844             case _T('?'):
1845                 pattern += _T('.');
1846                 break;
1847
1848             case _T('*'):
1849                 pattern += _T(".*");
1850                 break;
1851
1852             case _T('^'):
1853             case _T('.'):
1854             case _T('$'):
1855             case _T('('):
1856             case _T(')'):
1857             case _T('|'):
1858             case _T('+'):
1859             case _T('\\'):
1860                 // these characters are special in a RE, quote them
1861                 // (however note that we don't quote '[' and ']' to allow
1862                 // using them for Unix shell like matching)
1863                 pattern += _T('\\');
1864                 // fall through
1865
1866             default:
1867                 pattern += *pszMask;
1868         }
1869
1870         pszMask++;
1871     }
1872     pattern += _T('$');
1873
1874     // and now use it
1875     return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
1876 #else // !wxUSE_REGEX
1877   // TODO: this is, of course, awfully inefficient...
1878
1879   // FIXME-UTF8: implement using iterators, remove #if
1880 #if wxUSE_UNICODE_UTF8
1881   wxWCharBuffer maskBuf = mask.wc_str();
1882   wxWCharBuffer txtBuf = wc_str();
1883   const wxChar *pszMask = maskBuf.data();
1884   const wxChar *pszTxt = txtBuf.data();
1885 #else
1886   const wxChar *pszMask = mask.wx_str();
1887   // the char currently being checked
1888   const wxChar *pszTxt = wx_str();
1889 #endif
1890
1891   // the last location where '*' matched
1892   const wxChar *pszLastStarInText = NULL;
1893   const wxChar *pszLastStarInMask = NULL;
1894
1895 match:
1896   for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
1897     switch ( *pszMask ) {
1898       case wxT('?'):
1899         if ( *pszTxt == wxT('\0') )
1900           return false;
1901
1902         // pszTxt and pszMask will be incremented in the loop statement
1903
1904         break;
1905
1906       case wxT('*'):
1907         {
1908           // remember where we started to be able to backtrack later
1909           pszLastStarInText = pszTxt;
1910           pszLastStarInMask = pszMask;
1911
1912           // ignore special chars immediately following this one
1913           // (should this be an error?)
1914           while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
1915             pszMask++;
1916
1917           // if there is nothing more, match
1918           if ( *pszMask == wxT('\0') )
1919             return true;
1920
1921           // are there any other metacharacters in the mask?
1922           size_t uiLenMask;
1923           const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));
1924
1925           if ( pEndMask != NULL ) {
1926             // we have to match the string between two metachars
1927             uiLenMask = pEndMask - pszMask;
1928           }
1929           else {
1930             // we have to match the remainder of the string
1931             uiLenMask = wxStrlen(pszMask);
1932           }
1933
1934           wxString strToMatch(pszMask, uiLenMask);
1935           const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
1936           if ( pMatch == NULL )
1937             return false;
1938
1939           // -1 to compensate "++" in the loop
1940           pszTxt = pMatch + uiLenMask - 1;
1941           pszMask += uiLenMask - 1;
1942         }
1943         break;
1944
1945       default:
1946         if ( *pszMask != *pszTxt )
1947           return false;
1948         break;
1949     }
1950   }
1951
1952   // match only if nothing left
1953   if ( *pszTxt == wxT('\0') )
1954     return true;
1955
1956   // if we failed to match, backtrack if we can
1957   if ( pszLastStarInText ) {
1958     pszTxt = pszLastStarInText + 1;
1959     pszMask = pszLastStarInMask;
1960
1961     pszLastStarInText = NULL;
1962
1963     // don't bother resetting pszLastStarInMask, it's unnecessary
1964
1965     goto match;
1966   }
1967
1968   return false;
1969 #endif // wxUSE_REGEX/!wxUSE_REGEX
1970 }
1971
1972 // Count the number of chars
1973 int wxString::Freq(wxUniChar ch) const
1974 {
1975     int count = 0;
1976     for ( const_iterator i = begin(); i != end(); ++i )
1977     {
1978         if ( *i == ch )
1979             count ++;
1980     }
1981     return count;
1982 }
1983
1984 // ----------------------------------------------------------------------------
1985 // wxUTF8StringBuffer
1986 // ----------------------------------------------------------------------------
1987
1988 #if wxUSE_UNICODE_WCHAR
1989 wxUTF8StringBuffer::~wxUTF8StringBuffer()
1990 {
1991     wxMBConvStrictUTF8 conv;
1992     size_t wlen = conv.ToWChar(NULL, 0, m_buf);
1993     wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
1994
1995     wxStringInternalBuffer wbuf(m_str, wlen);
1996     conv.ToWChar(wbuf, wlen, m_buf);
1997 }
1998
1999 wxUTF8StringBufferLength::~wxUTF8StringBufferLength()
2000 {
2001     wxCHECK_RET(m_lenSet, "length not set");
2002
2003     wxMBConvStrictUTF8 conv;
2004     size_t wlen = conv.ToWChar(NULL, 0, m_buf, m_len);
2005     wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
2006
2007     wxStringInternalBufferLength wbuf(m_str, wlen);
2008     conv.ToWChar(wbuf, wlen, m_buf, m_len);
2009     wbuf.SetLength(wlen);
2010 }
2011 #endif // wxUSE_UNICODE_WCHAR
2012
2013 // ----------------------------------------------------------------------------
// wxCharTypeBuffer<T>
2015 // ----------------------------------------------------------------------------
2016
2017 template<>
2018 wxCharTypeBuffer<char>::Data
2019 wxCharTypeBuffer<char>::NullData(NULL);
2020
2021 template<>
2022 wxCharTypeBuffer<wchar_t>::Data
2023 wxCharTypeBuffer<wchar_t>::NullData(NULL);