src/common/string.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/string.cpp
   3 // Purpose:     wxString class
   4 // Author:      Vadim Zeitlin, Ryan Norton
   5 // Modified by:
   6 // Created:     29/01/98
   7 // RCS-ID:      $Id$
   8 // Copyright:   (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
   9 //              (c) 2004 Ryan Norton <wxprojects@comcast.net>
  10 // Licence:     wxWindows licence
  11 /////////////////////////////////////////////////////////////////////////////
  12
  13 // ===========================================================================
  14 // headers, declarations, constants
  15 // ===========================================================================
  16
  17 // For compilers that support precompilation, includes "wx.h".
  18 #include "wx/wxprec.h"
  19
  20 #ifdef __BORLANDC__
  21     #pragma hdrstop
  22 #endif
  23
  24 #ifndef WX_PRECOMP
  25     #include "wx/string.h"
  26 #endif
  27
  28 #include <ctype.h>
  29
  30 #ifndef __WXWINCE__
  31     #include <errno.h>
  32 #endif
  33
  34 #include <string.h>
  35 #include <stdlib.h>
  36
  37 #ifdef __SALFORDC__
  38     #include <clib.h>
  39 #endif
  40
  41 #include "wx/hashmap.h"
  42
  43 // string handling functions used by wxString:
  44 #if wxUSE_UNICODE_UTF8
  45     #define wxStringMemcpy   memcpy
  46     #define wxStringMemcmp   memcmp
  47     #define wxStringMemchr   memchr
  48     #define wxStringStrlen   strlen
  49 #else
  50     #define wxStringMemcpy   wxTmemcpy
  51     #define wxStringMemcmp   wxTmemcmp
  52     #define wxStringMemchr   wxTmemchr
  53     #define wxStringStrlen   wxStrlen
  54 #endif
  55
  56
  57 // ---------------------------------------------------------------------------
  58 // static class variables definition
  59 // ---------------------------------------------------------------------------
  60
  61 //According to STL _must_ be a -1 size_t
  62 const size_t wxString::npos = (size_t) -1;
  63
  64 // ----------------------------------------------------------------------------
  65 // global functions
  66 // ----------------------------------------------------------------------------
  67
  68 #if wxUSE_STD_IOSTREAM
  69
  70 #include <iostream>
  71
  72 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
  73 {
  74 // FIXME-UTF8: always, not only if wxUSE_UNICODE
  75 #if wxUSE_UNICODE && !defined(__BORLANDC__)
  76     return os << (const wchar_t*)str.AsWCharBuf();
  77 #else
  78     return os << (const char*)str.AsCharBuf();
  79 #endif
  80 }
  81
  82 wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
  83 {
  84     return os << str.c_str();
  85 }
  86
  87 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str)
  88 {
  89     return os << str.data();
  90 }
  91
  92 #ifndef __BORLANDC__
  93 wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)
  94 {
  95     return os << str.data();
  96 }
  97 #endif
  98
  99 #endif // wxUSE_STD_IOSTREAM
 100
 101 // ===========================================================================
 102 // wxString class core
 103 // ===========================================================================
 104
 105 #if wxUSE_UNICODE_UTF8
 106
 107 void wxString::PosLenToImpl(size_t pos, size_t len,
 108                             size_t *implPos, size_t *implLen) const
 109 {
 110     if ( pos == npos )
 111         *implPos = npos;
 112     else
 113     {
 114         const_iterator i = begin() + pos;
 115         *implPos = wxStringImpl::const_iterator(i.impl()) - m_impl.begin();
 116         if ( len == npos )
 117             *implLen = npos;
 118         else
 119         {
 120             // too large length is interpreted as "to the end of the string"
 121             // FIXME-UTF8: verify this is the case in std::string, assert
 122             // otherwise
 123             if ( pos + len > length() )
 124                 len = length() - pos;
 125
 126             *implLen = (i + len).impl() - i.impl();
 127         }
 128     }
 129 }
 130
 131 #endif // wxUSE_UNICODE_UTF8
 132
 133 // ----------------------------------------------------------------------------
 134 // wxCStrData converted strings caching
 135 // ----------------------------------------------------------------------------
 136
 137 // FIXME-UTF8: temporarily disabled because it doesn't work with global
 138 //             string objects; re-enable after fixing this bug and benchmarking
 139 //             performance to see if using a hash is a good idea at all
 140 #if 0
 141
 142 // For backward compatibility reasons, it must be possible to assign the value
 143 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
 144 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
 145 // because the memory would be freed immediately, but it has to be valid as long
 146 // as the string is not modified, so that code like this still works:
 147 //
 148 // const wxChar *s = str.c_str();
 149 // while ( s ) { ... }
 150
 151 // FIXME-UTF8: not thread safe!
 152 // FIXME-UTF8: we currently clear the cached conversion only when the string is
 153 //             destroyed, but we should do it when the string is modified, to
 154 //             keep memory usage down
 155 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
 156 //             invalidated the cache on every change, we could keep the previous
 157 //             conversion
 158 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
 159 //             to use mb_str() or wc_str() instead of (const [w]char*)c_str()
 160
 161 template<typename T>
 162 static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
 163 {
 164     typename T::iterator i = hash.find(wxConstCast(s, wxString));
 165     if ( i != hash.end() )
 166     {
 167         free(i->second);
 168         hash.erase(i);
 169     }
 170 }
 171
 172 #if wxUSE_UNICODE
 173 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
 174 //     so we have to use wxString* here and const-cast when used
 175 WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
 176                     wxStringCharConversionCache);
 177 static wxStringCharConversionCache gs_stringsCharCache;
 178
 179 const char* wxCStrData::AsChar() const
 180 {
 181     // remove previously cache value, if any (see FIXMEs above):
 182     DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
 183
 184     // convert the string and keep it:
 185     const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
 186         m_str->mb_str().release();
 187
 188     return s + m_offset;
 189 }
 190 #endif // wxUSE_UNICODE
 191
 192 #if !wxUSE_UNICODE_WCHAR
 193 WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
 194                     wxStringWCharConversionCache);
 195 static wxStringWCharConversionCache gs_stringsWCharCache;
 196
 197 const wchar_t* wxCStrData::AsWChar() const
 198 {
 199     // remove previously cache value, if any (see FIXMEs above):
 200     DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
 201
 202     // convert the string and keep it:
 203     const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
 204         m_str->wc_str().release();
 205
 206     return s + m_offset;
 207 }
 208 #endif // !wxUSE_UNICODE_WCHAR
 209
 210 wxString::~wxString()
 211 {
 212 #if wxUSE_UNICODE
 213     // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
 214     DeleteStringFromConversionCache(gs_stringsCharCache, this);
 215 #endif
 216 #if !wxUSE_UNICODE_WCHAR
 217     DeleteStringFromConversionCache(gs_stringsWCharCache, this);
 218 #endif
 219 }
 220 #endif
 221
 222 #if wxUSE_UNICODE
 223 const char* wxCStrData::AsChar() const
 224 {
 225     wxString *str = wxConstCast(m_str, wxString);
 226
 227     // convert the string:
 228     wxCharBuffer buf(str->mb_str());
 229
 230     // FIXME-UTF8: do the conversion in-place in the existing buffer
 231     if ( str->m_convertedToChar &&
 232          strlen(buf) == strlen(str->m_convertedToChar) )
 233     {
 234         // keep the same buffer for as long as possible, so that several calls
 235         // to c_str() in a row still work:
 236         strcpy(str->m_convertedToChar, buf);
 237     }
 238     else
 239     {
 240         str->m_convertedToChar = buf.release();
 241     }
 242
 243     // and keep it:
 244     return str->m_convertedToChar + m_offset;
 245 }
 246 #endif // wxUSE_UNICODE
 247
 248 #if !wxUSE_UNICODE_WCHAR
 249 const wchar_t* wxCStrData::AsWChar() const
 250 {
 251     wxString *str = wxConstCast(m_str, wxString);
 252
 253     // convert the string:
 254     wxWCharBuffer buf(str->wc_str());
 255
 256     // FIXME-UTF8: do the conversion in-place in the existing buffer
 257     if ( str->m_convertedToWChar &&
 258          wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
 259     {
 260         // keep the same buffer for as long as possible, so that several calls
 261         // to c_str() in a row still work:
 262         memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
 263     }
 264     else
 265     {
 266         str->m_convertedToWChar = buf.release();
 267     }
 268
 269     // and keep it:
 270     return str->m_convertedToWChar + m_offset;
 271 }
 272 #endif // !wxUSE_UNICODE_WCHAR
 273
 274 // ===========================================================================
 275 // wxString class core
 276 // ===========================================================================
 277
 278 // ---------------------------------------------------------------------------
 279 // construction and conversion
 280 // ---------------------------------------------------------------------------
 281
 282 #if wxUSE_UNICODE_WCHAR
 283 /* static */
 284 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 285                                                const wxMBConv& conv)
 286 {
 287     // anything to do?
 288     if ( !psz || nLength == 0 )
 289         return SubstrBufFromMB(L"", 0);
 290
 291     if ( nLength == npos )
 292         nLength = wxNO_LEN;
 293
 294     size_t wcLen;
 295     wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 296     if ( !wcLen )
 297         return SubstrBufFromMB(_T(""), 0);
 298     else
 299         return SubstrBufFromMB(wcBuf, wcLen);
 300 }
 301 #endif // wxUSE_UNICODE_WCHAR
 302
 303 #if wxUSE_UNICODE_UTF8
 304 /* static */
 305 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 306                                                const wxMBConv& conv)
 307 {
 308     // FIXME-UTF8: return as-is without copying under UTF8 locale, return
 309     //             converted string under other locales - needs wxCharBuffer
 310     //             changes
 311
 312     // anything to do?
 313     if ( !psz || nLength == 0 )
 314         return SubstrBufFromMB("", 0);
 315
 316     if ( nLength == npos )
 317         nLength = wxNO_LEN;
 318
 319     // first convert to wide string:
 320     size_t wcLen;
 321     wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 322     if ( !wcLen )
 323         return SubstrBufFromMB("", 0);
 324
 325     // and then to UTF-8:
 326     SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxConvUTF8));
 327     // widechar -> UTF-8 conversion isn't supposed to ever fail:
 328     wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
 329
 330     return buf;
 331 }
 332 #endif // wxUSE_UNICODE_UTF8
 333
 334 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 335 /* static */
 336 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
 337                                                const wxMBConv& conv)
 338 {
 339     // anything to do?
 340     if ( !pwz || nLength == 0 )
 341         return SubstrBufFromWC("", 0);
 342
 343     if ( nLength == npos )
 344         nLength = wxNO_LEN;
 345
 346     size_t mbLen;
 347     wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
 348     if ( !mbLen )
 349         return SubstrBufFromWC("", 0);
 350     else
 351         return SubstrBufFromWC(mbBuf, mbLen);
 352 }
 353 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 354
 355
 356 #if wxUSE_UNICODE_WCHAR
 357
 358 //Convert wxString in Unicode mode to a multi-byte string
 359 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
 360 {
 361     return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL);
 362 }
 363
 364 #elif wxUSE_UNICODE_UTF8
 365
 366 const wxWCharBuffer wxString::wc_str() const
 367 {
 368     return wxConvUTF8.cMB2WC(m_impl.c_str(),
 369                              m_impl.length() + 1 /* size, not length */,
 370                              NULL);
 371 }
 372
 373 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
 374 {
 375     // FIXME-UTF8: optimize the case when conv==wxConvUTF8 or wxConvLibc
 376     //             under UTF8 locale
 377     // FIXME-UTF8: use wc_str() here once we have buffers with length
 378
 379     size_t wcLen;
 380     wxWCharBuffer wcBuf(
 381             wxConvUTF8.cMB2WC(m_impl.c_str(),
 382                               m_impl.length() + 1 /* size, not length */,
 383                               &wcLen));
 384     if ( !wcLen )
 385         return wxCharBuffer("");
 386
 387     return conv.cWC2MB(wcBuf, wcLen, NULL);
 388 }
 389
 390 #else // ANSI
 391
 392 //Converts this string to a wide character string if unicode
 393 //mode is not enabled and wxUSE_WCHAR_T is enabled
 394 const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
 395 {
 396     return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL);
 397 }
 398
 399 #endif // Unicode/ANSI
 400
 401 // shrink to minimal size (releasing extra memory)
 402 bool wxString::Shrink()
 403 {
 404   wxString tmp(begin(), end());
 405   swap(tmp);
 406   return tmp.length() == length();
 407 }
 408
 409 // deprecated compatibility code:
 410 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
 411 wxStringCharType *wxString::GetWriteBuf(size_t nLen)
 412 {
 413     return DoGetWriteBuf(nLen);
 414 }
 415
 416 void wxString::UngetWriteBuf()
 417 {
 418     DoUngetWriteBuf();
 419 }
 420
 421 void wxString::UngetWriteBuf(size_t nLen)
 422 {
 423     DoUngetWriteBuf(nLen);
 424 }
 425 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
 426
 427
 428 // ---------------------------------------------------------------------------
 429 // data access
 430 // ---------------------------------------------------------------------------
 431
 432 // all functions are inline in string.h
 433
 434 // ---------------------------------------------------------------------------
 435 // concatenation operators
 436 // ---------------------------------------------------------------------------
 437
 438 /*
 439  * concatenation functions come in 5 flavours:
 440  *  string + string
 441  *  char   + string      and      string + char
 442  *  C str  + string      and      string + C str
 443  */
 444
 445 wxString operator+(const wxString& str1, const wxString& str2)
 446 {
 447 #if !wxUSE_STL_BASED_WXSTRING
 448     wxASSERT( str1.IsValid() );
 449     wxASSERT( str2.IsValid() );
 450 #endif
 451
 452     wxString s = str1;
 453     s += str2;
 454
 455     return s;
 456 }
 457
 458 wxString operator+(const wxString& str, wxUniChar ch)
 459 {
 460 #if !wxUSE_STL_BASED_WXSTRING
 461     wxASSERT( str.IsValid() );
 462 #endif
 463
 464     wxString s = str;
 465     s += ch;
 466
 467     return s;
 468 }
 469
 470 wxString operator+(wxUniChar ch, const wxString& str)
 471 {
 472 #if !wxUSE_STL_BASED_WXSTRING
 473     wxASSERT( str.IsValid() );
 474 #endif
 475
 476     wxString s = ch;
 477     s += str;
 478
 479     return s;
 480 }
 481
 482 wxString operator+(const wxString& str, const char *psz)
 483 {
 484 #if !wxUSE_STL_BASED_WXSTRING
 485     wxASSERT( str.IsValid() );
 486 #endif
 487
 488     wxString s;
 489     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 490         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 491     }
 492     s += str;
 493     s += psz;
 494
 495     return s;
 496 }
 497
 498 wxString operator+(const wxString& str, const wchar_t *pwz)
 499 {
 500 #if !wxUSE_STL_BASED_WXSTRING
 501     wxASSERT( str.IsValid() );
 502 #endif
 503
 504     wxString s;
 505     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 506         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 507     }
 508     s += str;
 509     s += pwz;
 510
 511     return s;
 512 }
 513
 514 wxString operator+(const char *psz, const wxString& str)
 515 {
 516 #if !wxUSE_STL_BASED_WXSTRING
 517     wxASSERT( str.IsValid() );
 518 #endif
 519
 520     wxString s;
 521     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 522         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 523     }
 524     s = psz;
 525     s += str;
 526
 527     return s;
 528 }
 529
 530 wxString operator+(const wchar_t *pwz, const wxString& str)
 531 {
 532 #if !wxUSE_STL_BASED_WXSTRING
 533     wxASSERT( str.IsValid() );
 534 #endif
 535
 536     wxString s;
 537     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 538         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 539     }
 540     s = pwz;
 541     s += str;
 542
 543     return s;
 544 }
 545
 546 // ---------------------------------------------------------------------------
 547 // string comparison
 548 // ---------------------------------------------------------------------------
 549
 550 #ifdef HAVE_STD_STRING_COMPARE
 551
 552 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
 553 //     UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
 554 //     sort strings in characters code point order by sorting the byte sequence
 555 //     in byte values order (i.e. what strcmp() and memcmp() do).
 556
 557 int wxString::compare(const wxString& str) const
 558 {
 559     return m_impl.compare(str.m_impl);
 560 }
 561
 562 int wxString::compare(size_t nStart, size_t nLen,
 563                       const wxString& str) const
 564 {
 565     size_t pos, len;
 566     PosLenToImpl(nStart, nLen, &pos, &len);
 567     return m_impl.compare(pos, len, str.m_impl);
 568 }
 569
 570 int wxString::compare(size_t nStart, size_t nLen,
 571                       const wxString& str,
 572                       size_t nStart2, size_t nLen2) const
 573 {
 574     size_t pos, len;
 575     PosLenToImpl(nStart, nLen, &pos, &len);
 576
 577     size_t pos2, len2;
 578     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 579
 580     return m_impl.compare(pos, len, str.m_impl, pos2, len2);
 581 }
 582
 583 int wxString::compare(const char* sz) const
 584 {
 585     return m_impl.compare(ImplStr(sz));
 586 }
 587
 588 int wxString::compare(const wchar_t* sz) const
 589 {
 590     return m_impl.compare(ImplStr(sz));
 591 }
 592
 593 int wxString::compare(size_t nStart, size_t nLen,
 594                       const char* sz, size_t nCount) const
 595 {
 596     size_t pos, len;
 597     PosLenToImpl(nStart, nLen, &pos, &len);
 598
 599     SubstrBufFromMB str(ImplStr(sz, nCount));
 600
 601     return m_impl.compare(pos, len, str.data, str.len);
 602 }
 603
 604 int wxString::compare(size_t nStart, size_t nLen,
 605                       const wchar_t* sz, size_t nCount) const
 606 {
 607     size_t pos, len;
 608     PosLenToImpl(nStart, nLen, &pos, &len);
 609
 610     SubstrBufFromWC str(ImplStr(sz, nCount));
 611
 612     return m_impl.compare(pos, len, str.data, str.len);
 613 }
 614
 615 #else // !HAVE_STD_STRING_COMPARE
 616
 617 static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
 618                           const wxStringCharType* s2, size_t l2)
 619 {
 620     if( l1 == l2 )
 621         return wxStringMemcmp(s1, s2, l1);
 622     else if( l1 < l2 )
 623     {
 624         int ret = wxStringMemcmp(s1, s2, l1);
 625         return ret == 0 ? -1 : ret;
 626     }
 627     else
 628     {
 629         int ret = wxStringMemcmp(s1, s2, l2);
 630         return ret == 0 ? +1 : ret;
 631     }
 632 }
 633
 634 int wxString::compare(const wxString& str) const
 635 {
 636     return ::wxDoCmp(m_impl.data(), m_impl.length(),
 637                      str.m_impl.data(), str.m_impl.length());
 638 }
 639
 640 int wxString::compare(size_t nStart, size_t nLen,
 641                       const wxString& str) const
 642 {
 643     wxASSERT(nStart <= length());
 644     size_type strLen = length() - nStart;
 645     nLen = strLen < nLen ? strLen : nLen;
 646
 647     size_t pos, len;
 648     PosLenToImpl(nStart, nLen, &pos, &len);
 649
 650     return ::wxDoCmp(m_impl.data() + pos,  len,
 651                      str.m_impl.data(), str.m_impl.length());
 652 }
 653
 654 int wxString::compare(size_t nStart, size_t nLen,
 655                       const wxString& str,
 656                       size_t nStart2, size_t nLen2) const
 657 {
 658     wxASSERT(nStart <= length());
 659     wxASSERT(nStart2 <= str.length());
 660     size_type strLen  =     length() - nStart,
 661               strLen2 = str.length() - nStart2;
 662     nLen  = strLen  < nLen  ? strLen  : nLen;
 663     nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
 664
 665     size_t pos, len;
 666     PosLenToImpl(nStart, nLen, &pos, &len);
 667     size_t pos2, len2;
 668     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 669
 670     return ::wxDoCmp(m_impl.data() + pos, len,
 671                      str.m_impl.data() + pos2, len2);
 672 }
 673
 674 int wxString::compare(const char* sz) const
 675 {
 676     SubstrBufFromMB str(ImplStr(sz, npos));
 677     if ( str.len == npos )
 678         str.len = wxStringStrlen(str.data);
 679     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 680 }
 681
 682 int wxString::compare(const wchar_t* sz) const
 683 {
 684     SubstrBufFromWC str(ImplStr(sz, npos));
 685     if ( str.len == npos )
 686         str.len = wxStringStrlen(str.data);
 687     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 688 }
 689
 690 int wxString::compare(size_t nStart, size_t nLen,
 691                       const char* sz, size_t nCount) const
 692 {
 693     wxASSERT(nStart <= length());
 694     size_type strLen = length() - nStart;
 695     nLen = strLen < nLen ? strLen : nLen;
 696
 697     size_t pos, len;
 698     PosLenToImpl(nStart, nLen, &pos, &len);
 699
 700     SubstrBufFromMB str(ImplStr(sz, nCount));
 701     if ( str.len == npos )
 702         str.len = wxStringStrlen(str.data);
 703
 704     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 705 }
 706
 707 int wxString::compare(size_t nStart, size_t nLen,
 708                       const wchar_t* sz, size_t nCount) const
 709 {
 710     wxASSERT(nStart <= length());
 711     size_type strLen = length() - nStart;
 712     nLen = strLen < nLen ? strLen : nLen;
 713
 714     size_t pos, len;
 715     PosLenToImpl(nStart, nLen, &pos, &len);
 716
 717     SubstrBufFromWC str(ImplStr(sz, nCount));
 718     if ( str.len == npos )
 719         str.len = wxStringStrlen(str.data);
 720
 721     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 722 }
 723
 724 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
 725
 726
 727 // ---------------------------------------------------------------------------
 728 // find_{first,last}_[not]_of functions
 729 // ---------------------------------------------------------------------------
 730
 731 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
 732
 733 // NB: All these functions are implemented  with the argument being wxChar*,
 734 //     i.e. widechar string in any Unicode build, even though native string
 735 //     representation is char* in the UTF-8 build. This is because we couldn't
 736 //     use memchr() to determine if a character is in a set encoded as UTF-8.
 737
 738 size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
 739 {
 740     return find_first_of(sz, nStart, wxStrlen(sz));
 741 }
 742
 743 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
 744 {
 745     return find_first_not_of(sz, nStart, wxStrlen(sz));
 746 }
 747
 748 size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
 749 {
 750     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 751
 752     size_t idx = nStart;
 753     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 754     {
 755         if ( wxTmemchr(sz, *i, n) )
 756             return idx;
 757     }
 758
 759     return npos;
 760 }
 761
 762 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
 763 {
 764     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 765
 766     size_t idx = nStart;
 767     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 768     {
 769         if ( !wxTmemchr(sz, *i, n) )
 770             return idx;
 771     }
 772
 773     return npos;
 774 }
 775
 776
 777 size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
 778 {
 779     return find_last_of(sz, nStart, wxStrlen(sz));
 780 }
 781
 782 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
 783 {
 784     return find_last_not_of(sz, nStart, wxStrlen(sz));
 785 }
 786
 787 size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
 788 {
 789     size_t len = length();
 790
 791     if ( nStart == npos )
 792     {
 793         nStart = len - 1;
 794     }
 795     else
 796     {
 797         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 798     }
 799
 800     size_t idx = nStart;
 801     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 802           i != rend(); --idx, ++i )
 803     {
 804         if ( wxTmemchr(sz, *i, n) )
 805             return idx;
 806     }
 807
 808     return npos;
 809 }
 810
 811 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
 812 {
 813     size_t len = length();
 814
 815     if ( nStart == npos )
 816     {
 817         nStart = len - 1;
 818     }
 819     else
 820     {
 821         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 822     }
 823
 824     size_t idx = nStart;
 825     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 826           i != rend(); --idx, ++i )
 827     {
 828         if ( !wxTmemchr(sz, *i, n) )
 829             return idx;
 830     }
 831
 832     return npos;
 833 }
 834
 835 size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
 836 {
 837     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 838
 839     size_t idx = nStart;
 840     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 841     {
 842         if ( *i != ch )
 843             return idx;
 844     }
 845
 846     return npos;
 847 }
 848
 849 size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
 850 {
 851     size_t len = length();
 852
 853     if ( nStart == npos )
 854     {
 855         nStart = len - 1;
 856     }
 857     else
 858     {
 859         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 860     }
 861
 862     size_t idx = nStart;
 863     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 864           i != rend(); --idx, ++i )
 865     {
 866         if ( *i != ch )
 867             return idx;
 868     }
 869
 870     return npos;
 871 }
 872
 873 // the functions above were implemented for wchar_t* arguments in Unicode
 874 // build and char* in ANSI build; below are implementations for the other
 875 // version:
 876 #if wxUSE_UNICODE
 877     #define wxOtherCharType char
 878     #define STRCONV         (const wxChar*)wxConvLibc.cMB2WC
 879 #else
 880     #define wxOtherCharType wchar_t
 881     #define STRCONV         (const wxChar*)wxConvLibc.cWC2MB
 882 #endif
 883
 884 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
 885     { return find_first_of(STRCONV(sz), nStart); }
 886
 887 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
 888                                size_t n) const
 889     { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
 890 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
 891     { return find_last_of(STRCONV(sz), nStart); }
 892 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
 893                               size_t n) const
 894     { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
 895 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
 896     { return find_first_not_of(STRCONV(sz), nStart); }
 897 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
 898                                    size_t n) const
 899     { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
 900 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
 901     { return find_last_not_of(STRCONV(sz), nStart); }
 902 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
 903                                   size_t n) const
 904     { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
 905
 906 #undef wxOtherCharType
 907 #undef STRCONV
 908
 909 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
 910
 911 // ===========================================================================
 912 // other common string functions
 913 // ===========================================================================
 914
 915 int wxString::CmpNoCase(const wxString& s) const
 916 {
 917     // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
 918
 919     size_t idx = 0;
 920     const_iterator i1 = begin();
 921     const_iterator end1 = end();
 922     const_iterator i2 = s.begin();
 923     const_iterator end2 = s.end();
 924
 925     for ( ; i1 != end1 && i2 != end2; ++idx, ++i1, ++i2 )
 926     {
 927         wxUniChar lower1 = (wxChar)wxTolower(*i1);
 928         wxUniChar lower2 = (wxChar)wxTolower(*i2);
 929         if ( lower1 != lower2 )
 930             return lower1 < lower2 ? -1 : 1;
 931     }
 932
 933     size_t len1 = length();
 934     size_t len2 = s.length();
 935
 936     if ( len1 < len2 )
 937         return -1;
 938     else if ( len1 > len2 )
 939         return 1;
 940     return 0;
 941 }
 942
 943
 944 #if wxUSE_UNICODE
 945
 946 #ifdef __MWERKS__
 947 #ifndef __SCHAR_MAX__
 948 #define __SCHAR_MAX__ 127
 949 #endif
 950 #endif
 951
 952 wxString wxString::FromAscii(const char *ascii)
 953 {
 954     if (!ascii)
 955        return wxEmptyString;
 956
 957     size_t len = strlen( ascii );
 958     wxString res;
 959
 960     if ( len )
 961     {
 962         wxStringBuffer buf(res, len);
 963
 964         wchar_t *dest = buf;
 965
 966         for ( ;; )
 967         {
 968            if ( (*dest++ = (wchar_t)(unsigned char)*ascii++) == L'\0' )
 969                break;
 970         }
 971     }
 972
 973     return res;
 974 }
 975
 976 wxString wxString::FromAscii(const char ascii)
 977 {
 978     // What do we do with '\0' ?
 979
 980     wxString res;
 981     res += (wchar_t)(unsigned char) ascii;
 982
 983     return res;
 984 }
 985
 986 const wxCharBuffer wxString::ToAscii() const
 987 {
 988     // this will allocate enough space for the terminating NUL too
 989     wxCharBuffer buffer(length());
 990
 991
 992     char *dest = buffer.data();
 993
 994     const wchar_t *pwc = c_str();
 995     for ( ;; )
 996     {
 997         *dest++ = (char)(*pwc > SCHAR_MAX ? wxT('_') : *pwc);
 998
 999         // the output string can't have embedded NULs anyhow, so we can safely
1000         // stop at first of them even if we do have any
1001         if ( !*pwc++ )
1002             break;
1003     }
1004
1005     return buffer;
1006 }
1007
1008 #endif // Unicode
1009
1010 // extract string of length nCount starting at nFirst
1011 wxString wxString::Mid(size_t nFirst, size_t nCount) const
1012 {
1013     size_t nLen = length();
1014
1015     // default value of nCount is npos and means "till the end"
1016     if ( nCount == npos )
1017     {
1018         nCount = nLen - nFirst;
1019     }
1020
1021     // out-of-bounds requests return sensible things
1022     if ( nFirst + nCount > nLen )
1023     {
1024         nCount = nLen - nFirst;
1025     }
1026
1027     if ( nFirst > nLen )
1028     {
1029         // AllocCopy() will return empty string
1030         return wxEmptyString;
1031     }
1032
1033     wxString dest(*this, nFirst, nCount);
1034     if ( dest.length() != nCount )
1035     {
1036         wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1037     }
1038
1039     return dest;
1040 }
1041
1042 // check that the string starts with prefix and return the rest of the string
1043 // in the provided pointer if it is not NULL, otherwise return false
1044 bool wxString::StartsWith(const wxChar *prefix, wxString *rest) const
1045 {
1046     wxASSERT_MSG( prefix, _T("invalid parameter in wxString::StartsWith") );
1047
1048     // first check if the beginning of the string matches the prefix: note
1049     // that we don't have to check that we don't run out of this string as
1050     // when we reach the terminating NUL, either prefix string ends too (and
1051     // then it's ok) or we break out of the loop because there is no match
1052     const wxChar *p = c_str();
1053     while ( *prefix )
1054     {
1055         if ( *prefix++ != *p++ )
1056         {
1057             // no match
1058             return false;
1059         }
1060     }
1061
1062     if ( rest )
1063     {
1064         // put the rest of the string into provided pointer
1065         *rest = p;
1066     }
1067
1068     return true;
1069 }
1070
1071
1072 // check that the string ends with suffix and return the rest of it in the
1073 // provided pointer if it is not NULL, otherwise return false
1074 bool wxString::EndsWith(const wxChar *suffix, wxString *rest) const
1075 {
1076     wxASSERT_MSG( suffix, _T("invalid parameter in wxString::EndssWith") );
1077
1078     int start = length() - wxStrlen(suffix);
1079
1080     if ( start < 0 || compare(start, npos, suffix) != 0 )
1081         return false;
1082
1083     if ( rest )
1084     {
1085         // put the rest of the string into provided pointer
1086         rest->assign(*this, 0, start);
1087     }
1088
1089     return true;
1090 }
1091
1092
1093 // extract nCount last (rightmost) characters
1094 wxString wxString::Right(size_t nCount) const
1095 {
1096   if ( nCount > length() )
1097     nCount = length();
1098
1099   wxString dest(*this, length() - nCount, nCount);
1100   if ( dest.length() != nCount ) {
1101     wxFAIL_MSG( _T("out of memory in wxString::Right") );
1102   }
1103   return dest;
1104 }
1105
1106 // get all characters after the last occurence of ch
1107 // (returns the whole string if ch not found)
1108 wxString wxString::AfterLast(wxUniChar ch) const
1109 {
1110   wxString str;
1111   int iPos = Find(ch, true);
1112   if ( iPos == wxNOT_FOUND )
1113     str = *this;
1114   else
1115     str = wx_str() + iPos + 1;
1116
1117   return str;
1118 }
1119
1120 // extract nCount first (leftmost) characters
1121 wxString wxString::Left(size_t nCount) const
1122 {
1123   if ( nCount > length() )
1124     nCount = length();
1125
1126   wxString dest(*this, 0, nCount);
1127   if ( dest.length() != nCount ) {
1128     wxFAIL_MSG( _T("out of memory in wxString::Left") );
1129   }
1130   return dest;
1131 }
1132
1133 // get all characters before the first occurence of ch
1134 // (returns the whole string if ch not found)
1135 wxString wxString::BeforeFirst(wxUniChar ch) const
1136 {
1137   int iPos = Find(ch);
1138   if ( iPos == wxNOT_FOUND ) iPos = length();
1139   return wxString(*this, 0, iPos);
1140 }
1141
1142 /// get all characters before the last occurence of ch
1143 /// (returns empty string if ch not found)
1144 wxString wxString::BeforeLast(wxUniChar ch) const
1145 {
1146   wxString str;
1147   int iPos = Find(ch, true);
1148   if ( iPos != wxNOT_FOUND && iPos != 0 )
1149     str = wxString(c_str(), iPos);
1150
1151   return str;
1152 }
1153
1154 /// get all characters after the first occurence of ch
1155 /// (returns empty string if ch not found)
1156 wxString wxString::AfterFirst(wxUniChar ch) const
1157 {
1158   wxString str;
1159   int iPos = Find(ch);
1160   if ( iPos != wxNOT_FOUND )
1161     str = wx_str() + iPos + 1;
1162
1163   return str;
1164 }
1165
1166 // replace first (or all) occurences of some substring with another one
1167 size_t wxString::Replace(const wxString& strOld,
1168                          const wxString& strNew, bool bReplaceAll)
1169 {
1170     // if we tried to replace an empty string we'd enter an infinite loop below
1171     wxCHECK_MSG( !strOld.empty(), 0,
1172                  _T("wxString::Replace(): invalid parameter") );
1173
1174     size_t uiCount = 0;   // count of replacements made
1175
1176     size_t uiOldLen = strOld.length();
1177     size_t uiNewLen = strNew.length();
1178
1179     size_t dwPos = 0;
1180
1181     while ( (*this)[dwPos] != wxT('\0') )
1182     {
1183         //DO NOT USE STRSTR HERE
1184         //this string can contain embedded null characters,
1185         //so strstr will function incorrectly
1186         dwPos = find(strOld, dwPos);
1187         if ( dwPos == npos )
1188             break;                  // exit the loop
1189         else
1190         {
1191             //replace this occurance of the old string with the new one
1192             replace(dwPos, uiOldLen, strNew, uiNewLen);
1193
1194             //move up pos past the string that was replaced
1195             dwPos += uiNewLen;
1196
1197             //increase replace count
1198             ++uiCount;
1199
1200             // stop now?
1201             if ( !bReplaceAll )
1202                 break;                  // exit the loop
1203         }
1204     }
1205
1206     return uiCount;
1207 }
1208
1209 bool wxString::IsAscii() const
1210 {
1211     for ( const_iterator i = begin(); i != end(); ++i )
1212     {
1213         if ( !(*i).IsAscii() )
1214             return false;
1215     }
1216
1217     return true;
1218 }
1219
1220 bool wxString::IsWord() const
1221 {
1222     for ( const_iterator i = begin(); i != end(); ++i )
1223     {
1224         if ( !wxIsalpha(*i) )
1225             return false;
1226     }
1227
1228     return true;
1229 }
1230
1231 bool wxString::IsNumber() const
1232 {
1233     if ( empty() )
1234         return true;
1235
1236     const_iterator i = begin();
1237
1238     if ( *i == _T('-') || *i == _T('+') )
1239         ++i;
1240
1241     for ( ; i != end(); ++i )
1242     {
1243         if ( !wxIsdigit(*i) )
1244             return false;
1245     }
1246
1247     return true;
1248 }
1249
1250 wxString wxString::Strip(stripType w) const
1251 {
1252     wxString s = *this;
1253     if ( w & leading ) s.Trim(false);
1254     if ( w & trailing ) s.Trim(true);
1255     return s;
1256 }
1257
1258 // ---------------------------------------------------------------------------
1259 // case conversion
1260 // ---------------------------------------------------------------------------
1261
1262 wxString& wxString::MakeUpper()
1263 {
1264   for ( iterator it = begin(), en = end(); it != en; ++it )
1265     *it = (wxChar)wxToupper(*it);
1266
1267   return *this;
1268 }
1269
1270 wxString& wxString::MakeLower()
1271 {
1272   for ( iterator it = begin(), en = end(); it != en; ++it )
1273     *it = (wxChar)wxTolower(*it);
1274
1275   return *this;
1276 }
1277
1278 // ---------------------------------------------------------------------------
1279 // trimming and padding
1280 // ---------------------------------------------------------------------------
1281
1282 // some compilers (VC++ 6.0 not to name them) return true for a call to
1283 // isspace('ê') in the C locale which seems to be broken to me, but we have to
1284 // live with this by checking that the character is a 7 bit one - even if this
1285 // may fail to detect some spaces (I don't know if Unicode doesn't have
1286 // space-like symbols somewhere except in the first 128 chars), it is arguably
1287 // still better than trimming away accented letters
1288 inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1289
1290 // trims spaces (in the sense of isspace) from left or right side
1291 wxString& wxString::Trim(bool bFromRight)
1292 {
1293     // first check if we're going to modify the string at all
1294     if ( !empty() &&
1295          (
1296           (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1297           (!bFromRight && wxSafeIsspace(GetChar(0u)))
1298          )
1299        )
1300     {
1301         if ( bFromRight )
1302         {
1303             // find last non-space character
1304             reverse_iterator psz = rbegin();
1305             while ( (psz != rend()) && wxSafeIsspace(*psz) )
1306                 psz++;
1307
1308             // truncate at trailing space start
1309             erase(psz.base(), end());
1310         }
1311         else
1312         {
1313             // find first non-space character
1314             iterator psz = begin();
1315             while ( (psz != end()) && wxSafeIsspace(*psz) )
1316                 psz++;
1317
1318             // fix up data and length
1319             erase(begin(), psz);
1320         }
1321     }
1322
1323     return *this;
1324 }
1325
1326 // adds nCount characters chPad to the string from either side
1327 wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1328 {
1329     wxString s(chPad, nCount);
1330
1331     if ( bFromRight )
1332         *this += s;
1333     else
1334     {
1335         s += *this;
1336         swap(s);
1337     }
1338
1339     return *this;
1340 }
1341
1342 // truncate the string
1343 wxString& wxString::Truncate(size_t uiLen)
1344 {
1345     if ( uiLen < length() )
1346     {
1347         erase(begin() + uiLen, end());
1348     }
1349     //else: nothing to do, string is already short enough
1350
1351     return *this;
1352 }
1353
1354 // ---------------------------------------------------------------------------
1355 // finding (return wxNOT_FOUND if not found and index otherwise)
1356 // ---------------------------------------------------------------------------
1357
1358 // find a character
1359 int wxString::Find(wxUniChar ch, bool bFromEnd) const
1360 {
1361     size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1362
1363     return (idx == npos) ? wxNOT_FOUND : (int)idx;
1364 }
1365
1366 // ----------------------------------------------------------------------------
1367 // conversion to numbers
1368 // ----------------------------------------------------------------------------
1369
1370 // the implementation of all the functions below is exactly the same so factor
1371 // it out
1372
1373 template <typename T, typename F>
1374 bool wxStringToIntType(const wxChar *start,
1375                        T *val,
1376                        int base,
1377                        F func)
1378 {
1379     wxCHECK_MSG( val, false, _T("NULL output pointer") );
1380     wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );
1381
1382 #ifndef __WXWINCE__
1383     errno = 0;
1384 #endif
1385
1386     wxChar *end;
1387     *val = (*func)(start, &end, base);
1388
1389     // return true only if scan was stopped by the terminating NUL and if the
1390     // string was not empty to start with and no under/overflow occurred
1391     return !*end && (end != start)
1392 #ifndef __WXWINCE__
1393         && (errno != ERANGE)
1394 #endif
1395     ;
1396 }
1397
1398 bool wxString::ToLong(long *val, int base) const
1399 {
1400     return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtol);
1401 }
1402
1403 bool wxString::ToULong(unsigned long *val, int base) const
1404 {
1405     return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtoul);
1406 }
1407
1408 bool wxString::ToLongLong(wxLongLong_t *val, int base) const
1409 {
1410     return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtoll);
1411 }
1412
1413 bool wxString::ToULongLong(wxULongLong_t *val, int base) const
1414 {
1415     return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtoull);
1416 }
1417
1418 bool wxString::ToDouble(double *val) const
1419 {
1420     wxCHECK_MSG( val, false, _T("NULL pointer in wxString::ToDouble") );
1421
1422 #ifndef __WXWINCE__
1423     errno = 0;
1424 #endif
1425
1426     const wxChar *start = c_str();
1427     wxChar *end;
1428     *val = wxStrtod(start, &end);
1429
1430     // return true only if scan was stopped by the terminating NUL and if the
1431     // string was not empty to start with and no under/overflow occurred
1432     return !*end && (end != start)
1433 #ifndef __WXWINCE__
1434         && (errno != ERANGE)
1435 #endif
1436     ;
1437 }
1438
1439 // ---------------------------------------------------------------------------
1440 // formatted output
1441 // ---------------------------------------------------------------------------
1442
1443 /* static */
1444 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1445 wxString wxStringPrintfMixinBase::DoFormat(const wxChar *format, ...)
1446 #else
1447 wxString wxString::DoFormat(const wxChar *format, ...)
1448 #endif
1449 {
1450     va_list argptr;
1451     va_start(argptr, format);
1452
1453     wxString s;
1454     s.PrintfV(format, argptr);
1455
1456     va_end(argptr);
1457
1458     return s;
1459 }
1460
1461 /* static */
1462 wxString wxString::FormatV(const wxString& format, va_list argptr)
1463 {
1464     wxString s;
1465     s.PrintfV(format, argptr);
1466     return s;
1467 }
1468
1469 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1470 int wxStringPrintfMixinBase::DoPrintf(const wxChar *format, ...)
1471 #else
1472 int wxString::DoPrintf(const wxChar *format, ...)
1473 #endif
1474 {
1475     va_list argptr;
1476     va_start(argptr, format);
1477
1478 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1479     // get a pointer to the wxString instance; we have to use dynamic_cast<>
1480     // because it's the only cast that works safely for downcasting when
1481     // multiple inheritance is used:
1482     wxString *str = static_cast<wxString*>(this);
1483 #else
1484     wxString *str = this;
1485 #endif
1486
1487     int iLen = str->PrintfV(format, argptr);
1488
1489     va_end(argptr);
1490
1491     return iLen;
1492 }
1493
1494 int wxString::PrintfV(const wxString& format, va_list argptr)
1495 {
1496     int size = 1024;
1497
1498     for ( ;; )
1499     {
1500         wxStringBuffer tmp(*this, size + 1);
1501         wxChar *buf = tmp;
1502
1503         if ( !buf )
1504         {
1505             // out of memory
1506             return -1;
1507         }
1508
1509         // wxVsnprintf() may modify the original arg pointer, so pass it
1510         // only a copy
1511         va_list argptrcopy;
1512         wxVaCopy(argptrcopy, argptr);
1513         int len = wxVsnprintf(buf, size, (const wxChar*)/*FIXME-UTF8*/format, argptrcopy);
1514         va_end(argptrcopy);
1515
1516         // some implementations of vsnprintf() don't NUL terminate
1517         // the string if there is not enough space for it so
1518         // always do it manually
1519         buf[size] = _T('\0');
1520
1521         // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1522         // total number of characters which would have been written if the
1523         // buffer were large enough (newer standards such as Unix98)
1524         if ( len < 0 )
1525         {
1526 #if wxUSE_WXVSNPRINTF
1527             // we know that our own implementation of wxVsnprintf() returns -1
1528             // only for a format error - thus there's something wrong with
1529             // the user's format string
1530             return -1;
1531 #else // assume that system version only returns error if not enough space
1532             // still not enough, as we don't know how much we need, double the
1533             // current size of the buffer
1534             size *= 2;
1535 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1536         }
1537         else if ( len >= size )
1538         {
1539 #if wxUSE_WXVSNPRINTF
1540             // we know that our own implementation of wxVsnprintf() returns
1541             // size+1 when there's not enough space but that's not the size
1542             // of the required buffer!
1543             size *= 2;      // so we just double the current size of the buffer
1544 #else
1545             // some vsnprintf() implementations NUL-terminate the buffer and
1546             // some don't in len == size case, to be safe always add 1
1547             size = len + 1;
1548 #endif
1549         }
1550         else // ok, there was enough space
1551         {
1552             break;
1553         }
1554     }
1555
1556     // we could have overshot
1557     Shrink();
1558
1559     return length();
1560 }
1561
1562 // ----------------------------------------------------------------------------
1563 // misc other operations
1564 // ----------------------------------------------------------------------------
1565
1566 // returns true if the string matches the pattern which may contain '*' and
1567 // '?' metacharacters (as usual, '?' matches any character and '*' any number
1568 // of them)
1569 bool wxString::Matches(const wxString& mask) const
1570 {
1571     // I disable this code as it doesn't seem to be faster (in fact, it seems
1572     // to be much slower) than the old, hand-written code below and using it
1573     // here requires always linking with libregex even if the user code doesn't
1574     // use it
1575 #if 0 // wxUSE_REGEX
1576     // first translate the shell-like mask into a regex
1577     wxString pattern;
1578     pattern.reserve(wxStrlen(pszMask));
1579
1580     pattern += _T('^');
1581     while ( *pszMask )
1582     {
1583         switch ( *pszMask )
1584         {
1585             case _T('?'):
1586                 pattern += _T('.');
1587                 break;
1588
1589             case _T('*'):
1590                 pattern += _T(".*");
1591                 break;
1592
1593             case _T('^'):
1594             case _T('.'):
1595             case _T('$'):
1596             case _T('('):
1597             case _T(')'):
1598             case _T('|'):
1599             case _T('+'):
1600             case _T('\\'):
1601                 // these characters are special in a RE, quote them
1602                 // (however note that we don't quote '[' and ']' to allow
1603                 // using them for Unix shell like matching)
1604                 pattern += _T('\\');
1605                 // fall through
1606
1607             default:
1608                 pattern += *pszMask;
1609         }
1610
1611         pszMask++;
1612     }
1613     pattern += _T('$');
1614
1615     // and now use it
1616     return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
1617 #else // !wxUSE_REGEX
1618   // TODO: this is, of course, awfully inefficient...
1619
1620   // FIXME-UTF8: implement using iterators, remove #if
1621 #if wxUSE_UNICODE_UTF8
1622   wxWCharBuffer maskBuf = mask.wc_str();
1623   wxWCharBuffer txtBuf = wc_str();
1624   const wxChar *pszMask = maskBuf.data();
1625   const wxChar *pszTxt = txtBuf.data();
1626 #else
1627   const wxChar *pszMask = mask.wx_str();
1628   // the char currently being checked
1629   const wxChar *pszTxt = wx_str();
1630 #endif
1631
1632   // the last location where '*' matched
1633   const wxChar *pszLastStarInText = NULL;
1634   const wxChar *pszLastStarInMask = NULL;
1635
1636 match:
1637   for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
1638     switch ( *pszMask ) {
1639       case wxT('?'):
1640         if ( *pszTxt == wxT('\0') )
1641           return false;
1642
1643         // pszTxt and pszMask will be incremented in the loop statement
1644
1645         break;
1646
1647       case wxT('*'):
1648         {
1649           // remember where we started to be able to backtrack later
1650           pszLastStarInText = pszTxt;
1651           pszLastStarInMask = pszMask;
1652
1653           // ignore special chars immediately following this one
1654           // (should this be an error?)
1655           while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
1656             pszMask++;
1657
1658           // if there is nothing more, match
1659           if ( *pszMask == wxT('\0') )
1660             return true;
1661
1662           // are there any other metacharacters in the mask?
1663           size_t uiLenMask;
1664           const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));
1665
1666           if ( pEndMask != NULL ) {
1667             // we have to match the string between two metachars
1668             uiLenMask = pEndMask - pszMask;
1669           }
1670           else {
1671             // we have to match the remainder of the string
1672             uiLenMask = wxStrlen(pszMask);
1673           }
1674
1675           wxString strToMatch(pszMask, uiLenMask);
1676           const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
1677           if ( pMatch == NULL )
1678             return false;
1679
1680           // -1 to compensate "++" in the loop
1681           pszTxt = pMatch + uiLenMask - 1;
1682           pszMask += uiLenMask - 1;
1683         }
1684         break;
1685
1686       default:
1687         if ( *pszMask != *pszTxt )
1688           return false;
1689         break;
1690     }
1691   }
1692
1693   // match only if nothing left
1694   if ( *pszTxt == wxT('\0') )
1695     return true;
1696
1697   // if we failed to match, backtrack if we can
1698   if ( pszLastStarInText ) {
1699     pszTxt = pszLastStarInText + 1;
1700     pszMask = pszLastStarInMask;
1701
1702     pszLastStarInText = NULL;
1703
1704     // don't bother resetting pszLastStarInMask, it's unnecessary
1705
1706     goto match;
1707   }
1708
1709   return false;
1710 #endif // wxUSE_REGEX/!wxUSE_REGEX
1711 }
1712
1713 // Count the number of chars
1714 int wxString::Freq(wxUniChar ch) const
1715 {
1716     int count = 0;
1717     for ( const_iterator i = begin(); i != end(); ++i )
1718     {
1719         if ( *i == ch )
1720             count ++;
1721     }
1722     return count;
1723 }
1724
1725 // convert to upper case, return the copy of the string
1726 wxString wxString::Upper() const
1727 { wxString s(*this); return s.MakeUpper(); }
1728
1729 // convert to lower case, return the copy of the string
1730 wxString wxString::Lower() const { wxString s(*this); return s.MakeLower(); }