src/common/string.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/string.cpp
   3 // Purpose:     wxString class
   4 // Author:      Vadim Zeitlin, Ryan Norton
   5 // Modified by:
   6 // Created:     29/01/98
   7 // RCS-ID:      $Id$
   8 // Copyright:   (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
   9 //              (c) 2004 Ryan Norton <wxprojects@comcast.net>
  10 // Licence:     wxWindows licence
  11 /////////////////////////////////////////////////////////////////////////////
  12
  13 // ===========================================================================
  14 // headers, declarations, constants
  15 // ===========================================================================
  16
  17 // For compilers that support precompilation, includes "wx.h".
  18 #include "wx/wxprec.h"
  19
  20 #ifdef __BORLANDC__
  21     #pragma hdrstop
  22 #endif
  23
  24 #ifndef WX_PRECOMP
  25     #include "wx/string.h"
  26     #include "wx/wxcrtvararg.h"
  27 #endif
  28
  29 #include <ctype.h>
  30
  31 #ifndef __WXWINCE__
  32     #include <errno.h>
  33 #endif
  34
  35 #include <string.h>
  36 #include <stdlib.h>
  37
  38 #ifdef __SALFORDC__
  39     #include <clib.h>
  40 #endif
  41
  42 #include "wx/hashmap.h"
  43
  44 // string handling functions used by wxString:
  45 #if wxUSE_UNICODE_UTF8
  46     #define wxStringMemcpy   memcpy
  47     #define wxStringMemcmp   memcmp
  48     #define wxStringMemchr   memchr
  49     #define wxStringStrlen   strlen
  50 #else
  51     #define wxStringMemcpy   wxTmemcpy
  52     #define wxStringMemcmp   wxTmemcmp
  53     #define wxStringMemchr   wxTmemchr
  54     #define wxStringStrlen   wxStrlen
  55 #endif
  56
  57
  58 // ---------------------------------------------------------------------------
  59 // static class variables definition
  60 // ---------------------------------------------------------------------------
  61
// As with std::string::npos, this must be (size_t)-1, i.e. the largest size_t value
  63 const size_t wxString::npos = (size_t) -1;
  64
  65 // ----------------------------------------------------------------------------
  66 // global functions
  67 // ----------------------------------------------------------------------------
  68
  69 #if wxUSE_STD_IOSTREAM
  70
  71 #include <iostream>
  72
  73 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
  74 {
  75 // FIXME-UTF8: always, not only if wxUSE_UNICODE
  76 #if wxUSE_UNICODE && !defined(__BORLANDC__)
  77     return os << (const wchar_t*)str.AsWCharBuf();
  78 #else
  79     return os << (const char*)str.AsCharBuf();
  80 #endif
  81 }
  82
  83 wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
  84 {
  85     return os << str.c_str();
  86 }
  87
  88 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str)
  89 {
  90     return os << str.data();
  91 }
  92
  93 #ifndef __BORLANDC__
  94 wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)
  95 {
  96     return os << str.data();
  97 }
  98 #endif
  99
 100 #endif // wxUSE_STD_IOSTREAM
 101
 102 // ===========================================================================
 103 // wxString class core
 104 // ===========================================================================
 105
 106 #if wxUSE_UNICODE_UTF8
 107
 108 void wxString::PosLenToImpl(size_t pos, size_t len,
 109                             size_t *implPos, size_t *implLen) const
 110 {
 111     if ( pos == npos )
 112         *implPos = npos;
 113     else
 114     {
 115         const_iterator i = begin() + pos;
 116         *implPos = wxStringImpl::const_iterator(i.impl()) - m_impl.begin();
 117         if ( len == npos )
 118             *implLen = npos;
 119         else
 120         {
 121             // too large length is interpreted as "to the end of the string"
 122             // FIXME-UTF8: verify this is the case in std::string, assert
 123             // otherwise
 124             if ( pos + len > length() )
 125                 len = length() - pos;
 126
 127             *implLen = (i + len).impl() - i.impl();
 128         }
 129     }
 130 }
 131
 132 #endif // wxUSE_UNICODE_UTF8
 133
 134 // ----------------------------------------------------------------------------
 135 // wxCStrData converted strings caching
 136 // ----------------------------------------------------------------------------
 137
 138 // FIXME-UTF8: temporarily disabled because it doesn't work with global
 139 //             string objects; re-enable after fixing this bug and benchmarking
 140 //             performance to see if using a hash is a good idea at all
 141 #if 0
 142
 143 // For backward compatibility reasons, it must be possible to assign the value
 144 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
 145 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
 146 // because the memory would be freed immediately, but it has to be valid as long
 147 // as the string is not modified, so that code like this still works:
 148 //
 149 // const wxChar *s = str.c_str();
 150 // while ( s ) { ... }
 151
 152 // FIXME-UTF8: not thread safe!
 153 // FIXME-UTF8: we currently clear the cached conversion only when the string is
 154 //             destroyed, but we should do it when the string is modified, to
 155 //             keep memory usage down
 156 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
 157 //             invalidated the cache on every change, we could keep the previous
 158 //             conversion
 159 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
 160 //             to use mb_str() or wc_str() instead of (const [w]char*)c_str()
 161
 162 template<typename T>
 163 static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
 164 {
 165     typename T::iterator i = hash.find(wxConstCast(s, wxString));
 166     if ( i != hash.end() )
 167     {
 168         free(i->second);
 169         hash.erase(i);
 170     }
 171 }
 172
 173 #if wxUSE_UNICODE
 174 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
 175 //     so we have to use wxString* here and const-cast when used
 176 WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
 177                     wxStringCharConversionCache);
 178 static wxStringCharConversionCache gs_stringsCharCache;
 179
// Disabled (inside "#if 0") cache-based variant: converts the string with
// mb_str(), stores the released buffer in gs_stringsCharCache keyed by the
// string's address and returns a pointer m_offset elements into it.
const char* wxCStrData::AsChar() const
{
    // remove previously cached value, if any (see FIXMEs above):
    DeleteStringFromConversionCache(gs_stringsCharCache, m_str);

    // convert the string and keep it:
    const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
        m_str->mb_str().release();

    return s + m_offset;
}
 191 #endif // wxUSE_UNICODE
 192
 193 #if !wxUSE_UNICODE_WCHAR
 194 WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
 195                     wxStringWCharConversionCache);
 196 static wxStringWCharConversionCache gs_stringsWCharCache;
 197
// Disabled (inside "#if 0") cache-based variant: converts the string with
// wc_str(), stores the released buffer in gs_stringsWCharCache keyed by the
// string's address and returns a pointer m_offset elements into it.
const wchar_t* wxCStrData::AsWChar() const
{
    // remove previously cached value, if any (see FIXMEs above):
    DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);

    // convert the string and keep it:
    const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
        m_str->wc_str().release();

    return s + m_offset;
}
 209 #endif // !wxUSE_UNICODE_WCHAR
 210
// Disabled (inside "#if 0") destructor: removes this string's entries from
// whichever conversion caches exist in the current build.
wxString::~wxString()
{
#if wxUSE_UNICODE
    // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
    DeleteStringFromConversionCache(gs_stringsCharCache, this);
#endif
#if !wxUSE_UNICODE_WCHAR
    DeleteStringFromConversionCache(gs_stringsWCharCache, this);
#endif
}
 221 #endif
 222
 223 #if wxUSE_UNICODE
 224 const char* wxCStrData::AsChar() const
 225 {
 226     wxString *str = wxConstCast(m_str, wxString);
 227
 228     // convert the string:
 229     wxCharBuffer buf(str->mb_str());
 230
 231     // FIXME-UTF8: do the conversion in-place in the existing buffer
 232     if ( str->m_convertedToChar &&
 233          strlen(buf) == strlen(str->m_convertedToChar) )
 234     {
 235         // keep the same buffer for as long as possible, so that several calls
 236         // to c_str() in a row still work:
 237         strcpy(str->m_convertedToChar, buf);
 238     }
 239     else
 240     {
 241         str->m_convertedToChar = buf.release();
 242     }
 243
 244     // and keep it:
 245     return str->m_convertedToChar + m_offset;
 246 }
 247 #endif // wxUSE_UNICODE
 248
 249 #if !wxUSE_UNICODE_WCHAR
 250 const wchar_t* wxCStrData::AsWChar() const
 251 {
 252     wxString *str = wxConstCast(m_str, wxString);
 253
 254     // convert the string:
 255     wxWCharBuffer buf(str->wc_str());
 256
 257     // FIXME-UTF8: do the conversion in-place in the existing buffer
 258     if ( str->m_convertedToWChar &&
 259          wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
 260     {
 261         // keep the same buffer for as long as possible, so that several calls
 262         // to c_str() in a row still work:
 263         memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
 264     }
 265     else
 266     {
 267         str->m_convertedToWChar = buf.release();
 268     }
 269
 270     // and keep it:
 271     return str->m_convertedToWChar + m_offset;
 272 }
 273 #endif // !wxUSE_UNICODE_WCHAR
 274
 275 // ===========================================================================
 276 // wxString class core
 277 // ===========================================================================
 278
 279 // ---------------------------------------------------------------------------
 280 // construction and conversion
 281 // ---------------------------------------------------------------------------
 282
 283 #if wxUSE_UNICODE_WCHAR
 284 /* static */
 285 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 286                                                const wxMBConv& conv)
 287 {
 288     // anything to do?
 289     if ( !psz || nLength == 0 )
 290         return SubstrBufFromMB(L"", 0);
 291
 292     if ( nLength == npos )
 293         nLength = wxNO_LEN;
 294
 295     size_t wcLen;
 296     wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 297     if ( !wcLen )
 298         return SubstrBufFromMB(_T(""), 0);
 299     else
 300         return SubstrBufFromMB(wcBuf, wcLen);
 301 }
 302 #endif // wxUSE_UNICODE_WCHAR
 303
 304 #if wxUSE_UNICODE_UTF8
 305 /* static */
 306 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 307                                                const wxMBConv& conv)
 308 {
 309     // FIXME-UTF8: return as-is without copying under UTF8 locale, return
 310     //             converted string under other locales - needs wxCharBuffer
 311     //             changes
 312
 313     // anything to do?
 314     if ( !psz || nLength == 0 )
 315         return SubstrBufFromMB("", 0);
 316
 317     if ( nLength == npos )
 318         nLength = wxNO_LEN;
 319
 320     // first convert to wide string:
 321     size_t wcLen;
 322     wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 323     if ( !wcLen )
 324         return SubstrBufFromMB("", 0);
 325
 326     // and then to UTF-8:
 327     SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxConvUTF8));
 328     // widechar -> UTF-8 conversion isn't supposed to ever fail:
 329     wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
 330
 331     return buf;
 332 }
 333 #endif // wxUSE_UNICODE_UTF8
 334
 335 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 336 /* static */
 337 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
 338                                                const wxMBConv& conv)
 339 {
 340     // anything to do?
 341     if ( !pwz || nLength == 0 )
 342         return SubstrBufFromWC("", 0);
 343
 344     if ( nLength == npos )
 345         nLength = wxNO_LEN;
 346
 347     size_t mbLen;
 348     wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
 349     if ( !mbLen )
 350         return SubstrBufFromWC("", 0);
 351     else
 352         return SubstrBufFromWC(mbBuf, mbLen);
 353 }
 354 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 355
 356
 357 #if wxUSE_UNICODE_WCHAR
 358
 359 //Convert wxString in Unicode mode to a multi-byte string
 360 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
 361 {
 362     return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL);
 363 }
 364
 365 #elif wxUSE_UNICODE_UTF8
 366
 367 const wxWCharBuffer wxString::wc_str() const
 368 {
 369     return wxConvUTF8.cMB2WC(m_impl.c_str(),
 370                              m_impl.length() + 1 /* size, not length */,
 371                              NULL);
 372 }
 373
 374 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
 375 {
 376     // FIXME-UTF8: optimize the case when conv==wxConvUTF8 or wxConvLibc
 377     //             under UTF8 locale
 378     // FIXME-UTF8: use wc_str() here once we have buffers with length
 379
 380     size_t wcLen;
 381     wxWCharBuffer wcBuf(
 382             wxConvUTF8.cMB2WC(m_impl.c_str(),
 383                               m_impl.length() + 1 /* size, not length */,
 384                               &wcLen));
 385     if ( !wcLen )
 386         return wxCharBuffer("");
 387
 388     return conv.cWC2MB(wcBuf, wcLen, NULL);
 389 }
 390
 391 #else // ANSI
 392
 393 //Converts this string to a wide character string if unicode
 394 //mode is not enabled and wxUSE_WCHAR_T is enabled
 395 const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
 396 {
 397     return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL);
 398 }
 399
 400 #endif // Unicode/ANSI
 401
 402 // shrink to minimal size (releasing extra memory)
 403 bool wxString::Shrink()
 404 {
 405   wxString tmp(begin(), end());
 406   swap(tmp);
 407   return tmp.length() == length();
 408 }
 409
 410 // deprecated compatibility code:
 411 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
// Deprecated compatibility wrapper: forwards to DoGetWriteBuf(nLen) and
// returns its result.
wxStringCharType *wxString::GetWriteBuf(size_t nLen)
{
    return DoGetWriteBuf(nLen);
}
 416
// Deprecated compatibility wrapper: forwards to DoUngetWriteBuf().
void wxString::UngetWriteBuf()
{
    DoUngetWriteBuf();
}
 421
// Deprecated compatibility wrapper: forwards to DoUngetWriteBuf(nLen).
void wxString::UngetWriteBuf(size_t nLen)
{
    DoUngetWriteBuf(nLen);
}
 426 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
 427
 428
 429 // ---------------------------------------------------------------------------
 430 // data access
 431 // ---------------------------------------------------------------------------
 432
 433 // all functions are inline in string.h
 434
 435 // ---------------------------------------------------------------------------
 436 // concatenation operators
 437 // ---------------------------------------------------------------------------
 438
/*
 * concatenation functions come in 5 flavours (a "C str" is either a
 * const char* or a const wchar_t*, each with its own overload):
 *  string + string
 *  char   + string      and      string + char
 *  C str  + string      and      string + C str
 */
 445
 446 wxString operator+(const wxString& str1, const wxString& str2)
 447 {
 448 #if !wxUSE_STL_BASED_WXSTRING
 449     wxASSERT( str1.IsValid() );
 450     wxASSERT( str2.IsValid() );
 451 #endif
 452
 453     wxString s = str1;
 454     s += str2;
 455
 456     return s;
 457 }
 458
 459 wxString operator+(const wxString& str, wxUniChar ch)
 460 {
 461 #if !wxUSE_STL_BASED_WXSTRING
 462     wxASSERT( str.IsValid() );
 463 #endif
 464
 465     wxString s = str;
 466     s += ch;
 467
 468     return s;
 469 }
 470
 471 wxString operator+(wxUniChar ch, const wxString& str)
 472 {
 473 #if !wxUSE_STL_BASED_WXSTRING
 474     wxASSERT( str.IsValid() );
 475 #endif
 476
 477     wxString s = ch;
 478     s += str;
 479
 480     return s;
 481 }
 482
 483 wxString operator+(const wxString& str, const char *psz)
 484 {
 485 #if !wxUSE_STL_BASED_WXSTRING
 486     wxASSERT( str.IsValid() );
 487 #endif
 488
 489     wxString s;
 490     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 491         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 492     }
 493     s += str;
 494     s += psz;
 495
 496     return s;
 497 }
 498
 499 wxString operator+(const wxString& str, const wchar_t *pwz)
 500 {
 501 #if !wxUSE_STL_BASED_WXSTRING
 502     wxASSERT( str.IsValid() );
 503 #endif
 504
 505     wxString s;
 506     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 507         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 508     }
 509     s += str;
 510     s += pwz;
 511
 512     return s;
 513 }
 514
 515 wxString operator+(const char *psz, const wxString& str)
 516 {
 517 #if !wxUSE_STL_BASED_WXSTRING
 518     wxASSERT( str.IsValid() );
 519 #endif
 520
 521     wxString s;
 522     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 523         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 524     }
 525     s = psz;
 526     s += str;
 527
 528     return s;
 529 }
 530
 531 wxString operator+(const wchar_t *pwz, const wxString& str)
 532 {
 533 #if !wxUSE_STL_BASED_WXSTRING
 534     wxASSERT( str.IsValid() );
 535 #endif
 536
 537     wxString s;
 538     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 539         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 540     }
 541     s = pwz;
 542     s += str;
 543
 544     return s;
 545 }
 546
 547 // ---------------------------------------------------------------------------
 548 // string comparison
 549 // ---------------------------------------------------------------------------
 550
 551 #ifdef HAVE_STD_STRING_COMPARE
 552
 553 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
 554 //     UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
 555 //     sort strings in characters code point order by sorting the byte sequence
 556 //     in byte values order (i.e. what strcmp() and memcmp() do).
 557
// Compares this string with str by delegating to m_impl.compare() on the
// two underlying strings and returning its result.
int wxString::compare(const wxString& str) const
{
    return m_impl.compare(str.m_impl);
}
 562
 563 int wxString::compare(size_t nStart, size_t nLen,
 564                       const wxString& str) const
 565 {
 566     size_t pos, len;
 567     PosLenToImpl(nStart, nLen, &pos, &len);
 568     return m_impl.compare(pos, len, str.m_impl);
 569 }
 570
 571 int wxString::compare(size_t nStart, size_t nLen,
 572                       const wxString& str,
 573                       size_t nStart2, size_t nLen2) const
 574 {
 575     size_t pos, len;
 576     PosLenToImpl(nStart, nLen, &pos, &len);
 577
 578     size_t pos2, len2;
 579     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 580
 581     return m_impl.compare(pos, len, str.m_impl, pos2, len2);
 582 }
 583
// Compares this string with the narrow C string sz: sz is passed through
// ImplStr() and the result is handed to m_impl.compare().
int wxString::compare(const char* sz) const
{
    return m_impl.compare(ImplStr(sz));
}
 588
// Compares this string with the wide C string sz: sz is passed through
// ImplStr() and the result is handed to m_impl.compare().
int wxString::compare(const wchar_t* sz) const
{
    return m_impl.compare(ImplStr(sz));
}
 593
 594 int wxString::compare(size_t nStart, size_t nLen,
 595                       const char* sz, size_t nCount) const
 596 {
 597     size_t pos, len;
 598     PosLenToImpl(nStart, nLen, &pos, &len);
 599
 600     SubstrBufFromMB str(ImplStr(sz, nCount));
 601
 602     return m_impl.compare(pos, len, str.data, str.len);
 603 }
 604
 605 int wxString::compare(size_t nStart, size_t nLen,
 606                       const wchar_t* sz, size_t nCount) const
 607 {
 608     size_t pos, len;
 609     PosLenToImpl(nStart, nLen, &pos, &len);
 610
 611     SubstrBufFromWC str(ImplStr(sz, nCount));
 612
 613     return m_impl.compare(pos, len, str.data, str.len);
 614 }
 615
 616 #else // !HAVE_STD_STRING_COMPARE
 617
 618 static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
 619                           const wxStringCharType* s2, size_t l2)
 620 {
 621     if( l1 == l2 )
 622         return wxStringMemcmp(s1, s2, l1);
 623     else if( l1 < l2 )
 624     {
 625         int ret = wxStringMemcmp(s1, s2, l1);
 626         return ret == 0 ? -1 : ret;
 627     }
 628     else
 629     {
 630         int ret = wxStringMemcmp(s1, s2, l2);
 631         return ret == 0 ? +1 : ret;
 632     }
 633 }
 634
 635 int wxString::compare(const wxString& str) const
 636 {
 637     return ::wxDoCmp(m_impl.data(), m_impl.length(),
 638                      str.m_impl.data(), str.m_impl.length());
 639 }
 640
 641 int wxString::compare(size_t nStart, size_t nLen,
 642                       const wxString& str) const
 643 {
 644     wxASSERT(nStart <= length());
 645     size_type strLen = length() - nStart;
 646     nLen = strLen < nLen ? strLen : nLen;
 647
 648     size_t pos, len;
 649     PosLenToImpl(nStart, nLen, &pos, &len);
 650
 651     return ::wxDoCmp(m_impl.data() + pos,  len,
 652                      str.m_impl.data(), str.m_impl.length());
 653 }
 654
 655 int wxString::compare(size_t nStart, size_t nLen,
 656                       const wxString& str,
 657                       size_t nStart2, size_t nLen2) const
 658 {
 659     wxASSERT(nStart <= length());
 660     wxASSERT(nStart2 <= str.length());
 661     size_type strLen  =     length() - nStart,
 662               strLen2 = str.length() - nStart2;
 663     nLen  = strLen  < nLen  ? strLen  : nLen;
 664     nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
 665
 666     size_t pos, len;
 667     PosLenToImpl(nStart, nLen, &pos, &len);
 668     size_t pos2, len2;
 669     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 670
 671     return ::wxDoCmp(m_impl.data() + pos, len,
 672                      str.m_impl.data() + pos2, len2);
 673 }
 674
 675 int wxString::compare(const char* sz) const
 676 {
 677     SubstrBufFromMB str(ImplStr(sz, npos));
 678     if ( str.len == npos )
 679         str.len = wxStringStrlen(str.data);
 680     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 681 }
 682
 683 int wxString::compare(const wchar_t* sz) const
 684 {
 685     SubstrBufFromWC str(ImplStr(sz, npos));
 686     if ( str.len == npos )
 687         str.len = wxStringStrlen(str.data);
 688     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 689 }
 690
 691 int wxString::compare(size_t nStart, size_t nLen,
 692                       const char* sz, size_t nCount) const
 693 {
 694     wxASSERT(nStart <= length());
 695     size_type strLen = length() - nStart;
 696     nLen = strLen < nLen ? strLen : nLen;
 697
 698     size_t pos, len;
 699     PosLenToImpl(nStart, nLen, &pos, &len);
 700
 701     SubstrBufFromMB str(ImplStr(sz, nCount));
 702     if ( str.len == npos )
 703         str.len = wxStringStrlen(str.data);
 704
 705     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 706 }
 707
 708 int wxString::compare(size_t nStart, size_t nLen,
 709                       const wchar_t* sz, size_t nCount) const
 710 {
 711     wxASSERT(nStart <= length());
 712     size_type strLen = length() - nStart;
 713     nLen = strLen < nLen ? strLen : nLen;
 714
 715     size_t pos, len;
 716     PosLenToImpl(nStart, nLen, &pos, &len);
 717
 718     SubstrBufFromWC str(ImplStr(sz, nCount));
 719     if ( str.len == npos )
 720         str.len = wxStringStrlen(str.data);
 721
 722     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 723 }
 724
 725 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
 726
 727
 728 // ---------------------------------------------------------------------------
 729 // find_{first,last}_[not]_of functions
 730 // ---------------------------------------------------------------------------
 731
 732 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
 733
 734 // NB: All these functions are implemented  with the argument being wxChar*,
 735 //     i.e. widechar string in any Unicode build, even though native string
 736 //     representation is char* in the UTF-8 build. This is because we couldn't
 737 //     use memchr() to determine if a character is in a set encoded as UTF-8.
 738
 739 size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
 740 {
 741     return find_first_of(sz, nStart, wxStrlen(sz));
 742 }
 743
 744 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
 745 {
 746     return find_first_not_of(sz, nStart, wxStrlen(sz));
 747 }
 748
 749 size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
 750 {
 751     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 752
 753     size_t idx = nStart;
 754     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 755     {
 756         if ( wxTmemchr(sz, *i, n) )
 757             return idx;
 758     }
 759
 760     return npos;
 761 }
 762
 763 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
 764 {
 765     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 766
 767     size_t idx = nStart;
 768     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 769     {
 770         if ( !wxTmemchr(sz, *i, n) )
 771             return idx;
 772     }
 773
 774     return npos;
 775 }
 776
 777
 778 size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
 779 {
 780     return find_last_of(sz, nStart, wxStrlen(sz));
 781 }
 782
 783 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
 784 {
 785     return find_last_not_of(sz, nStart, wxStrlen(sz));
 786 }
 787
 788 size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
 789 {
 790     size_t len = length();
 791
 792     if ( nStart == npos )
 793     {
 794         nStart = len - 1;
 795     }
 796     else
 797     {
 798         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 799     }
 800
 801     size_t idx = nStart;
 802     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 803           i != rend(); --idx, ++i )
 804     {
 805         if ( wxTmemchr(sz, *i, n) )
 806             return idx;
 807     }
 808
 809     return npos;
 810 }
 811
 812 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
 813 {
 814     size_t len = length();
 815
 816     if ( nStart == npos )
 817     {
 818         nStart = len - 1;
 819     }
 820     else
 821     {
 822         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 823     }
 824
 825     size_t idx = nStart;
 826     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 827           i != rend(); --idx, ++i )
 828     {
 829         if ( !wxTmemchr(sz, *i, n) )
 830             return idx;
 831     }
 832
 833     return npos;
 834 }
 835
 836 size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
 837 {
 838     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 839
 840     size_t idx = nStart;
 841     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 842     {
 843         if ( *i != ch )
 844             return idx;
 845     }
 846
 847     return npos;
 848 }
 849
 850 size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
 851 {
 852     size_t len = length();
 853
 854     if ( nStart == npos )
 855     {
 856         nStart = len - 1;
 857     }
 858     else
 859     {
 860         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 861     }
 862
 863     size_t idx = nStart;
 864     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 865           i != rend(); --idx, ++i )
 866     {
 867         if ( *i != ch )
 868             return idx;
 869     }
 870
 871     return npos;
 872 }
 873
 874 // the functions above were implemented for wchar_t* arguments in Unicode
 875 // build and char* in ANSI build; below are implementations for the other
 876 // version:
 877 #if wxUSE_UNICODE
 878     #define wxOtherCharType char
 879     #define STRCONV         (const wxChar*)wxConvLibc.cMB2WC
 880 #else
 881     #define wxOtherCharType wchar_t
 882     #define STRCONV         (const wxChar*)wxConvLibc.cWC2MB
 883 #endif
 884
 885 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
 886     { return find_first_of(STRCONV(sz), nStart); }
 887
 888 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
 889                                size_t n) const
 890     { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
 891 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
 892     { return find_last_of(STRCONV(sz), nStart); }
 893 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
 894                               size_t n) const
 895     { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
 896 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
 897     { return find_first_not_of(STRCONV(sz), nStart); }
 898 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
 899                                    size_t n) const
 900     { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
 901 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
 902     { return find_last_not_of(STRCONV(sz), nStart); }
 903 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
 904                                   size_t n) const
 905     { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
 906
 907 #undef wxOtherCharType
 908 #undef STRCONV
 909
 910 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
 911
 912 // ===========================================================================
 913 // other common string functions
 914 // ===========================================================================
 915
 916 int wxString::CmpNoCase(const wxString& s) const
 917 {
 918     // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
 919
 920     size_t idx = 0;
 921     const_iterator i1 = begin();
 922     const_iterator end1 = end();
 923     const_iterator i2 = s.begin();
 924     const_iterator end2 = s.end();
 925
 926     for ( ; i1 != end1 && i2 != end2; ++idx, ++i1, ++i2 )
 927     {
 928         wxUniChar lower1 = (wxChar)wxTolower(*i1);
 929         wxUniChar lower2 = (wxChar)wxTolower(*i2);
 930         if ( lower1 != lower2 )
 931             return lower1 < lower2 ? -1 : 1;
 932     }
 933
 934     size_t len1 = length();
 935     size_t len2 = s.length();
 936
 937     if ( len1 < len2 )
 938         return -1;
 939     else if ( len1 > len2 )
 940         return 1;
 941     return 0;
 942 }
 943
 944
 945 #if wxUSE_UNICODE
 946
 947 #ifdef __MWERKS__
 948 #ifndef __SCHAR_MAX__
 949 #define __SCHAR_MAX__ 127
 950 #endif
 951 #endif
 952
 953 wxString wxString::FromAscii(const char *ascii)
 954 {
 955     if (!ascii)
 956        return wxEmptyString;
 957
 958     size_t len = strlen( ascii );
 959     wxString res;
 960
 961     if ( len )
 962     {
 963         wxStringBuffer buf(res, len);
 964
 965         wchar_t *dest = buf;
 966
 967         for ( ;; )
 968         {
 969            if ( (*dest++ = (wchar_t)(unsigned char)*ascii++) == L'\0' )
 970                break;
 971         }
 972     }
 973
 974     return res;
 975 }
 976
 977 wxString wxString::FromAscii(const char ascii)
 978 {
 979     // What do we do with '\0' ?
 980
 981     wxString res;
 982     res += (wchar_t)(unsigned char) ascii;
 983
 984     return res;
 985 }
 986
 987 const wxCharBuffer wxString::ToAscii() const
 988 {
 989     // this will allocate enough space for the terminating NUL too
 990     wxCharBuffer buffer(length());
 991
 992
 993     char *dest = buffer.data();
 994
 995     const wchar_t *pwc = c_str();
 996     for ( ;; )
 997     {
 998         *dest++ = (char)(*pwc > SCHAR_MAX ? wxT('_') : *pwc);
 999
1000         // the output string can't have embedded NULs anyhow, so we can safely
1001         // stop at first of them even if we do have any
1002         if ( !*pwc++ )
1003             break;
1004     }
1005
1006     return buffer;
1007 }
1008
1009 #endif // Unicode
1010
1011 // extract string of length nCount starting at nFirst
1012 wxString wxString::Mid(size_t nFirst, size_t nCount) const
1013 {
1014     size_t nLen = length();
1015
1016     // default value of nCount is npos and means "till the end"
1017     if ( nCount == npos )
1018     {
1019         nCount = nLen - nFirst;
1020     }
1021
1022     // out-of-bounds requests return sensible things
1023     if ( nFirst + nCount > nLen )
1024     {
1025         nCount = nLen - nFirst;
1026     }
1027
1028     if ( nFirst > nLen )
1029     {
1030         // AllocCopy() will return empty string
1031         return wxEmptyString;
1032     }
1033
1034     wxString dest(*this, nFirst, nCount);
1035     if ( dest.length() != nCount )
1036     {
1037         wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1038     }
1039
1040     return dest;
1041 }
1042
1043 // check that the string starts with prefix and return the rest of the string
1044 // in the provided pointer if it is not NULL, otherwise return false
1045 bool wxString::StartsWith(const wxChar *prefix, wxString *rest) const
1046 {
1047     wxASSERT_MSG( prefix, _T("invalid parameter in wxString::StartsWith") );
1048
1049     // first check if the beginning of the string matches the prefix: note
1050     // that we don't have to check that we don't run out of this string as
1051     // when we reach the terminating NUL, either prefix string ends too (and
1052     // then it's ok) or we break out of the loop because there is no match
1053     const wxChar *p = c_str();
1054     while ( *prefix )
1055     {
1056         if ( *prefix++ != *p++ )
1057         {
1058             // no match
1059             return false;
1060         }
1061     }
1062
1063     if ( rest )
1064     {
1065         // put the rest of the string into provided pointer
1066         *rest = p;
1067     }
1068
1069     return true;
1070 }
1071
1072
1073 // check that the string ends with suffix and return the rest of it in the
1074 // provided pointer if it is not NULL, otherwise return false
1075 bool wxString::EndsWith(const wxChar *suffix, wxString *rest) const
1076 {
1077     wxASSERT_MSG( suffix, _T("invalid parameter in wxString::EndssWith") );
1078
1079     int start = length() - wxStrlen(suffix);
1080
1081     if ( start < 0 || compare(start, npos, suffix) != 0 )
1082         return false;
1083
1084     if ( rest )
1085     {
1086         // put the rest of the string into provided pointer
1087         rest->assign(*this, 0, start);
1088     }
1089
1090     return true;
1091 }
1092
1093
1094 // extract nCount last (rightmost) characters
1095 wxString wxString::Right(size_t nCount) const
1096 {
1097   if ( nCount > length() )
1098     nCount = length();
1099
1100   wxString dest(*this, length() - nCount, nCount);
1101   if ( dest.length() != nCount ) {
1102     wxFAIL_MSG( _T("out of memory in wxString::Right") );
1103   }
1104   return dest;
1105 }
1106
1107 // get all characters after the last occurence of ch
1108 // (returns the whole string if ch not found)
1109 wxString wxString::AfterLast(wxUniChar ch) const
1110 {
1111   wxString str;
1112   int iPos = Find(ch, true);
1113   if ( iPos == wxNOT_FOUND )
1114     str = *this;
1115   else
1116     str = wx_str() + iPos + 1;
1117
1118   return str;
1119 }
1120
1121 // extract nCount first (leftmost) characters
1122 wxString wxString::Left(size_t nCount) const
1123 {
1124   if ( nCount > length() )
1125     nCount = length();
1126
1127   wxString dest(*this, 0, nCount);
1128   if ( dest.length() != nCount ) {
1129     wxFAIL_MSG( _T("out of memory in wxString::Left") );
1130   }
1131   return dest;
1132 }
1133
1134 // get all characters before the first occurence of ch
1135 // (returns the whole string if ch not found)
1136 wxString wxString::BeforeFirst(wxUniChar ch) const
1137 {
1138   int iPos = Find(ch);
1139   if ( iPos == wxNOT_FOUND ) iPos = length();
1140   return wxString(*this, 0, iPos);
1141 }
1142
1143 /// get all characters before the last occurence of ch
1144 /// (returns empty string if ch not found)
1145 wxString wxString::BeforeLast(wxUniChar ch) const
1146 {
1147   wxString str;
1148   int iPos = Find(ch, true);
1149   if ( iPos != wxNOT_FOUND && iPos != 0 )
1150     str = wxString(c_str(), iPos);
1151
1152   return str;
1153 }
1154
1155 /// get all characters after the first occurence of ch
1156 /// (returns empty string if ch not found)
1157 wxString wxString::AfterFirst(wxUniChar ch) const
1158 {
1159   wxString str;
1160   int iPos = Find(ch);
1161   if ( iPos != wxNOT_FOUND )
1162     str = wx_str() + iPos + 1;
1163
1164   return str;
1165 }
1166
1167 // replace first (or all) occurences of some substring with another one
1168 size_t wxString::Replace(const wxString& strOld,
1169                          const wxString& strNew, bool bReplaceAll)
1170 {
1171     // if we tried to replace an empty string we'd enter an infinite loop below
1172     wxCHECK_MSG( !strOld.empty(), 0,
1173                  _T("wxString::Replace(): invalid parameter") );
1174
1175     size_t uiCount = 0;   // count of replacements made
1176
1177     size_t uiOldLen = strOld.length();
1178     size_t uiNewLen = strNew.length();
1179
1180     size_t dwPos = 0;
1181
1182     while ( (*this)[dwPos] != wxT('\0') )
1183     {
1184         //DO NOT USE STRSTR HERE
1185         //this string can contain embedded null characters,
1186         //so strstr will function incorrectly
1187         dwPos = find(strOld, dwPos);
1188         if ( dwPos == npos )
1189             break;                  // exit the loop
1190         else
1191         {
1192             //replace this occurance of the old string with the new one
1193             replace(dwPos, uiOldLen, strNew, uiNewLen);
1194
1195             //move up pos past the string that was replaced
1196             dwPos += uiNewLen;
1197
1198             //increase replace count
1199             ++uiCount;
1200
1201             // stop now?
1202             if ( !bReplaceAll )
1203                 break;                  // exit the loop
1204         }
1205     }
1206
1207     return uiCount;
1208 }
1209
1210 bool wxString::IsAscii() const
1211 {
1212     for ( const_iterator i = begin(); i != end(); ++i )
1213     {
1214         if ( !(*i).IsAscii() )
1215             return false;
1216     }
1217
1218     return true;
1219 }
1220
1221 bool wxString::IsWord() const
1222 {
1223     for ( const_iterator i = begin(); i != end(); ++i )
1224     {
1225         if ( !wxIsalpha(*i) )
1226             return false;
1227     }
1228
1229     return true;
1230 }
1231
1232 bool wxString::IsNumber() const
1233 {
1234     if ( empty() )
1235         return true;
1236
1237     const_iterator i = begin();
1238
1239     if ( *i == _T('-') || *i == _T('+') )
1240         ++i;
1241
1242     for ( ; i != end(); ++i )
1243     {
1244         if ( !wxIsdigit(*i) )
1245             return false;
1246     }
1247
1248     return true;
1249 }
1250
1251 wxString wxString::Strip(stripType w) const
1252 {
1253     wxString s = *this;
1254     if ( w & leading ) s.Trim(false);
1255     if ( w & trailing ) s.Trim(true);
1256     return s;
1257 }
1258
1259 // ---------------------------------------------------------------------------
1260 // case conversion
1261 // ---------------------------------------------------------------------------
1262
1263 wxString& wxString::MakeUpper()
1264 {
1265   for ( iterator it = begin(), en = end(); it != en; ++it )
1266     *it = (wxChar)wxToupper(*it);
1267
1268   return *this;
1269 }
1270
1271 wxString& wxString::MakeLower()
1272 {
1273   for ( iterator it = begin(), en = end(); it != en; ++it )
1274     *it = (wxChar)wxTolower(*it);
1275
1276   return *this;
1277 }
1278
1279 // ---------------------------------------------------------------------------
1280 // trimming and padding
1281 // ---------------------------------------------------------------------------
1282
1283 // some compilers (VC++ 6.0 not to name them) return true for a call to
1284 // isspace('ê') in the C locale which seems to be broken to me, but we have to
1285 // live with this by checking that the character is a 7 bit one - even if this
1286 // may fail to detect some spaces (I don't know if Unicode doesn't have
1287 // space-like symbols somewhere except in the first 128 chars), it is arguably
1288 // still better than trimming away accented letters
1289 inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1290
1291 // trims spaces (in the sense of isspace) from left or right side
1292 wxString& wxString::Trim(bool bFromRight)
1293 {
1294     // first check if we're going to modify the string at all
1295     if ( !empty() &&
1296          (
1297           (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1298           (!bFromRight && wxSafeIsspace(GetChar(0u)))
1299          )
1300        )
1301     {
1302         if ( bFromRight )
1303         {
1304             // find last non-space character
1305             reverse_iterator psz = rbegin();
1306             while ( (psz != rend()) && wxSafeIsspace(*psz) )
1307                 psz++;
1308
1309             // truncate at trailing space start
1310             erase(psz.base(), end());
1311         }
1312         else
1313         {
1314             // find first non-space character
1315             iterator psz = begin();
1316             while ( (psz != end()) && wxSafeIsspace(*psz) )
1317                 psz++;
1318
1319             // fix up data and length
1320             erase(begin(), psz);
1321         }
1322     }
1323
1324     return *this;
1325 }
1326
1327 // adds nCount characters chPad to the string from either side
1328 wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1329 {
1330     wxString s(chPad, nCount);
1331
1332     if ( bFromRight )
1333         *this += s;
1334     else
1335     {
1336         s += *this;
1337         swap(s);
1338     }
1339
1340     return *this;
1341 }
1342
1343 // truncate the string
1344 wxString& wxString::Truncate(size_t uiLen)
1345 {
1346     if ( uiLen < length() )
1347     {
1348         erase(begin() + uiLen, end());
1349     }
1350     //else: nothing to do, string is already short enough
1351
1352     return *this;
1353 }
1354
1355 // ---------------------------------------------------------------------------
1356 // finding (return wxNOT_FOUND if not found and index otherwise)
1357 // ---------------------------------------------------------------------------
1358
1359 // find a character
1360 int wxString::Find(wxUniChar ch, bool bFromEnd) const
1361 {
1362     size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1363
1364     return (idx == npos) ? wxNOT_FOUND : (int)idx;
1365 }
1366
1367 // ----------------------------------------------------------------------------
1368 // conversion to numbers
1369 // ----------------------------------------------------------------------------
1370
1371 // the implementation of all the functions below is exactly the same so factor
1372 // it out
1373
1374 template <typename T, typename F>
1375 bool wxStringToIntType(const wxChar *start,
1376                        T *val,
1377                        int base,
1378                        F func)
1379 {
1380     wxCHECK_MSG( val, false, _T("NULL output pointer") );
1381     wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );
1382
1383 #ifndef __WXWINCE__
1384     errno = 0;
1385 #endif
1386
1387     wxChar *end;
1388     *val = (*func)(start, &end, base);
1389
1390     // return true only if scan was stopped by the terminating NUL and if the
1391     // string was not empty to start with and no under/overflow occurred
1392     return !*end && (end != start)
1393 #ifndef __WXWINCE__
1394         && (errno != ERANGE)
1395 #endif
1396     ;
1397 }
1398
1399 bool wxString::ToLong(long *val, int base) const
1400 {
1401     return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtol);
1402 }
1403
1404 bool wxString::ToULong(unsigned long *val, int base) const
1405 {
1406     return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtoul);
1407 }
1408
1409 bool wxString::ToLongLong(wxLongLong_t *val, int base) const
1410 {
1411     return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtoll);
1412 }
1413
1414 bool wxString::ToULongLong(wxULongLong_t *val, int base) const
1415 {
1416     return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtoull);
1417 }
1418
1419 bool wxString::ToDouble(double *val) const
1420 {
1421     wxCHECK_MSG( val, false, _T("NULL pointer in wxString::ToDouble") );
1422
1423 #ifndef __WXWINCE__
1424     errno = 0;
1425 #endif
1426
1427     const wxChar *start = c_str();
1428     wxChar *end;
1429     *val = wxStrtod(start, &end);
1430
1431     // return true only if scan was stopped by the terminating NUL and if the
1432     // string was not empty to start with and no under/overflow occurred
1433     return !*end && (end != start)
1434 #ifndef __WXWINCE__
1435         && (errno != ERANGE)
1436 #endif
1437     ;
1438 }
1439
1440 // ---------------------------------------------------------------------------
1441 // formatted output
1442 // ---------------------------------------------------------------------------
1443
1444 /* static */
1445 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1446 wxString wxStringPrintfMixinBase::DoFormat(const wxString& format, ...)
1447 #else
1448 wxString wxString::DoFormat(const wxString& format, ...)
1449 #endif
1450 {
1451     va_list argptr;
1452     va_start(argptr, format);
1453
1454     wxString s;
1455     s.PrintfV(format, argptr);
1456
1457     va_end(argptr);
1458
1459     return s;
1460 }
1461
1462 /* static */
1463 wxString wxString::FormatV(const wxString& format, va_list argptr)
1464 {
1465     wxString s;
1466     s.PrintfV(format, argptr);
1467     return s;
1468 }
1469
1470 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1471 int wxStringPrintfMixinBase::DoPrintf(const wxString& format, ...)
1472 #else
1473 int wxString::DoPrintf(const wxString& format, ...)
1474 #endif
1475 {
1476     va_list argptr;
1477     va_start(argptr, format);
1478
1479 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1480     // get a pointer to the wxString instance; we have to use dynamic_cast<>
1481     // because it's the only cast that works safely for downcasting when
1482     // multiple inheritance is used:
1483     wxString *str = static_cast<wxString*>(this);
1484 #else
1485     wxString *str = this;
1486 #endif
1487
1488     int iLen = str->PrintfV(format, argptr);
1489
1490     va_end(argptr);
1491
1492     return iLen;
1493 }
1494
// Formats the arguments in argptr according to format into str, using a
// temporary write buffer which starts at 1024 characters and is enlarged and
// the formatting retried for as long as wxVsnprintf() reports that the output
// didn't fit.
//
// Returns str.length() after the final attempt, or -1 if the write buffer
// couldn't be obtained or, when wxUSE_WXVSNPRINTF is set, if wxVsnprintf()
// itself returned an error.
#if wxUSE_UNICODE_UTF8
template<typename BufferType>
#else
// we only need one version in non-UTF8 builds and at least two Windows
// compilers have problems with this function template, so use just one
// normal function here
#endif
static int DoStringPrintfV(wxString& str,
                           const wxString& format, va_list argptr)
{
    int size = 1024;

    for ( ;; )
    {
        // a new buffer object is created on every iteration (and destroyed at
        // its end); one extra character is requested for the terminating NUL
        // written below
        // NOTE(review): presumably the buffer object stores its contents back
        // into str when it is destroyed -- confirm in its declaration
#if wxUSE_UNICODE_UTF8
        BufferType tmp(str, size + 1);
        typename BufferType::CharType *buf = tmp;
#else
        wxStringBuffer tmp(str, size + 1);
        wxChar *buf = tmp;
#endif

        if ( !buf )
        {
            // out of memory
            return -1;
        }

        // wxVsnprintf() may modify the original arg pointer, so pass it
        // only a copy
        va_list argptrcopy;
        wxVaCopy(argptrcopy, argptr);
        int len = wxVsnprintf(buf, size, format, argptrcopy);
        va_end(argptrcopy);

        // some implementations of vsnprintf() don't NUL terminate
        // the string if there is not enough space for it so
        // always do it manually (index size is valid because size + 1
        // characters were requested above)
        buf[size] = _T('\0');

        // vsnprintf() may return either -1 (traditional Unix behaviour) or the
        // total number of characters which would have been written if the
        // buffer were large enough (newer standards such as Unix98)
        if ( len < 0 )
        {
#if wxUSE_WXVSNPRINTF
            // we know that our own implementation of wxVsnprintf() returns -1
            // only for a format error - thus there's something wrong with
            // the user's format string
            return -1;
#else // assume that system version only returns error if not enough space
            // still not enough, as we don't know how much we need, double the
            // current size of the buffer
            size *= 2;
#endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
        }
        else if ( len >= size )
        {
#if wxUSE_WXVSNPRINTF
            // we know that our own implementation of wxVsnprintf() returns
            // size+1 when there's not enough space but that's not the size
            // of the required buffer!
            size *= 2;      // so we just double the current size of the buffer
#else
            // some vsnprintf() implementations NUL-terminate the buffer and
            // some don't in len == size case, to be safe always add 1
            size = len + 1;
#endif
        }
        else // ok, there was enough space
        {
            break;
        }
    }

    // we could have overshot
    str.Shrink();

    return str.length();
}
1575
1576 int wxString::PrintfV(const wxString& format, va_list argptr)
1577 {
1578     va_list argcopy;
1579     wxVaCopy(argcopy, argptr);
1580
1581 #if wxUSE_UNICODE_UTF8
1582     #if wxUSE_STL_BASED_WXSTRING
1583         typedef wxStringTypeBuffer<char> Utf8Buffer;
1584     #else
1585         typedef wxImplStringBuffer Utf8Buffer;
1586     #endif
1587 #endif
1588
1589 #if wxUSE_UTF8_LOCALE_ONLY
1590     return DoStringPrintfV<Utf8Buffer>(*this, format, argcopy);
1591 #else
1592     #if wxUSE_UNICODE_UTF8
1593     if ( wxLocaleIsUtf8 )
1594         return DoStringPrintfV<Utf8Buffer>(*this, format, argcopy);
1595     else
1596         // wxChar* version
1597         return DoStringPrintfV<wxStringBuffer>(*this, format, argcopy);
1598     #else
1599         return DoStringPrintfV(*this, format, argcopy);
1600     #endif // UTF8/WCHAR
1601 #endif
1602 }
1603
1604 // ----------------------------------------------------------------------------
1605 // misc other operations
1606 // ----------------------------------------------------------------------------
1607
// returns true if the string matches the pattern which may contain '*' and
// '?' metacharacters (as usual, '?' matches any character and '*' any number
// of them)
//
// The mask is scanned from left to right together with the text. The literal
// text following a '*' in the mask (up to the next metacharacter or the end
// of the mask) is located in the text with wxStrstr(); if the mask is
// exhausted while some text is still left, matching is restarted from the
// last '*' seen with the text position advanced by one character.
bool wxString::Matches(const wxString& mask) const
{
    // I disable this code as it doesn't seem to be faster (in fact, it seems
    // to be much slower) than the old, hand-written code below and using it
    // here requires always linking with libregex even if the user code doesn't
    // use it
    //
    // NOTE(review): the disabled branch uses pszMask, which is only declared
    // in the active branch below, so it would need updating before it could
    // be enabled again
#if 0 // wxUSE_REGEX
    // first translate the shell-like mask into a regex
    wxString pattern;
    pattern.reserve(wxStrlen(pszMask));

    pattern += _T('^');
    while ( *pszMask )
    {
        switch ( *pszMask )
        {
            case _T('?'):
                pattern += _T('.');
                break;

            case _T('*'):
                pattern += _T(".*");
                break;

            case _T('^'):
            case _T('.'):
            case _T('$'):
            case _T('('):
            case _T(')'):
            case _T('|'):
            case _T('+'):
            case _T('\\'):
                // these characters are special in a RE, quote them
                // (however note that we don't quote '[' and ']' to allow
                // using them for Unix shell like matching)
                pattern += _T('\\');
                // fall through

            default:
                pattern += *pszMask;
        }

        pszMask++;
    }
    pattern += _T('$');

    // and now use it
    return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
#else // !wxUSE_REGEX
  // TODO: this is, of course, awfully inefficient...

  // FIXME-UTF8: implement using iterators, remove #if
#if wxUSE_UNICODE_UTF8
  // work on wide character copies of both strings; the buffer objects are
  // kept in local variables while the pointers into them are being used
  wxWCharBuffer maskBuf = mask.wc_str();
  wxWCharBuffer txtBuf = wc_str();
  const wxChar *pszMask = maskBuf.data();
  const wxChar *pszTxt = txtBuf.data();
#else
  const wxChar *pszMask = mask.wx_str();
  // the char currently being checked
  const wxChar *pszTxt = wx_str();
#endif

  // the last location where '*' matched
  const wxChar *pszLastStarInText = NULL;
  const wxChar *pszLastStarInMask = NULL;

  // matching is (re)started from this label when backtracking
match:
  for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
    switch ( *pszMask ) {
      case wxT('?'):
        // '?' needs a character to match, so fail at the end of the text
        if ( *pszTxt == wxT('\0') )
          return false;

        // pszTxt and pszMask will be incremented in the loop statement

        break;

      case wxT('*'):
        {
          // remember where we started to be able to backtrack later
          pszLastStarInText = pszTxt;
          pszLastStarInMask = pszMask;

          // ignore special chars immediately following this one
          // (should this be an error?)
          while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
            pszMask++;

          // if there is nothing more, match
          if ( *pszMask == wxT('\0') )
            return true;

          // are there any other metacharacters in the mask?
          size_t uiLenMask;
          const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));

          if ( pEndMask != NULL ) {
            // we have to match the string between two metachars
            uiLenMask = pEndMask - pszMask;
          }
          else {
            // we have to match the remainder of the string
            uiLenMask = wxStrlen(pszMask);
          }

          // look for the literal part of the mask in the remaining text
          wxString strToMatch(pszMask, uiLenMask);
          const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
          if ( pMatch == NULL )
            return false;

          // -1 to compensate "++" in the loop
          pszTxt = pMatch + uiLenMask - 1;
          pszMask += uiLenMask - 1;
        }
        break;

      default:
        // any other character must match literally
        if ( *pszMask != *pszTxt )
          return false;
        break;
    }
  }

  // match only if nothing left
  if ( *pszTxt == wxT('\0') )
    return true;

  // if we failed to match, backtrack if we can
  if ( pszLastStarInText ) {
    // retry from the last '*' in the mask, starting one character further
    // in the text than on the previous attempt
    pszTxt = pszLastStarInText + 1;
    pszMask = pszLastStarInMask;

    pszLastStarInText = NULL;

    // don't bother resetting pszLastStarInMask, it's unnecessary

    goto match;
  }

  return false;
#endif // wxUSE_REGEX/!wxUSE_REGEX
}
1754
1755 // Count the number of chars
1756 int wxString::Freq(wxUniChar ch) const
1757 {
1758     int count = 0;
1759     for ( const_iterator i = begin(); i != end(); ++i )
1760     {
1761         if ( *i == ch )
1762             count ++;
1763     }
1764     return count;
1765 }
1766
1767 // convert to upper case, return the copy of the string
1768 wxString wxString::Upper() const
1769 { wxString s(*this); return s.MakeUpper(); }
1770
1771 // convert to lower case, return the copy of the string
1772 wxString wxString::Lower() const { wxString s(*this); return s.MakeLower(); }