src/common/string.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/string.cpp
   3 // Purpose:     wxString class
   4 // Author:      Vadim Zeitlin, Ryan Norton
   5 // Modified by:
   6 // Created:     29/01/98
   7 // RCS-ID:      $Id$
   8 // Copyright:   (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
   9 //              (c) 2004 Ryan Norton <wxprojects@comcast.net>
  10 // Licence:     wxWindows licence
  11 /////////////////////////////////////////////////////////////////////////////
  12
  13 // ===========================================================================
  14 // headers, declarations, constants
  15 // ===========================================================================
  16
  17 // For compilers that support precompilation, includes "wx.h".
  18 #include "wx/wxprec.h"
  19
  20 #ifdef __BORLANDC__
  21     #pragma hdrstop
  22 #endif
  23
  24 #ifndef WX_PRECOMP
  25     #include "wx/string.h"
  26     #include "wx/wxcrtvararg.h"
  27 #endif
  28
  29 #include <ctype.h>
  30
  31 #ifndef __WXWINCE__
  32     #include <errno.h>
  33 #endif
  34
  35 #include <string.h>
  36 #include <stdlib.h>
  37
  38 #ifdef __SALFORDC__
  39     #include <clib.h>
  40 #endif
  41
  42 #include "wx/hashmap.h"
  43
// string handling functions used by wxString:
//
// The wxString* names below are aliases selected at compile time: the plain
// C library functions are used when wxUSE_UNICODE_UTF8 is set and the
// wxTmem*/wxStrlen functions are used otherwise.
#if wxUSE_UNICODE_UTF8
    #define wxStringMemcpy   memcpy
    #define wxStringMemcmp   memcmp
    #define wxStringMemchr   memchr
    #define wxStringStrlen   strlen
#else
    #define wxStringMemcpy   wxTmemcpy
    #define wxStringMemcmp   wxTmemcmp
    #define wxStringMemchr   wxTmemchr
    #define wxStringStrlen   wxStrlen
#endif
  56
  57
  58 // ---------------------------------------------------------------------------
  59 // static class variables definition
  60 // ---------------------------------------------------------------------------
  61
// According to the STL, npos _must_ be (size_t)-1, i.e. the largest value
// that a size_t can hold.
const size_t wxString::npos = (size_t) -1;
  64
  65 // ----------------------------------------------------------------------------
  66 // global functions
  67 // ----------------------------------------------------------------------------
  68
  69 #if wxUSE_STD_IOSTREAM
  70
  71 #include <iostream>
  72
  73 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
  74 {
  75 // FIXME-UTF8: always, not only if wxUSE_UNICODE
  76 #if wxUSE_UNICODE && !defined(__BORLANDC__)
  77     return os << (const wchar_t*)str.AsWCharBuf();
  78 #else
  79     return os << (const char*)str.AsCharBuf();
  80 #endif
  81 }
  82
  83 wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
  84 {
  85     return os << str.c_str();
  86 }
  87
  88 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str)
  89 {
  90     return os << str.data();
  91 }
  92
  93 #ifndef __BORLANDC__
  94 wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)
  95 {
  96     return os << str.data();
  97 }
  98 #endif
  99
 100 #endif // wxUSE_STD_IOSTREAM
 101
 102 // ===========================================================================
 103 // wxString class core
 104 // ===========================================================================
 105
 106 #if wxUSE_UNICODE_UTF8
 107
 108 void wxString::PosLenToImpl(size_t pos, size_t len,
 109                             size_t *implPos, size_t *implLen) const
 110 {
 111     if ( pos == npos )
 112         *implPos = npos;
 113     else
 114     {
 115         const_iterator i = begin() + pos;
 116         *implPos = wxStringImpl::const_iterator(i.impl()) - m_impl.begin();
 117         if ( len == npos )
 118             *implLen = npos;
 119         else
 120         {
 121             // too large length is interpreted as "to the end of the string"
 122             // FIXME-UTF8: verify this is the case in std::string, assert
 123             // otherwise
 124             if ( pos + len > length() )
 125                 len = length() - pos;
 126
 127             *implLen = (i + len).impl() - i.impl();
 128         }
 129     }
 130 }
 131
 132 #endif // wxUSE_UNICODE_UTF8
 133
 134 // ----------------------------------------------------------------------------
 135 // wxCStrData converted strings caching
 136 // ----------------------------------------------------------------------------
 137
 138 // FIXME-UTF8: temporarily disabled because it doesn't work with global
 139 //             string objects; re-enable after fixing this bug and benchmarking
 140 //             performance to see if using a hash is a good idea at all
 141 #if 0
 142
 143 // For backward compatibility reasons, it must be possible to assign the value
 144 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
 145 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
 146 // because the memory would be freed immediately, but it has to be valid as long
 147 // as the string is not modified, so that code like this still works:
 148 //
 149 // const wxChar *s = str.c_str();
 150 // while ( s ) { ... }
 151
 152 // FIXME-UTF8: not thread safe!
 153 // FIXME-UTF8: we currently clear the cached conversion only when the string is
 154 //             destroyed, but we should do it when the string is modified, to
 155 //             keep memory usage down
 156 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
 157 //             invalidated the cache on every change, we could keep the previous
 158 //             conversion
 159 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
 160 //             to use mb_str() or wc_str() instead of (const [w]char*)c_str()
 161
// Remove the entry for the given string from the conversion cache "hash", if
// there is one, and free() the converted buffer stored as its value.
template<typename T>
static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
{
    // the cast is needed because the key is looked up as a non-const wxString*
    typename T::iterator i = hash.find(wxConstCast(s, wxString));
    if ( i != hash.end() )
    {
        free(i->second);
        hash.erase(i);
    }
}
 172
 173 #if wxUSE_UNICODE
 174 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
 175 //     so we have to use wxString* here and const-cast when used
WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
                    wxStringCharConversionCache);
// cache mapping strings to the buffers holding their char conversion
static wxStringCharConversionCache gs_stringsCharCache;

// Return the char version of the string, starting at m_offset; the converted
// buffer is kept in gs_stringsCharCache.
const char* wxCStrData::AsChar() const
{
    // remove previously cached value, if any (see FIXMEs above):
    DeleteStringFromConversionCache(gs_stringsCharCache, m_str);

    // convert the string and keep it:
    const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
        m_str->mb_str().release();

    return s + m_offset;
}
 191 #endif // wxUSE_UNICODE
 192
 193 #if !wxUSE_UNICODE_WCHAR
WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
                    wxStringWCharConversionCache);
// cache mapping strings to the buffers holding their wchar_t conversion
static wxStringWCharConversionCache gs_stringsWCharCache;

// Return the wchar_t version of the string, starting at m_offset; the
// converted buffer is kept in gs_stringsWCharCache.
const wchar_t* wxCStrData::AsWChar() const
{
    // remove previously cached value, if any (see FIXMEs above):
    DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);

    // convert the string and keep it:
    const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
        m_str->wc_str().release();

    return s + m_offset;
}
 209 #endif // !wxUSE_UNICODE_WCHAR
 210
// Destructor: drop the conversions cached for this string, if any, from the
// caches which exist in the current build.
wxString::~wxString()
{
#if wxUSE_UNICODE
    // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
    DeleteStringFromConversionCache(gs_stringsCharCache, this);
#endif
#if !wxUSE_UNICODE_WCHAR
    DeleteStringFromConversionCache(gs_stringsWCharCache, this);
#endif
}
#endif
 221 #endif
 222
 223 #if wxUSE_UNICODE
 224 const char* wxCStrData::AsChar() const
 225 {
 226     wxString *str = wxConstCast(m_str, wxString);
 227
 228     // convert the string:
 229     wxCharBuffer buf(str->mb_str());
 230
 231     // FIXME-UTF8: do the conversion in-place in the existing buffer
 232     if ( str->m_convertedToChar &&
 233          strlen(buf) == strlen(str->m_convertedToChar) )
 234     {
 235         // keep the same buffer for as long as possible, so that several calls
 236         // to c_str() in a row still work:
 237         strcpy(str->m_convertedToChar, buf);
 238     }
 239     else
 240     {
 241         str->m_convertedToChar = buf.release();
 242     }
 243
 244     // and keep it:
 245     return str->m_convertedToChar + m_offset;
 246 }
 247 #endif // wxUSE_UNICODE
 248
 249 #if !wxUSE_UNICODE_WCHAR
 250 const wchar_t* wxCStrData::AsWChar() const
 251 {
 252     wxString *str = wxConstCast(m_str, wxString);
 253
 254     // convert the string:
 255     wxWCharBuffer buf(str->wc_str());
 256
 257     // FIXME-UTF8: do the conversion in-place in the existing buffer
 258     if ( str->m_convertedToWChar &&
 259          wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
 260     {
 261         // keep the same buffer for as long as possible, so that several calls
 262         // to c_str() in a row still work:
 263         memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
 264     }
 265     else
 266     {
 267         str->m_convertedToWChar = buf.release();
 268     }
 269
 270     // and keep it:
 271     return str->m_convertedToWChar + m_offset;
 272 }
 273 #endif // !wxUSE_UNICODE_WCHAR
 274
 275 // ===========================================================================
 276 // wxString class core
 277 // ===========================================================================
 278
 279 // ---------------------------------------------------------------------------
 280 // construction and conversion
 281 // ---------------------------------------------------------------------------
 282
 283 #if wxUSE_UNICODE_WCHAR
 284 /* static */
 285 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 286                                                const wxMBConv& conv)
 287 {
 288     // anything to do?
 289     if ( !psz || nLength == 0 )
 290         return SubstrBufFromMB(L"", 0);
 291
 292     if ( nLength == npos )
 293         nLength = wxNO_LEN;
 294
 295     size_t wcLen;
 296     wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 297     if ( !wcLen )
 298         return SubstrBufFromMB(_T(""), 0);
 299     else
 300         return SubstrBufFromMB(wcBuf, wcLen);
 301 }
 302 #endif // wxUSE_UNICODE_WCHAR
 303
 304 #if wxUSE_UNICODE_UTF8
 305 /* static */
 306 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 307                                                const wxMBConv& conv)
 308 {
 309     // FIXME-UTF8: return as-is without copying under UTF8 locale, return
 310     //             converted string under other locales - needs wxCharBuffer
 311     //             changes
 312
 313     // anything to do?
 314     if ( !psz || nLength == 0 )
 315         return SubstrBufFromMB("", 0);
 316
 317     if ( nLength == npos )
 318         nLength = wxNO_LEN;
 319
 320     // first convert to wide string:
 321     size_t wcLen;
 322     wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 323     if ( !wcLen )
 324         return SubstrBufFromMB("", 0);
 325
 326     // and then to UTF-8:
 327     SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxConvUTF8));
 328     // widechar -> UTF-8 conversion isn't supposed to ever fail:
 329     wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
 330
 331     return buf;
 332 }
 333 #endif // wxUSE_UNICODE_UTF8
 334
 335 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 336 /* static */
 337 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
 338                                                const wxMBConv& conv)
 339 {
 340     // anything to do?
 341     if ( !pwz || nLength == 0 )
 342         return SubstrBufFromWC("", 0);
 343
 344     if ( nLength == npos )
 345         nLength = wxNO_LEN;
 346
 347     size_t mbLen;
 348     wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
 349     if ( !mbLen )
 350         return SubstrBufFromWC("", 0);
 351     else
 352         return SubstrBufFromWC(mbBuf, mbLen);
 353 }
 354 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 355
 356
 357 #if wxUSE_UNICODE_WCHAR
 358
 359 //Convert wxString in Unicode mode to a multi-byte string
 360 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
 361 {
 362     return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL);
 363 }
 364
 365 #elif wxUSE_UNICODE_UTF8
 366
 367 const wxWCharBuffer wxString::wc_str() const
 368 {
 369     return wxConvUTF8.cMB2WC(m_impl.c_str(),
 370                              m_impl.length() + 1 /* size, not length */,
 371                              NULL);
 372 }
 373
 374 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
 375 {
 376     // FIXME-UTF8: optimize the case when conv==wxConvUTF8 or wxConvLibc
 377     //             under UTF8 locale
 378     // FIXME-UTF8: use wc_str() here once we have buffers with length
 379
 380     size_t wcLen;
 381     wxWCharBuffer wcBuf(
 382             wxConvUTF8.cMB2WC(m_impl.c_str(),
 383                               m_impl.length() + 1 /* size, not length */,
 384                               &wcLen));
 385     if ( !wcLen )
 386         return wxCharBuffer("");
 387
 388     return conv.cWC2MB(wcBuf, wcLen, NULL);
 389 }
 390
 391 #else // ANSI
 392
 393 //Converts this string to a wide character string if unicode
 394 //mode is not enabled and wxUSE_WCHAR_T is enabled
 395 const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
 396 {
 397     return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL);
 398 }
 399
 400 #endif // Unicode/ANSI
 401
 402 // shrink to minimal size (releasing extra memory)
 403 bool wxString::Shrink()
 404 {
 405   wxString tmp(begin(), end());
 406   swap(tmp);
 407   return tmp.length() == length();
 408 }
 409
 410 // deprecated compatibility code:
 411 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
// Compatibility wrapper: simply forwards to DoGetWriteBuf() with the same
// length argument and returns its result.
wxStringCharType *wxString::GetWriteBuf(size_t nLen)
{
    return DoGetWriteBuf(nLen);
}
 416
// Compatibility wrapper: simply forwards to DoUngetWriteBuf().
void wxString::UngetWriteBuf()
{
    DoUngetWriteBuf();
}
 421
// Compatibility wrapper: simply forwards to DoUngetWriteBuf() with the same
// length argument.
void wxString::UngetWriteBuf(size_t nLen)
{
    DoUngetWriteBuf(nLen);
}
 426 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
 427
 428
 429 // ---------------------------------------------------------------------------
 430 // data access
 431 // ---------------------------------------------------------------------------
 432
 433 // all functions are inline in string.h
 434
 435 // ---------------------------------------------------------------------------
 436 // concatenation operators
 437 // ---------------------------------------------------------------------------
 438
 439 /*
 440  * concatenation functions come in 5 flavours:
 441  *  string + string
 442  *  char   + string      and      string + char
 443  *  C str  + string      and      string + C str
 444  */
 445
 446 wxString operator+(const wxString& str1, const wxString& str2)
 447 {
 448 #if !wxUSE_STL_BASED_WXSTRING
 449     wxASSERT( str1.IsValid() );
 450     wxASSERT( str2.IsValid() );
 451 #endif
 452
 453     wxString s = str1;
 454     s += str2;
 455
 456     return s;
 457 }
 458
 459 wxString operator+(const wxString& str, wxUniChar ch)
 460 {
 461 #if !wxUSE_STL_BASED_WXSTRING
 462     wxASSERT( str.IsValid() );
 463 #endif
 464
 465     wxString s = str;
 466     s += ch;
 467
 468     return s;
 469 }
 470
 471 wxString operator+(wxUniChar ch, const wxString& str)
 472 {
 473 #if !wxUSE_STL_BASED_WXSTRING
 474     wxASSERT( str.IsValid() );
 475 #endif
 476
 477     wxString s = ch;
 478     s += str;
 479
 480     return s;
 481 }
 482
 483 wxString operator+(const wxString& str, const char *psz)
 484 {
 485 #if !wxUSE_STL_BASED_WXSTRING
 486     wxASSERT( str.IsValid() );
 487 #endif
 488
 489     wxString s;
 490     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 491         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 492     }
 493     s += str;
 494     s += psz;
 495
 496     return s;
 497 }
 498
 499 wxString operator+(const wxString& str, const wchar_t *pwz)
 500 {
 501 #if !wxUSE_STL_BASED_WXSTRING
 502     wxASSERT( str.IsValid() );
 503 #endif
 504
 505     wxString s;
 506     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 507         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 508     }
 509     s += str;
 510     s += pwz;
 511
 512     return s;
 513 }
 514
 515 wxString operator+(const char *psz, const wxString& str)
 516 {
 517 #if !wxUSE_STL_BASED_WXSTRING
 518     wxASSERT( str.IsValid() );
 519 #endif
 520
 521     wxString s;
 522     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 523         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 524     }
 525     s = psz;
 526     s += str;
 527
 528     return s;
 529 }
 530
 531 wxString operator+(const wchar_t *pwz, const wxString& str)
 532 {
 533 #if !wxUSE_STL_BASED_WXSTRING
 534     wxASSERT( str.IsValid() );
 535 #endif
 536
 537     wxString s;
 538     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 539         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 540     }
 541     s = pwz;
 542     s += str;
 543
 544     return s;
 545 }
 546
 547 // ---------------------------------------------------------------------------
 548 // string comparison
 549 // ---------------------------------------------------------------------------
 550
 551 #ifdef HAVE_STD_STRING_COMPARE
 552
 553 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
 554 //     UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
 555 //     sort strings in characters code point order by sorting the byte sequence
 556 //     in byte values order (i.e. what strcmp() and memcmp() do).
 557
// Compare this string with str by forwarding to the compare() method of the
// underlying m_impl objects.
int wxString::compare(const wxString& str) const
{
    return m_impl.compare(str.m_impl);
}
 562
 563 int wxString::compare(size_t nStart, size_t nLen,
 564                       const wxString& str) const
 565 {
 566     size_t pos, len;
 567     PosLenToImpl(nStart, nLen, &pos, &len);
 568     return m_impl.compare(pos, len, str.m_impl);
 569 }
 570
 571 int wxString::compare(size_t nStart, size_t nLen,
 572                       const wxString& str,
 573                       size_t nStart2, size_t nLen2) const
 574 {
 575     size_t pos, len;
 576     PosLenToImpl(nStart, nLen, &pos, &len);
 577
 578     size_t pos2, len2;
 579     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 580
 581     return m_impl.compare(pos, len, str.m_impl, pos2, len2);
 582 }
 583
// Compare this string with the narrow C string sz: the argument is passed
// through ImplStr() before being given to m_impl.compare().
int wxString::compare(const char* sz) const
{
    return m_impl.compare(ImplStr(sz));
}
 588
// Compare this string with the wide C string sz: the argument is passed
// through ImplStr() before being given to m_impl.compare().
int wxString::compare(const wchar_t* sz) const
{
    return m_impl.compare(ImplStr(sz));
}
 593
 594 int wxString::compare(size_t nStart, size_t nLen,
 595                       const char* sz, size_t nCount) const
 596 {
 597     size_t pos, len;
 598     PosLenToImpl(nStart, nLen, &pos, &len);
 599
 600     SubstrBufFromMB str(ImplStr(sz, nCount));
 601
 602     return m_impl.compare(pos, len, str.data, str.len);
 603 }
 604
 605 int wxString::compare(size_t nStart, size_t nLen,
 606                       const wchar_t* sz, size_t nCount) const
 607 {
 608     size_t pos, len;
 609     PosLenToImpl(nStart, nLen, &pos, &len);
 610
 611     SubstrBufFromWC str(ImplStr(sz, nCount));
 612
 613     return m_impl.compare(pos, len, str.data, str.len);
 614 }
 615
 616 #else // !HAVE_STD_STRING_COMPARE
 617
 618 static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
 619                           const wxStringCharType* s2, size_t l2)
 620 {
 621     if( l1 == l2 )
 622         return wxStringMemcmp(s1, s2, l1);
 623     else if( l1 < l2 )
 624     {
 625         int ret = wxStringMemcmp(s1, s2, l1);
 626         return ret == 0 ? -1 : ret;
 627     }
 628     else
 629     {
 630         int ret = wxStringMemcmp(s1, s2, l2);
 631         return ret == 0 ? +1 : ret;
 632     }
 633 }
 634
 635 int wxString::compare(const wxString& str) const
 636 {
 637     return ::wxDoCmp(m_impl.data(), m_impl.length(),
 638                      str.m_impl.data(), str.m_impl.length());
 639 }
 640
 641 int wxString::compare(size_t nStart, size_t nLen,
 642                       const wxString& str) const
 643 {
 644     wxASSERT(nStart <= length());
 645     size_type strLen = length() - nStart;
 646     nLen = strLen < nLen ? strLen : nLen;
 647
 648     size_t pos, len;
 649     PosLenToImpl(nStart, nLen, &pos, &len);
 650
 651     return ::wxDoCmp(m_impl.data() + pos,  len,
 652                      str.m_impl.data(), str.m_impl.length());
 653 }
 654
 655 int wxString::compare(size_t nStart, size_t nLen,
 656                       const wxString& str,
 657                       size_t nStart2, size_t nLen2) const
 658 {
 659     wxASSERT(nStart <= length());
 660     wxASSERT(nStart2 <= str.length());
 661     size_type strLen  =     length() - nStart,
 662               strLen2 = str.length() - nStart2;
 663     nLen  = strLen  < nLen  ? strLen  : nLen;
 664     nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
 665
 666     size_t pos, len;
 667     PosLenToImpl(nStart, nLen, &pos, &len);
 668     size_t pos2, len2;
 669     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 670
 671     return ::wxDoCmp(m_impl.data() + pos, len,
 672                      str.m_impl.data() + pos2, len2);
 673 }
 674
 675 int wxString::compare(const char* sz) const
 676 {
 677     SubstrBufFromMB str(ImplStr(sz, npos));
 678     if ( str.len == npos )
 679         str.len = wxStringStrlen(str.data);
 680     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 681 }
 682
 683 int wxString::compare(const wchar_t* sz) const
 684 {
 685     SubstrBufFromWC str(ImplStr(sz, npos));
 686     if ( str.len == npos )
 687         str.len = wxStringStrlen(str.data);
 688     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 689 }
 690
 691 int wxString::compare(size_t nStart, size_t nLen,
 692                       const char* sz, size_t nCount) const
 693 {
 694     wxASSERT(nStart <= length());
 695     size_type strLen = length() - nStart;
 696     nLen = strLen < nLen ? strLen : nLen;
 697
 698     size_t pos, len;
 699     PosLenToImpl(nStart, nLen, &pos, &len);
 700
 701     SubstrBufFromMB str(ImplStr(sz, nCount));
 702     if ( str.len == npos )
 703         str.len = wxStringStrlen(str.data);
 704
 705     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 706 }
 707
 708 int wxString::compare(size_t nStart, size_t nLen,
 709                       const wchar_t* sz, size_t nCount) const
 710 {
 711     wxASSERT(nStart <= length());
 712     size_type strLen = length() - nStart;
 713     nLen = strLen < nLen ? strLen : nLen;
 714
 715     size_t pos, len;
 716     PosLenToImpl(nStart, nLen, &pos, &len);
 717
 718     SubstrBufFromWC str(ImplStr(sz, nCount));
 719     if ( str.len == npos )
 720         str.len = wxStringStrlen(str.data);
 721
 722     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 723 }
 724
 725 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
 726
 727
 728 // ---------------------------------------------------------------------------
 729 // find_{first,last}_[not]_of functions
 730 // ---------------------------------------------------------------------------
 731
 732 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
 733
 734 // NB: All these functions are implemented  with the argument being wxChar*,
 735 //     i.e. widechar string in any Unicode build, even though native string
 736 //     representation is char* in the UTF-8 build. This is because we couldn't
 737 //     use memchr() to determine if a character is in a set encoded as UTF-8.
 738
 739 size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
 740 {
 741     return find_first_of(sz, nStart, wxStrlen(sz));
 742 }
 743
 744 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
 745 {
 746     return find_first_not_of(sz, nStart, wxStrlen(sz));
 747 }
 748
 749 size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
 750 {
 751     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 752
 753     size_t idx = nStart;
 754     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 755     {
 756         if ( wxTmemchr(sz, *i, n) )
 757             return idx;
 758     }
 759
 760     return npos;
 761 }
 762
 763 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
 764 {
 765     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 766
 767     size_t idx = nStart;
 768     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 769     {
 770         if ( !wxTmemchr(sz, *i, n) )
 771             return idx;
 772     }
 773
 774     return npos;
 775 }
 776
 777
 778 size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
 779 {
 780     return find_last_of(sz, nStart, wxStrlen(sz));
 781 }
 782
 783 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
 784 {
 785     return find_last_not_of(sz, nStart, wxStrlen(sz));
 786 }
 787
 788 size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
 789 {
 790     size_t len = length();
 791
 792     if ( nStart == npos )
 793     {
 794         nStart = len - 1;
 795     }
 796     else
 797     {
 798         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 799     }
 800
 801     size_t idx = nStart;
 802     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 803           i != rend(); --idx, ++i )
 804     {
 805         if ( wxTmemchr(sz, *i, n) )
 806             return idx;
 807     }
 808
 809     return npos;
 810 }
 811
 812 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
 813 {
 814     size_t len = length();
 815
 816     if ( nStart == npos )
 817     {
 818         nStart = len - 1;
 819     }
 820     else
 821     {
 822         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 823     }
 824
 825     size_t idx = nStart;
 826     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 827           i != rend(); --idx, ++i )
 828     {
 829         if ( !wxTmemchr(sz, *i, n) )
 830             return idx;
 831     }
 832
 833     return npos;
 834 }
 835
 836 size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
 837 {
 838     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 839
 840     size_t idx = nStart;
 841     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 842     {
 843         if ( *i != ch )
 844             return idx;
 845     }
 846
 847     return npos;
 848 }
 849
 850 size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
 851 {
 852     size_t len = length();
 853
 854     if ( nStart == npos )
 855     {
 856         nStart = len - 1;
 857     }
 858     else
 859     {
 860         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 861     }
 862
 863     size_t idx = nStart;
 864     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 865           i != rend(); --idx, ++i )
 866     {
 867         if ( *i != ch )
 868             return idx;
 869     }
 870
 871     return npos;
 872 }
 873
// the functions above were implemented for wchar_t* arguments in Unicode
// build and char* in ANSI build; below are implementations for the other
// version:
//
// wxOtherCharType is the character type not handled above and STRCONV is the
// wxConvLibc method converting a string of this type to wxChar; each of the
// functions below converts its argument with STRCONV and calls the
// corresponding wxChar version defined above.
//
// NOTE(review): the versions taking a length pass the same "n" both as the
// length of the string to convert and as the number of converted characters
// to examine, which presumably assumes that the conversion preserves the
// number of characters -- TODO confirm.
#if wxUSE_UNICODE
    #define wxOtherCharType char
    #define STRCONV         (const wxChar*)wxConvLibc.cMB2WC
#else
    #define wxOtherCharType wchar_t
    #define STRCONV         (const wxChar*)wxConvLibc.cWC2MB
#endif

size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_first_of(STRCONV(sz), nStart); }

size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
                               size_t n) const
    { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_last_of(STRCONV(sz), nStart); }
size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
                              size_t n) const
    { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_first_not_of(STRCONV(sz), nStart); }
size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
                                   size_t n) const
    { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_last_not_of(STRCONV(sz), nStart); }
size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
                                  size_t n) const
    { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }

// the helper macros are only meant to be used by the functions above
#undef wxOtherCharType
#undef STRCONV
 909
 910 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
 911
 912 // ===========================================================================
 913 // other common string functions
 914 // ===========================================================================
 915
 916 int wxString::CmpNoCase(const wxString& s) const
 917 {
 918     // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
 919
 920     size_t idx = 0;
 921     const_iterator i1 = begin();
 922     const_iterator end1 = end();
 923     const_iterator i2 = s.begin();
 924     const_iterator end2 = s.end();
 925
 926     for ( ; i1 != end1 && i2 != end2; ++idx, ++i1, ++i2 )
 927     {
 928         wxUniChar lower1 = (wxChar)wxTolower(*i1);
 929         wxUniChar lower2 = (wxChar)wxTolower(*i2);
 930         if ( lower1 != lower2 )
 931             return lower1 < lower2 ? -1 : 1;
 932     }
 933
 934     size_t len1 = length();
 935     size_t len2 = s.length();
 936
 937     if ( len1 < len2 )
 938         return -1;
 939     else if ( len1 > len2 )
 940         return 1;
 941     return 0;
 942 }
 943
 944
 945 #if wxUSE_UNICODE
 946
 947 #ifdef __MWERKS__
 948 #ifndef __SCHAR_MAX__
 949 #define __SCHAR_MAX__ 127
 950 #endif
 951 #endif
 952
 953 wxString wxString::FromAscii(const char *ascii)
 954 {
 955     if (!ascii)
 956        return wxEmptyString;
 957
 958     size_t len = strlen(ascii);
 959     wxString res;
 960
 961     if ( len )
 962     {
 963         wxImplStringBuffer buf(res, len);
 964         wxStringCharType *dest = buf;
 965
 966         for ( ;; )
 967         {
 968             unsigned char c = (unsigned char)*ascii++;
 969             wxASSERT_MSG( c < 0x80,
 970                           _T("Non-ASCII value passed to FromAscii().") );
 971
 972             *dest++ = (wchar_t)c;
 973
 974             if ( c == '\0' )
 975                 break;
 976         }
 977     }
 978
 979     return res;
 980 }
 981
 982 wxString wxString::FromAscii(const char ascii)
 983 {
 984     // What do we do with '\0' ?
 985
 986     unsigned char c = (unsigned char)ascii;
 987
 988     wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") );
 989
 990     // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
 991     return wxString(wxUniChar((wchar_t)c));
 992 }
 993
 994 const wxCharBuffer wxString::ToAscii() const
 995 {
 996     // this will allocate enough space for the terminating NUL too
 997     wxCharBuffer buffer(length());
 998     char *dest = buffer.data();
 999
1000     for ( const_iterator i = begin(); i != end(); ++i )
1001     {
1002         wxUniChar c(*i);
1003         // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1004         *dest++ = c.IsAscii() ? (char)c : '_';
1005
1006         // the output string can't have embedded NULs anyhow, so we can safely
1007         // stop at first of them even if we do have any
1008         if ( !c )
1009             break;
1010     }
1011
1012     return buffer;
1013 }
1014
1015 #endif // wxUSE_UNICODE
1016
1017 // extract string of length nCount starting at nFirst
1018 wxString wxString::Mid(size_t nFirst, size_t nCount) const
1019 {
1020     size_t nLen = length();
1021
1022     // default value of nCount is npos and means "till the end"
1023     if ( nCount == npos )
1024     {
1025         nCount = nLen - nFirst;
1026     }
1027
1028     // out-of-bounds requests return sensible things
1029     if ( nFirst + nCount > nLen )
1030     {
1031         nCount = nLen - nFirst;
1032     }
1033
1034     if ( nFirst > nLen )
1035     {
1036         // AllocCopy() will return empty string
1037         return wxEmptyString;
1038     }
1039
1040     wxString dest(*this, nFirst, nCount);
1041     if ( dest.length() != nCount )
1042     {
1043         wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1044     }
1045
1046     return dest;
1047 }
1048
1049 // check that the string starts with prefix and return the rest of the string
1050 // in the provided pointer if it is not NULL, otherwise return false
1051 bool wxString::StartsWith(const wxChar *prefix, wxString *rest) const
1052 {
1053     wxASSERT_MSG( prefix, _T("invalid parameter in wxString::StartsWith") );
1054
1055     // first check if the beginning of the string matches the prefix: note
1056     // that we don't have to check that we don't run out of this string as
1057     // when we reach the terminating NUL, either prefix string ends too (and
1058     // then it's ok) or we break out of the loop because there is no match
1059     const wxChar *p = c_str();
1060     while ( *prefix )
1061     {
1062         if ( *prefix++ != *p++ )
1063         {
1064             // no match
1065             return false;
1066         }
1067     }
1068
1069     if ( rest )
1070     {
1071         // put the rest of the string into provided pointer
1072         *rest = p;
1073     }
1074
1075     return true;
1076 }
1077
1078
1079 // check that the string ends with suffix and return the rest of it in the
1080 // provided pointer if it is not NULL, otherwise return false
1081 bool wxString::EndsWith(const wxChar *suffix, wxString *rest) const
1082 {
1083     wxASSERT_MSG( suffix, _T("invalid parameter in wxString::EndssWith") );
1084
1085     int start = length() - wxStrlen(suffix);
1086
1087     if ( start < 0 || compare(start, npos, suffix) != 0 )
1088         return false;
1089
1090     if ( rest )
1091     {
1092         // put the rest of the string into provided pointer
1093         rest->assign(*this, 0, start);
1094     }
1095
1096     return true;
1097 }
1098
1099
1100 // extract nCount last (rightmost) characters
1101 wxString wxString::Right(size_t nCount) const
1102 {
1103   if ( nCount > length() )
1104     nCount = length();
1105
1106   wxString dest(*this, length() - nCount, nCount);
1107   if ( dest.length() != nCount ) {
1108     wxFAIL_MSG( _T("out of memory in wxString::Right") );
1109   }
1110   return dest;
1111 }
1112
1113 // get all characters after the last occurence of ch
1114 // (returns the whole string if ch not found)
1115 wxString wxString::AfterLast(wxUniChar ch) const
1116 {
1117   wxString str;
1118   int iPos = Find(ch, true);
1119   if ( iPos == wxNOT_FOUND )
1120     str = *this;
1121   else
1122     str = wx_str() + iPos + 1;
1123
1124   return str;
1125 }
1126
1127 // extract nCount first (leftmost) characters
1128 wxString wxString::Left(size_t nCount) const
1129 {
1130   if ( nCount > length() )
1131     nCount = length();
1132
1133   wxString dest(*this, 0, nCount);
1134   if ( dest.length() != nCount ) {
1135     wxFAIL_MSG( _T("out of memory in wxString::Left") );
1136   }
1137   return dest;
1138 }
1139
1140 // get all characters before the first occurence of ch
1141 // (returns the whole string if ch not found)
1142 wxString wxString::BeforeFirst(wxUniChar ch) const
1143 {
1144   int iPos = Find(ch);
1145   if ( iPos == wxNOT_FOUND ) iPos = length();
1146   return wxString(*this, 0, iPos);
1147 }
1148
1149 /// get all characters before the last occurence of ch
1150 /// (returns empty string if ch not found)
1151 wxString wxString::BeforeLast(wxUniChar ch) const
1152 {
1153   wxString str;
1154   int iPos = Find(ch, true);
1155   if ( iPos != wxNOT_FOUND && iPos != 0 )
1156     str = wxString(c_str(), iPos);
1157
1158   return str;
1159 }
1160
1161 /// get all characters after the first occurence of ch
1162 /// (returns empty string if ch not found)
1163 wxString wxString::AfterFirst(wxUniChar ch) const
1164 {
1165   wxString str;
1166   int iPos = Find(ch);
1167   if ( iPos != wxNOT_FOUND )
1168     str = wx_str() + iPos + 1;
1169
1170   return str;
1171 }
1172
1173 // replace first (or all) occurences of some substring with another one
1174 size_t wxString::Replace(const wxString& strOld,
1175                          const wxString& strNew, bool bReplaceAll)
1176 {
1177     // if we tried to replace an empty string we'd enter an infinite loop below
1178     wxCHECK_MSG( !strOld.empty(), 0,
1179                  _T("wxString::Replace(): invalid parameter") );
1180
1181     size_t uiCount = 0;   // count of replacements made
1182
1183     size_t uiOldLen = strOld.length();
1184     size_t uiNewLen = strNew.length();
1185
1186     size_t dwPos = 0;
1187
1188     while ( (*this)[dwPos] != wxT('\0') )
1189     {
1190         //DO NOT USE STRSTR HERE
1191         //this string can contain embedded null characters,
1192         //so strstr will function incorrectly
1193         dwPos = find(strOld, dwPos);
1194         if ( dwPos == npos )
1195             break;                  // exit the loop
1196         else
1197         {
1198             //replace this occurance of the old string with the new one
1199             replace(dwPos, uiOldLen, strNew, uiNewLen);
1200
1201             //move up pos past the string that was replaced
1202             dwPos += uiNewLen;
1203
1204             //increase replace count
1205             ++uiCount;
1206
1207             // stop now?
1208             if ( !bReplaceAll )
1209                 break;                  // exit the loop
1210         }
1211     }
1212
1213     return uiCount;
1214 }
1215
1216 bool wxString::IsAscii() const
1217 {
1218     for ( const_iterator i = begin(); i != end(); ++i )
1219     {
1220         if ( !(*i).IsAscii() )
1221             return false;
1222     }
1223
1224     return true;
1225 }
1226
1227 bool wxString::IsWord() const
1228 {
1229     for ( const_iterator i = begin(); i != end(); ++i )
1230     {
1231         if ( !wxIsalpha(*i) )
1232             return false;
1233     }
1234
1235     return true;
1236 }
1237
1238 bool wxString::IsNumber() const
1239 {
1240     if ( empty() )
1241         return true;
1242
1243     const_iterator i = begin();
1244
1245     if ( *i == _T('-') || *i == _T('+') )
1246         ++i;
1247
1248     for ( ; i != end(); ++i )
1249     {
1250         if ( !wxIsdigit(*i) )
1251             return false;
1252     }
1253
1254     return true;
1255 }
1256
1257 wxString wxString::Strip(stripType w) const
1258 {
1259     wxString s = *this;
1260     if ( w & leading ) s.Trim(false);
1261     if ( w & trailing ) s.Trim(true);
1262     return s;
1263 }
1264
1265 // ---------------------------------------------------------------------------
1266 // case conversion
1267 // ---------------------------------------------------------------------------
1268
1269 wxString& wxString::MakeUpper()
1270 {
1271   for ( iterator it = begin(), en = end(); it != en; ++it )
1272     *it = (wxChar)wxToupper(*it);
1273
1274   return *this;
1275 }
1276
1277 wxString& wxString::MakeLower()
1278 {
1279   for ( iterator it = begin(), en = end(); it != en; ++it )
1280     *it = (wxChar)wxTolower(*it);
1281
1282   return *this;
1283 }
1284
1285 // ---------------------------------------------------------------------------
1286 // trimming and padding
1287 // ---------------------------------------------------------------------------
1288
1289 // some compilers (VC++ 6.0 not to name them) return true for a call to
1290 // isspace('ê') in the C locale which seems to be broken to me, but we have to
1291 // live with this by checking that the character is a 7 bit one - even if this
1292 // may fail to detect some spaces (I don't know if Unicode doesn't have
1293 // space-like symbols somewhere except in the first 128 chars), it is arguably
1294 // still better than trimming away accented letters
1295 inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1296
1297 // trims spaces (in the sense of isspace) from left or right side
1298 wxString& wxString::Trim(bool bFromRight)
1299 {
1300     // first check if we're going to modify the string at all
1301     if ( !empty() &&
1302          (
1303           (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1304           (!bFromRight && wxSafeIsspace(GetChar(0u)))
1305          )
1306        )
1307     {
1308         if ( bFromRight )
1309         {
1310             // find last non-space character
1311             reverse_iterator psz = rbegin();
1312             while ( (psz != rend()) && wxSafeIsspace(*psz) )
1313                 psz++;
1314
1315             // truncate at trailing space start
1316             erase(psz.base(), end());
1317         }
1318         else
1319         {
1320             // find first non-space character
1321             iterator psz = begin();
1322             while ( (psz != end()) && wxSafeIsspace(*psz) )
1323                 psz++;
1324
1325             // fix up data and length
1326             erase(begin(), psz);
1327         }
1328     }
1329
1330     return *this;
1331 }
1332
1333 // adds nCount characters chPad to the string from either side
1334 wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1335 {
1336     wxString s(chPad, nCount);
1337
1338     if ( bFromRight )
1339         *this += s;
1340     else
1341     {
1342         s += *this;
1343         swap(s);
1344     }
1345
1346     return *this;
1347 }
1348
1349 // truncate the string
1350 wxString& wxString::Truncate(size_t uiLen)
1351 {
1352     if ( uiLen < length() )
1353     {
1354         erase(begin() + uiLen, end());
1355     }
1356     //else: nothing to do, string is already short enough
1357
1358     return *this;
1359 }
1360
1361 // ---------------------------------------------------------------------------
1362 // finding (return wxNOT_FOUND if not found and index otherwise)
1363 // ---------------------------------------------------------------------------
1364
1365 // find a character
1366 int wxString::Find(wxUniChar ch, bool bFromEnd) const
1367 {
1368     size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1369
1370     return (idx == npos) ? wxNOT_FOUND : (int)idx;
1371 }
1372
1373 // ----------------------------------------------------------------------------
1374 // conversion to numbers
1375 // ----------------------------------------------------------------------------
1376
1377 // the implementation of all the functions below is exactly the same so factor
1378 // it out
1379
1380 template <typename T, typename F>
1381 bool wxStringToIntType(const wxChar *start,
1382                        T *val,
1383                        int base,
1384                        F func)
1385 {
1386     wxCHECK_MSG( val, false, _T("NULL output pointer") );
1387     wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );
1388
1389 #ifndef __WXWINCE__
1390     errno = 0;
1391 #endif
1392
1393     wxChar *end;
1394     *val = (*func)(start, &end, base);
1395
1396     // return true only if scan was stopped by the terminating NUL and if the
1397     // string was not empty to start with and no under/overflow occurred
1398     return !*end && (end != start)
1399 #ifndef __WXWINCE__
1400         && (errno != ERANGE)
1401 #endif
1402     ;
1403 }
1404
1405 bool wxString::ToLong(long *val, int base) const
1406 {
1407     return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtol);
1408 }
1409
1410 bool wxString::ToULong(unsigned long *val, int base) const
1411 {
1412     return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtoul);
1413 }
1414
1415 bool wxString::ToLongLong(wxLongLong_t *val, int base) const
1416 {
1417     return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtoll);
1418 }
1419
1420 bool wxString::ToULongLong(wxULongLong_t *val, int base) const
1421 {
1422     return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtoull);
1423 }
1424
1425 bool wxString::ToDouble(double *val) const
1426 {
1427     wxCHECK_MSG( val, false, _T("NULL pointer in wxString::ToDouble") );
1428
1429 #ifndef __WXWINCE__
1430     errno = 0;
1431 #endif
1432
1433     const wxChar *start = c_str();
1434     wxChar *end;
1435     *val = wxStrtod(start, &end);
1436
1437     // return true only if scan was stopped by the terminating NUL and if the
1438     // string was not empty to start with and no under/overflow occurred
1439     return !*end && (end != start)
1440 #ifndef __WXWINCE__
1441         && (errno != ERANGE)
1442 #endif
1443     ;
1444 }
1445
1446 // ---------------------------------------------------------------------------
1447 // formatted output
1448 // ---------------------------------------------------------------------------
1449
1450 #if !wxUSE_UTF8_LOCALE_ONLY
1451 /* static */
1452 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1453 wxString wxStringPrintfMixinBase::DoFormat(const wxChar *format, ...)
1454 #else
1455 wxString wxString::DoFormatWchar(const wxChar *format, ...)
1456 #endif
1457 {
1458     va_list argptr;
1459     va_start(argptr, format);
1460
1461     wxString s;
1462     s.PrintfV(format, argptr);
1463
1464     va_end(argptr);
1465
1466     return s;
1467 }
1468 #endif // !wxUSE_UTF8_LOCALE_ONLY
1469
#if wxUSE_UNICODE_UTF8
// Return a new string containing the result of formatting the variable
// arguments according to the printf()-like char format string; the work is
// done by PrintfV().
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);

    result.PrintfV(format, args);

    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
1485
1486 /* static */
1487 wxString wxString::FormatV(const wxString& format, va_list argptr)
1488 {
1489     wxString s;
1490     s.PrintfV(format, argptr);
1491     return s;
1492 }
1493
1494 #if !wxUSE_UTF8_LOCALE_ONLY
1495 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1496 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
1497 #else
1498 int wxString::DoPrintfWchar(const wxChar *format, ...)
1499 #endif
1500 {
1501     va_list argptr;
1502     va_start(argptr, format);
1503
1504 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1505     // get a pointer to the wxString instance; we have to use dynamic_cast<>
1506     // because it's the only cast that works safely for downcasting when
1507     // multiple inheritance is used:
1508     wxString *str = static_cast<wxString*>(this);
1509 #else
1510     wxString *str = this;
1511 #endif
1512
1513     int iLen = str->PrintfV(format, argptr);
1514
1515     va_end(argptr);
1516
1517     return iLen;
1518 }
1519 #endif // !wxUSE_UTF8_LOCALE_ONLY
1520
#if wxUSE_UNICODE_UTF8
// Format the variable arguments according to the printf()-like char format
// string into this string and return the value returned by PrintfV().
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);

    const int nChars = PrintfV(format, args);

    va_end(args);

    return nChars;
}
#endif // wxUSE_UNICODE_UTF8
1534
// Helper of wxString::PrintfV(): format the arguments in argptr according to
// the format string into str.
//
// The output is written by wxVsnprintf() into a buffer attached to str: the
// first attempt uses a buffer of 1024 characters and it is retried with a
// bigger buffer for as long as wxVsnprintf() indicates that there is not
// enough space.
//
// Returns the length of str on success and -1 if the buffer couldn't be
// obtained or, when our own wxVsnprintf() implementation is used, if it
// reported an error.
//
// In UTF-8 builds this is a template parametrized by the type of the buffer
// object to use, otherwise wxStringBuffer is always used.
#if wxUSE_UNICODE_UTF8
template<typename BufferType>
#else
// we only need one version in non-UTF8 builds and at least two Windows
// compilers have problems with this function template, so use just one
// normal function here
#endif
static int DoStringPrintfV(wxString& str,
                           const wxString& format, va_list argptr)
{
    // size of the buffer used for the first attempt
    int size = 1024;

    for ( ;; )
    {
        // a new buffer object is created for each attempt and destroyed at
        // the end of the iteration (NOTE(review): presumably this is what
        // commits the buffer contents to str -- confirm in the buffer class)
#if wxUSE_UNICODE_UTF8
        BufferType tmp(str, size + 1);
        typename BufferType::CharType *buf = tmp;
#else
        wxStringBuffer tmp(str, size + 1);
        wxChar *buf = tmp;
#endif

        if ( !buf )
        {
            // out of memory
            return -1;
        }

        // wxVsnprintf() may modify the original arg pointer, so pass it
        // only a copy
        va_list argptrcopy;
        wxVaCopy(argptrcopy, argptr);
        int len = wxVsnprintf(buf, size, format, argptrcopy);
        va_end(argptrcopy);

        // some implementations of vsnprintf() don't NUL terminate
        // the string if there is not enough space for it so
        // always do it manually (the buffer was requested with size + 1
        // elements above, so this doesn't write beyond its end)
        buf[size] = _T('\0');

        // vsnprintf() may return either -1 (traditional Unix behaviour) or the
        // total number of characters which would have been written if the
        // buffer were large enough (newer standards such as Unix98)
        if ( len < 0 )
        {
#if wxUSE_WXVSNPRINTF
            // we know that our own implementation of wxVsnprintf() returns -1
            // only for a format error - thus there's something wrong with
            // the user's format string
            return -1;
#else // assume that system version only returns error if not enough space
            // still not enough, as we don't know how much we need, double the
            // current size of the buffer
            size *= 2;
#endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
        }
        else if ( len >= size )
        {
#if wxUSE_WXVSNPRINTF
            // we know that our own implementation of wxVsnprintf() returns
            // size+1 when there's not enough space but that's not the size
            // of the required buffer!
            size *= 2;      // so we just double the current size of the buffer
#else
            // some vsnprintf() implementations NUL-terminate the buffer and
            // some don't in len == size case, to be safe always add 1
            size = len + 1;
#endif
        }
        else // ok, there was enough space
        {
            break;
        }
    }

    // we could have overshot
    str.Shrink();

    return str.length();
}
1615
1616 int wxString::PrintfV(const wxString& format, va_list argptr)
1617 {
1618     va_list argcopy;
1619     wxVaCopy(argcopy, argptr);
1620
1621 #if wxUSE_UNICODE_UTF8
1622     #if wxUSE_STL_BASED_WXSTRING
1623         typedef wxStringTypeBuffer<char> Utf8Buffer;
1624     #else
1625         typedef wxImplStringBuffer Utf8Buffer;
1626     #endif
1627 #endif
1628
1629 #if wxUSE_UTF8_LOCALE_ONLY
1630     return DoStringPrintfV<Utf8Buffer>(*this, format, argcopy);
1631 #else
1632     #if wxUSE_UNICODE_UTF8
1633     if ( wxLocaleIsUtf8 )
1634         return DoStringPrintfV<Utf8Buffer>(*this, format, argcopy);
1635     else
1636         // wxChar* version
1637         return DoStringPrintfV<wxStringBuffer>(*this, format, argcopy);
1638     #else
1639         return DoStringPrintfV(*this, format, argcopy);
1640     #endif // UTF8/WCHAR
1641 #endif
1642 }
1643
1644 // ----------------------------------------------------------------------------
1645 // misc other operations
1646 // ----------------------------------------------------------------------------
1647
// returns true if the string matches the pattern which may contain '*' and
// '?' metacharacters (as usual, '?' matches any character and '*' any number
// of them)
//
// the implementation really used is the hand-written matcher in the second
// branch of the preprocessor test below, the first, regex-based, one is
// disabled
bool wxString::Matches(const wxString& mask) const
{
    // I disable this code as it doesn't seem to be faster (in fact, it seems
    // to be much slower) than the old, hand-written code below and using it
    // here requires always linking with libregex even if the user code doesn't
    // use it
    //
    // NOTE(review): the disabled code uses pszMask which is not declared in
    // this branch (the parameter is called mask), so it would presumably need
    // to be updated before being enabled again
#if 0 // wxUSE_REGEX
    // first translate the shell-like mask into a regex
    wxString pattern;
    pattern.reserve(wxStrlen(pszMask));

    pattern += _T('^');
    while ( *pszMask )
    {
        switch ( *pszMask )
        {
            case _T('?'):
                pattern += _T('.');
                break;

            case _T('*'):
                pattern += _T(".*");
                break;

            case _T('^'):
            case _T('.'):
            case _T('$'):
            case _T('('):
            case _T(')'):
            case _T('|'):
            case _T('+'):
            case _T('\\'):
                // these characters are special in a RE, quote them
                // (however note that we don't quote '[' and ']' to allow
                // using them for Unix shell like matching)
                pattern += _T('\\');
                // fall through

            default:
                pattern += *pszMask;
        }

        pszMask++;
    }
    pattern += _T('$');

    // and now use it
    return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
#else // !wxUSE_REGEX
  // TODO: this is, of course, awfully inefficient...

  // FIXME-UTF8: implement using iterators, remove #if
#if wxUSE_UNICODE_UTF8
  // in UTF-8 builds work on the wide character copies of both strings; the
  // buffer objects are kept in local variables for the entire duration of
  // the function as the pointers below point into them
  wxWCharBuffer maskBuf = mask.wc_str();
  wxWCharBuffer txtBuf = wc_str();
  const wxChar *pszMask = maskBuf.data();
  const wxChar *pszTxt = txtBuf.data();
#else
  const wxChar *pszMask = mask.wx_str();
  // the char currently being checked
  const wxChar *pszTxt = wx_str();
#endif

  // the last location where '*' matched
  const wxChar *pszLastStarInText = NULL;
  const wxChar *pszLastStarInMask = NULL;

  // the matching is restarted from this label when backtracking, see the end
  // of the function
match:
  // advance in the mask and in the text in parallel
  for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
    switch ( *pszMask ) {
      case wxT('?'):
        // '?' must match exactly one character, so fail if there is no more
        // text
        if ( *pszTxt == wxT('\0') )
          return false;

        // pszTxt and pszMask will be incremented in the loop statement

        break;

      case wxT('*'):
        {
          // remember where we started to be able to backtrack later
          pszLastStarInText = pszTxt;
          pszLastStarInMask = pszMask;

          // ignore special chars immediately following this one
          // (should this be an error?)
          //
          // notice that this loop skips the '*' being processed itself too
          while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
            pszMask++;

          // if there is nothing more, match
          if ( *pszMask == wxT('\0') )
            return true;

          // are there any other metacharacters in the mask?
          size_t uiLenMask;
          const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));

          if ( pEndMask != NULL ) {
            // we have to match the string between two metachars
            uiLenMask = pEndMask - pszMask;
          }
          else {
            // we have to match the remainder of the string
            uiLenMask = wxStrlen(pszMask);
          }

          // look for the literal part of the mask following the '*' in the
          // text: there is no match at all if it can't be found
          wxString strToMatch(pszMask, uiLenMask);
          const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
          if ( pMatch == NULL )
            return false;

          // continue after the end of the literal part both in the text and
          // in the mask
          //
          // -1 to compensate "++" in the loop
          pszTxt = pMatch + uiLenMask - 1;
          pszMask += uiLenMask - 1;
        }
        break;

      default:
        // any other character of the mask must be the same as the text one
        if ( *pszMask != *pszTxt )
          return false;
        break;
    }
  }

  // the entire mask was consumed
  //
  // match only if nothing left
  if ( *pszTxt == wxT('\0') )
    return true;

  // if we failed to match, backtrack if we can: retry from the last '*' in
  // the mask but starting one character further in the text
  if ( pszLastStarInText ) {
    pszTxt = pszLastStarInText + 1;
    pszMask = pszLastStarInMask;

    // the pointer is reset here but, as pszMask points to the '*' again, it
    // is set anew as soon as the loop above processes this '*'
    pszLastStarInText = NULL;

    // don't bother resetting pszLastStarInMask, it's unnecessary

    goto match;
  }

  return false;
#endif // wxUSE_REGEX/!wxUSE_REGEX
}
1794
1795 // Count the number of chars
1796 int wxString::Freq(wxUniChar ch) const
1797 {
1798     int count = 0;
1799     for ( const_iterator i = begin(); i != end(); ++i )
1800     {
1801         if ( *i == ch )
1802             count ++;
1803     }
1804     return count;
1805 }
1806
1807 // convert to upper case, return the copy of the string
1808 wxString wxString::Upper() const
1809 { wxString s(*this); return s.MakeUpper(); }
1810
1811 // convert to lower case, return the copy of the string
1812 wxString wxString::Lower() const { wxString s(*this); return s.MakeLower(); }