src/common/string.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/string.cpp
   3 // Purpose:     wxString class
   4 // Author:      Vadim Zeitlin, Ryan Norton
   5 // Modified by:
   6 // Created:     29/01/98
   7 // RCS-ID:      $Id$
   8 // Copyright:   (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
   9 //              (c) 2004 Ryan Norton <wxprojects@comcast.net>
  10 // Licence:     wxWindows licence
  11 /////////////////////////////////////////////////////////////////////////////
  12
  13 // ===========================================================================
  14 // headers, declarations, constants
  15 // ===========================================================================
  16
  17 // For compilers that support precompilation, includes "wx.h".
  18 #include "wx/wxprec.h"
  19
  20 #ifdef __BORLANDC__
  21     #pragma hdrstop
  22 #endif
  23
  24 #ifndef WX_PRECOMP
  25     #include "wx/string.h"
  26     #include "wx/wxcrtvararg.h"
  27 #endif
  28
  29 #include <ctype.h>
  30
  31 #ifndef __WXWINCE__
  32     #include <errno.h>
  33 #endif
  34
  35 #include <string.h>
  36 #include <stdlib.h>
  37
  38 #ifdef __SALFORDC__
  39     #include <clib.h>
  40 #endif
  41
  42 #include "wx/hashmap.h"
  43
// string handling functions used by wxString:
//
// Each alias below resolves to a plain C library function when
// wxUSE_UNICODE_UTF8 is set and to the corresponding wx wrapper otherwise, so
// the rest of this file can use a single name in both kinds of build.
#if wxUSE_UNICODE_UTF8
    #define wxStringMemcpy   memcpy
    #define wxStringMemcmp   memcmp
    #define wxStringMemchr   memchr
    #define wxStringStrlen   strlen
#else
    #define wxStringMemcpy   wxTmemcpy
    #define wxStringMemcmp   wxTmemcmp
    #define wxStringMemchr   wxTmemchr
    #define wxStringStrlen   wxStrlen
#endif
  56
  57
  58 // ---------------------------------------------------------------------------
  59 // static class variables definition
  60 // ---------------------------------------------------------------------------
  61
  62 //According to STL _must_ be a -1 size_t
  63 const size_t wxString::npos = (size_t) -1;
  64
  65 // ----------------------------------------------------------------------------
  66 // global functions
  67 // ----------------------------------------------------------------------------
  68
  69 #if wxUSE_STD_IOSTREAM
  70
  71 #include <iostream>
  72
  73 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
  74 {
  75 // FIXME-UTF8: always, not only if wxUSE_UNICODE
  76 #if wxUSE_UNICODE && !defined(__BORLANDC__)
  77     return os << (const wchar_t*)str.AsWCharBuf();
  78 #else
  79     return os << (const char*)str.AsCharBuf();
  80 #endif
  81 }
  82
  83 wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
  84 {
  85     return os << str.c_str();
  86 }
  87
  88 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str)
  89 {
  90     return os << str.data();
  91 }
  92
  93 #ifndef __BORLANDC__
  94 wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)
  95 {
  96     return os << str.data();
  97 }
  98 #endif
  99
 100 #endif // wxUSE_STD_IOSTREAM
 101
 102 // ===========================================================================
 103 // wxString class core
 104 // ===========================================================================
 105
 106 #if wxUSE_UNICODE_UTF8
 107
 108 void wxString::PosLenToImpl(size_t pos, size_t len,
 109                             size_t *implPos, size_t *implLen) const
 110 {
 111     if ( pos == npos )
 112         *implPos = npos;
 113     else
 114     {
 115         const_iterator i = begin() + pos;
 116         *implPos = wxStringImpl::const_iterator(i.impl()) - m_impl.begin();
 117         if ( len == npos )
 118             *implLen = npos;
 119         else
 120         {
 121             // too large length is interpreted as "to the end of the string"
 122             // FIXME-UTF8: verify this is the case in std::string, assert
 123             // otherwise
 124             if ( pos + len > length() )
 125                 len = length() - pos;
 126
 127             *implLen = (i + len).impl() - i.impl();
 128         }
 129     }
 130 }
 131
 132 #endif // wxUSE_UNICODE_UTF8
 133
 134 // ----------------------------------------------------------------------------
 135 // wxCStrData converted strings caching
 136 // ----------------------------------------------------------------------------
 137
 138 // FIXME-UTF8: temporarily disabled because it doesn't work with global
 139 //             string objects; re-enable after fixing this bug and benchmarking
 140 //             performance to see if using a hash is a good idea at all
 141 #if 0
 142
 143 // For backward compatibility reasons, it must be possible to assign the value
 144 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
 145 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
 146 // because the memory would be freed immediately, but it has to be valid as long
 147 // as the string is not modified, so that code like this still works:
 148 //
 149 // const wxChar *s = str.c_str();
 150 // while ( s ) { ... }
 151
 152 // FIXME-UTF8: not thread safe!
 153 // FIXME-UTF8: we currently clear the cached conversion only when the string is
 154 //             destroyed, but we should do it when the string is modified, to
 155 //             keep memory usage down
 156 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
 157 //             invalidated the cache on every change, we could keep the previous
 158 //             conversion
 159 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
 160 //             to use mb_str() or wc_str() instead of (const [w]char*)c_str()
 161
 162 template<typename T>
 163 static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
 164 {
 165     typename T::iterator i = hash.find(wxConstCast(s, wxString));
 166     if ( i != hash.end() )
 167     {
 168         free(i->second);
 169         hash.erase(i);
 170     }
 171 }
 172
 173 #if wxUSE_UNICODE
 174 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
 175 //     so we have to use wxString* here and const-cast when used
 176 WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
 177                     wxStringCharConversionCache);
 178 static wxStringCharConversionCache gs_stringsCharCache;
 179
 180 const char* wxCStrData::AsChar() const
 181 {
 182     // remove previously cache value, if any (see FIXMEs above):
 183     DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
 184
 185     // convert the string and keep it:
 186     const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
 187         m_str->mb_str().release();
 188
 189     return s + m_offset;
 190 }
 191 #endif // wxUSE_UNICODE
 192
 193 #if !wxUSE_UNICODE_WCHAR
 194 WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
 195                     wxStringWCharConversionCache);
 196 static wxStringWCharConversionCache gs_stringsWCharCache;
 197
 198 const wchar_t* wxCStrData::AsWChar() const
 199 {
 200     // remove previously cache value, if any (see FIXMEs above):
 201     DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
 202
 203     // convert the string and keep it:
 204     const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
 205         m_str->wc_str().release();
 206
 207     return s + m_offset;
 208 }
 209 #endif // !wxUSE_UNICODE_WCHAR
 210
 211 wxString::~wxString()
 212 {
 213 #if wxUSE_UNICODE
 214     // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
 215     DeleteStringFromConversionCache(gs_stringsCharCache, this);
 216 #endif
 217 #if !wxUSE_UNICODE_WCHAR
 218     DeleteStringFromConversionCache(gs_stringsWCharCache, this);
 219 #endif
 220 }
 221 #endif
 222
 223 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 224 const char* wxCStrData::AsChar() const
 225 {
 226 #if wxUSE_UNICODE_UTF8
 227     if ( wxLocaleIsUtf8 )
 228         return AsInternal();
 229 #endif
 230     // under non-UTF8 locales, we have to convert the internal UTF-8
 231     // representation using wxConvLibc and cache the result
 232
 233     wxString *str = wxConstCast(m_str, wxString);
 234
 235     // convert the string:
 236     wxCharBuffer buf(str->mb_str());
 237
 238     // FIXME-UTF8: do the conversion in-place in the existing buffer
 239     if ( str->m_convertedToChar &&
 240          strlen(buf) == strlen(str->m_convertedToChar) )
 241     {
 242         // keep the same buffer for as long as possible, so that several calls
 243         // to c_str() in a row still work:
 244         strcpy(str->m_convertedToChar, buf);
 245     }
 246     else
 247     {
 248         str->m_convertedToChar = buf.release();
 249     }
 250
 251     // and keep it:
 252     return str->m_convertedToChar + m_offset;
 253 }
 254 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 255
 256 #if !wxUSE_UNICODE_WCHAR
 257 const wchar_t* wxCStrData::AsWChar() const
 258 {
 259     wxString *str = wxConstCast(m_str, wxString);
 260
 261     // convert the string:
 262     wxWCharBuffer buf(str->wc_str());
 263
 264     // FIXME-UTF8: do the conversion in-place in the existing buffer
 265     if ( str->m_convertedToWChar &&
 266          wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
 267     {
 268         // keep the same buffer for as long as possible, so that several calls
 269         // to c_str() in a row still work:
 270         memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
 271     }
 272     else
 273     {
 274         str->m_convertedToWChar = buf.release();
 275     }
 276
 277     // and keep it:
 278     return str->m_convertedToWChar + m_offset;
 279 }
 280 #endif // !wxUSE_UNICODE_WCHAR
 281
 282 // ===========================================================================
 283 // wxString class core
 284 // ===========================================================================
 285
 286 // ---------------------------------------------------------------------------
 287 // construction and conversion
 288 // ---------------------------------------------------------------------------
 289
 290 #if wxUSE_UNICODE_WCHAR
 291 /* static */
 292 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 293                                                const wxMBConv& conv)
 294 {
 295     // anything to do?
 296     if ( !psz || nLength == 0 )
 297         return SubstrBufFromMB(L"", 0);
 298
 299     if ( nLength == npos )
 300         nLength = wxNO_LEN;
 301
 302     size_t wcLen;
 303     wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 304     if ( !wcLen )
 305         return SubstrBufFromMB(_T(""), 0);
 306     else
 307         return SubstrBufFromMB(wcBuf, wcLen);
 308 }
 309 #endif // wxUSE_UNICODE_WCHAR
 310
 311 #if wxUSE_UNICODE_UTF8
 312 /* static */
 313 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 314                                                const wxMBConv& conv)
 315 {
 316     // anything to do?
 317     if ( !psz || nLength == 0 )
 318         return SubstrBufFromMB("", 0);
 319
 320     // if psz is already in UTF-8, we don't have to do the roundtrip to
 321     // wchar_t* and back:
 322     if ( conv.IsUTF8() )
 323     {
 324         // we need to validate the input because UTF8 iterators assume valid
 325         // UTF-8 sequence and psz may be invalid:
 326         if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
 327         {
 328             return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz), nLength);
 329         }
 330         // else: do the roundtrip through wchar_t*
 331     }
 332
 333     if ( nLength == npos )
 334         nLength = wxNO_LEN;
 335
 336     // first convert to wide string:
 337     size_t wcLen;
 338     wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 339     if ( !wcLen )
 340         return SubstrBufFromMB("", 0);
 341
 342     // and then to UTF-8:
 343     SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvUTF8()));
 344     // widechar -> UTF-8 conversion isn't supposed to ever fail:
 345     wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
 346
 347     return buf;
 348 }
 349 #endif // wxUSE_UNICODE_UTF8
 350
 351 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 352 /* static */
 353 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
 354                                                const wxMBConv& conv)
 355 {
 356     // anything to do?
 357     if ( !pwz || nLength == 0 )
 358         return SubstrBufFromWC("", 0);
 359
 360     if ( nLength == npos )
 361         nLength = wxNO_LEN;
 362
 363     size_t mbLen;
 364     wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
 365     if ( !mbLen )
 366         return SubstrBufFromWC("", 0);
 367     else
 368         return SubstrBufFromWC(mbBuf, mbLen);
 369 }
 370 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 371
 372
 373 #if wxUSE_UNICODE_WCHAR
 374
 375 //Convert wxString in Unicode mode to a multi-byte string
 376 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
 377 {
 378     return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL);
 379 }
 380
 381 #elif wxUSE_UNICODE_UTF8
 382
 383 const wxWCharBuffer wxString::wc_str() const
 384 {
 385     return wxMBConvUTF8().cMB2WC(m_impl.c_str(),
 386                                  m_impl.length() + 1 /* size, not length */,
 387                                  NULL);
 388 }
 389
 390 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
 391 {
 392     if ( conv.IsUTF8() )
 393         return wxCharBuffer::CreateNonOwned(m_impl.c_str());
 394
 395     // FIXME-UTF8: use wc_str() here once we have buffers with length
 396
 397     size_t wcLen;
 398     wxWCharBuffer wcBuf(
 399             wxMBConvUTF8().cMB2WC(m_impl.c_str(),
 400                                   m_impl.length() + 1 /* size, not length */,
 401                                   &wcLen));
 402     if ( !wcLen )
 403         return wxCharBuffer("");
 404
 405     return conv.cWC2MB(wcBuf, wcLen, NULL);
 406 }
 407
 408 #else // ANSI
 409
 410 //Converts this string to a wide character string if unicode
 411 //mode is not enabled and wxUSE_WCHAR_T is enabled
 412 const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
 413 {
 414     return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL);
 415 }
 416
 417 #endif // Unicode/ANSI
 418
 419 // shrink to minimal size (releasing extra memory)
 420 bool wxString::Shrink()
 421 {
 422   wxString tmp(begin(), end());
 423   swap(tmp);
 424   return tmp.length() == length();
 425 }
 426
 427 // deprecated compatibility code:
 428 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
 429 wxStringCharType *wxString::GetWriteBuf(size_t nLen)
 430 {
 431     return DoGetWriteBuf(nLen);
 432 }
 433
 434 void wxString::UngetWriteBuf()
 435 {
 436     DoUngetWriteBuf();
 437 }
 438
 439 void wxString::UngetWriteBuf(size_t nLen)
 440 {
 441     DoUngetWriteBuf(nLen);
 442 }
 443 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
 444
 445
 446 // ---------------------------------------------------------------------------
 447 // data access
 448 // ---------------------------------------------------------------------------
 449
 450 // all functions are inline in string.h
 451
 452 // ---------------------------------------------------------------------------
 453 // concatenation operators
 454 // ---------------------------------------------------------------------------
 455
 456 /*
 457  * concatenation functions come in 5 flavours:
 458  *  string + string
 459  *  char   + string      and      string + char
 460  *  C str  + string      and      string + C str
 461  */
 462
 463 wxString operator+(const wxString& str1, const wxString& str2)
 464 {
 465 #if !wxUSE_STL_BASED_WXSTRING
 466     wxASSERT( str1.IsValid() );
 467     wxASSERT( str2.IsValid() );
 468 #endif
 469
 470     wxString s = str1;
 471     s += str2;
 472
 473     return s;
 474 }
 475
 476 wxString operator+(const wxString& str, wxUniChar ch)
 477 {
 478 #if !wxUSE_STL_BASED_WXSTRING
 479     wxASSERT( str.IsValid() );
 480 #endif
 481
 482     wxString s = str;
 483     s += ch;
 484
 485     return s;
 486 }
 487
 488 wxString operator+(wxUniChar ch, const wxString& str)
 489 {
 490 #if !wxUSE_STL_BASED_WXSTRING
 491     wxASSERT( str.IsValid() );
 492 #endif
 493
 494     wxString s = ch;
 495     s += str;
 496
 497     return s;
 498 }
 499
 500 wxString operator+(const wxString& str, const char *psz)
 501 {
 502 #if !wxUSE_STL_BASED_WXSTRING
 503     wxASSERT( str.IsValid() );
 504 #endif
 505
 506     wxString s;
 507     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 508         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 509     }
 510     s += str;
 511     s += psz;
 512
 513     return s;
 514 }
 515
 516 wxString operator+(const wxString& str, const wchar_t *pwz)
 517 {
 518 #if !wxUSE_STL_BASED_WXSTRING
 519     wxASSERT( str.IsValid() );
 520 #endif
 521
 522     wxString s;
 523     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 524         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 525     }
 526     s += str;
 527     s += pwz;
 528
 529     return s;
 530 }
 531
 532 wxString operator+(const char *psz, const wxString& str)
 533 {
 534 #if !wxUSE_STL_BASED_WXSTRING
 535     wxASSERT( str.IsValid() );
 536 #endif
 537
 538     wxString s;
 539     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 540         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 541     }
 542     s = psz;
 543     s += str;
 544
 545     return s;
 546 }
 547
 548 wxString operator+(const wchar_t *pwz, const wxString& str)
 549 {
 550 #if !wxUSE_STL_BASED_WXSTRING
 551     wxASSERT( str.IsValid() );
 552 #endif
 553
 554     wxString s;
 555     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 556         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 557     }
 558     s = pwz;
 559     s += str;
 560
 561     return s;
 562 }
 563
 564 // ---------------------------------------------------------------------------
 565 // string comparison
 566 // ---------------------------------------------------------------------------
 567
 568 #ifdef HAVE_STD_STRING_COMPARE
 569
 570 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
 571 //     UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
 572 //     sort strings in characters code point order by sorting the byte sequence
 573 //     in byte values order (i.e. what strcmp() and memcmp() do).
 574
 575 int wxString::compare(const wxString& str) const
 576 {
 577     return m_impl.compare(str.m_impl);
 578 }
 579
 580 int wxString::compare(size_t nStart, size_t nLen,
 581                       const wxString& str) const
 582 {
 583     size_t pos, len;
 584     PosLenToImpl(nStart, nLen, &pos, &len);
 585     return m_impl.compare(pos, len, str.m_impl);
 586 }
 587
 588 int wxString::compare(size_t nStart, size_t nLen,
 589                       const wxString& str,
 590                       size_t nStart2, size_t nLen2) const
 591 {
 592     size_t pos, len;
 593     PosLenToImpl(nStart, nLen, &pos, &len);
 594
 595     size_t pos2, len2;
 596     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 597
 598     return m_impl.compare(pos, len, str.m_impl, pos2, len2);
 599 }
 600
 601 int wxString::compare(const char* sz) const
 602 {
 603     return m_impl.compare(ImplStr(sz));
 604 }
 605
 606 int wxString::compare(const wchar_t* sz) const
 607 {
 608     return m_impl.compare(ImplStr(sz));
 609 }
 610
 611 int wxString::compare(size_t nStart, size_t nLen,
 612                       const char* sz, size_t nCount) const
 613 {
 614     size_t pos, len;
 615     PosLenToImpl(nStart, nLen, &pos, &len);
 616
 617     SubstrBufFromMB str(ImplStr(sz, nCount));
 618
 619     return m_impl.compare(pos, len, str.data, str.len);
 620 }
 621
 622 int wxString::compare(size_t nStart, size_t nLen,
 623                       const wchar_t* sz, size_t nCount) const
 624 {
 625     size_t pos, len;
 626     PosLenToImpl(nStart, nLen, &pos, &len);
 627
 628     SubstrBufFromWC str(ImplStr(sz, nCount));
 629
 630     return m_impl.compare(pos, len, str.data, str.len);
 631 }
 632
 633 #else // !HAVE_STD_STRING_COMPARE
 634
 635 static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
 636                           const wxStringCharType* s2, size_t l2)
 637 {
 638     if( l1 == l2 )
 639         return wxStringMemcmp(s1, s2, l1);
 640     else if( l1 < l2 )
 641     {
 642         int ret = wxStringMemcmp(s1, s2, l1);
 643         return ret == 0 ? -1 : ret;
 644     }
 645     else
 646     {
 647         int ret = wxStringMemcmp(s1, s2, l2);
 648         return ret == 0 ? +1 : ret;
 649     }
 650 }
 651
 652 int wxString::compare(const wxString& str) const
 653 {
 654     return ::wxDoCmp(m_impl.data(), m_impl.length(),
 655                      str.m_impl.data(), str.m_impl.length());
 656 }
 657
 658 int wxString::compare(size_t nStart, size_t nLen,
 659                       const wxString& str) const
 660 {
 661     wxASSERT(nStart <= length());
 662     size_type strLen = length() - nStart;
 663     nLen = strLen < nLen ? strLen : nLen;
 664
 665     size_t pos, len;
 666     PosLenToImpl(nStart, nLen, &pos, &len);
 667
 668     return ::wxDoCmp(m_impl.data() + pos,  len,
 669                      str.m_impl.data(), str.m_impl.length());
 670 }
 671
 672 int wxString::compare(size_t nStart, size_t nLen,
 673                       const wxString& str,
 674                       size_t nStart2, size_t nLen2) const
 675 {
 676     wxASSERT(nStart <= length());
 677     wxASSERT(nStart2 <= str.length());
 678     size_type strLen  =     length() - nStart,
 679               strLen2 = str.length() - nStart2;
 680     nLen  = strLen  < nLen  ? strLen  : nLen;
 681     nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
 682
 683     size_t pos, len;
 684     PosLenToImpl(nStart, nLen, &pos, &len);
 685     size_t pos2, len2;
 686     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 687
 688     return ::wxDoCmp(m_impl.data() + pos, len,
 689                      str.m_impl.data() + pos2, len2);
 690 }
 691
 692 int wxString::compare(const char* sz) const
 693 {
 694     SubstrBufFromMB str(ImplStr(sz, npos));
 695     if ( str.len == npos )
 696         str.len = wxStringStrlen(str.data);
 697     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 698 }
 699
 700 int wxString::compare(const wchar_t* sz) const
 701 {
 702     SubstrBufFromWC str(ImplStr(sz, npos));
 703     if ( str.len == npos )
 704         str.len = wxStringStrlen(str.data);
 705     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 706 }
 707
 708 int wxString::compare(size_t nStart, size_t nLen,
 709                       const char* sz, size_t nCount) const
 710 {
 711     wxASSERT(nStart <= length());
 712     size_type strLen = length() - nStart;
 713     nLen = strLen < nLen ? strLen : nLen;
 714
 715     size_t pos, len;
 716     PosLenToImpl(nStart, nLen, &pos, &len);
 717
 718     SubstrBufFromMB str(ImplStr(sz, nCount));
 719     if ( str.len == npos )
 720         str.len = wxStringStrlen(str.data);
 721
 722     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 723 }
 724
 725 int wxString::compare(size_t nStart, size_t nLen,
 726                       const wchar_t* sz, size_t nCount) const
 727 {
 728     wxASSERT(nStart <= length());
 729     size_type strLen = length() - nStart;
 730     nLen = strLen < nLen ? strLen : nLen;
 731
 732     size_t pos, len;
 733     PosLenToImpl(nStart, nLen, &pos, &len);
 734
 735     SubstrBufFromWC str(ImplStr(sz, nCount));
 736     if ( str.len == npos )
 737         str.len = wxStringStrlen(str.data);
 738
 739     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 740 }
 741
 742 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
 743
 744
 745 // ---------------------------------------------------------------------------
 746 // find_{first,last}_[not]_of functions
 747 // ---------------------------------------------------------------------------
 748
 749 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
 750
 751 // NB: All these functions are implemented  with the argument being wxChar*,
 752 //     i.e. widechar string in any Unicode build, even though native string
 753 //     representation is char* in the UTF-8 build. This is because we couldn't
 754 //     use memchr() to determine if a character is in a set encoded as UTF-8.
 755
 756 size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
 757 {
 758     return find_first_of(sz, nStart, wxStrlen(sz));
 759 }
 760
 761 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
 762 {
 763     return find_first_not_of(sz, nStart, wxStrlen(sz));
 764 }
 765
 766 size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
 767 {
 768     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 769
 770     size_t idx = nStart;
 771     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 772     {
 773         if ( wxTmemchr(sz, *i, n) )
 774             return idx;
 775     }
 776
 777     return npos;
 778 }
 779
 780 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
 781 {
 782     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 783
 784     size_t idx = nStart;
 785     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 786     {
 787         if ( !wxTmemchr(sz, *i, n) )
 788             return idx;
 789     }
 790
 791     return npos;
 792 }
 793
 794
 795 size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
 796 {
 797     return find_last_of(sz, nStart, wxStrlen(sz));
 798 }
 799
 800 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
 801 {
 802     return find_last_not_of(sz, nStart, wxStrlen(sz));
 803 }
 804
 805 size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
 806 {
 807     size_t len = length();
 808
 809     if ( nStart == npos )
 810     {
 811         nStart = len - 1;
 812     }
 813     else
 814     {
 815         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 816     }
 817
 818     size_t idx = nStart;
 819     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 820           i != rend(); --idx, ++i )
 821     {
 822         if ( wxTmemchr(sz, *i, n) )
 823             return idx;
 824     }
 825
 826     return npos;
 827 }
 828
 829 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
 830 {
 831     size_t len = length();
 832
 833     if ( nStart == npos )
 834     {
 835         nStart = len - 1;
 836     }
 837     else
 838     {
 839         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 840     }
 841
 842     size_t idx = nStart;
 843     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 844           i != rend(); --idx, ++i )
 845     {
 846         if ( !wxTmemchr(sz, *i, n) )
 847             return idx;
 848     }
 849
 850     return npos;
 851 }
 852
 853 size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
 854 {
 855     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 856
 857     size_t idx = nStart;
 858     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 859     {
 860         if ( *i != ch )
 861             return idx;
 862     }
 863
 864     return npos;
 865 }
 866
 867 size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
 868 {
 869     size_t len = length();
 870
 871     if ( nStart == npos )
 872     {
 873         nStart = len - 1;
 874     }
 875     else
 876     {
 877         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 878     }
 879
 880     size_t idx = nStart;
 881     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 882           i != rend(); --idx, ++i )
 883     {
 884         if ( *i != ch )
 885             return idx;
 886     }
 887
 888     return npos;
 889 }
 890
 891 // the functions above were implemented for wchar_t* arguments in Unicode
 892 // build and char* in ANSI build; below are implementations for the other
 893 // version:
 894 #if wxUSE_UNICODE
 895     #define wxOtherCharType char
 896     #define STRCONV         (const wxChar*)wxConvLibc.cMB2WC
 897 #else
 898     #define wxOtherCharType wchar_t
 899     #define STRCONV         (const wxChar*)wxConvLibc.cWC2MB
 900 #endif
 901
 902 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
 903     { return find_first_of(STRCONV(sz), nStart); }
 904
 905 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
 906                                size_t n) const
 907     { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
 908 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
 909     { return find_last_of(STRCONV(sz), nStart); }
 910 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
 911                               size_t n) const
 912     { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
 913 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
 914     { return find_first_not_of(STRCONV(sz), nStart); }
 915 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
 916                                    size_t n) const
 917     { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
 918 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
 919     { return find_last_not_of(STRCONV(sz), nStart); }
 920 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
 921                                   size_t n) const
 922     { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
 923
 924 #undef wxOtherCharType
 925 #undef STRCONV
 926
 927 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
 928
 929 // ===========================================================================
 930 // other common string functions
 931 // ===========================================================================
 932
 933 int wxString::CmpNoCase(const wxString& s) const
 934 {
 935     // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
 936
 937     size_t idx = 0;
 938     const_iterator i1 = begin();
 939     const_iterator end1 = end();
 940     const_iterator i2 = s.begin();
 941     const_iterator end2 = s.end();
 942
 943     for ( ; i1 != end1 && i2 != end2; ++idx, ++i1, ++i2 )
 944     {
 945         wxUniChar lower1 = (wxChar)wxTolower(*i1);
 946         wxUniChar lower2 = (wxChar)wxTolower(*i2);
 947         if ( lower1 != lower2 )
 948             return lower1 < lower2 ? -1 : 1;
 949     }
 950
 951     size_t len1 = length();
 952     size_t len2 = s.length();
 953
 954     if ( len1 < len2 )
 955         return -1;
 956     else if ( len1 > len2 )
 957         return 1;
 958     return 0;
 959 }
 960
 961
 962 #if wxUSE_UNICODE
 963
 964 #ifdef __MWERKS__
 965 #ifndef __SCHAR_MAX__
 966 #define __SCHAR_MAX__ 127
 967 #endif
 968 #endif
 969
 970 wxString wxString::FromAscii(const char *ascii)
 971 {
 972     if (!ascii)
 973        return wxEmptyString;
 974
 975     size_t len = strlen(ascii);
 976     wxString res;
 977
 978     if ( len )
 979     {
 980         wxImplStringBuffer buf(res, len);
 981         wxStringCharType *dest = buf;
 982
 983         for ( ;; )
 984         {
 985             unsigned char c = (unsigned char)*ascii++;
 986             wxASSERT_MSG( c < 0x80,
 987                           _T("Non-ASCII value passed to FromAscii().") );
 988
 989             *dest++ = (wchar_t)c;
 990
 991             if ( c == '\0' )
 992                 break;
 993         }
 994     }
 995
 996     return res;
 997 }
 998
 999 wxString wxString::FromAscii(const char ascii)
1000 {
1001     // What do we do with '\0' ?
1002
1003     unsigned char c = (unsigned char)ascii;
1004
1005     wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") );
1006
1007     // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1008     return wxString(wxUniChar((wchar_t)c));
1009 }
1010
1011 const wxCharBuffer wxString::ToAscii() const
1012 {
1013     // this will allocate enough space for the terminating NUL too
1014     wxCharBuffer buffer(length());
1015     char *dest = buffer.data();
1016
1017     for ( const_iterator i = begin(); i != end(); ++i )
1018     {
1019         wxUniChar c(*i);
1020         // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1021         *dest++ = c.IsAscii() ? (char)c : '_';
1022
1023         // the output string can't have embedded NULs anyhow, so we can safely
1024         // stop at first of them even if we do have any
1025         if ( !c )
1026             break;
1027     }
1028
1029     return buffer;
1030 }
1031
1032 #endif // wxUSE_UNICODE
1033
1034 // extract string of length nCount starting at nFirst
1035 wxString wxString::Mid(size_t nFirst, size_t nCount) const
1036 {
1037     size_t nLen = length();
1038
1039     // default value of nCount is npos and means "till the end"
1040     if ( nCount == npos )
1041     {
1042         nCount = nLen - nFirst;
1043     }
1044
1045     // out-of-bounds requests return sensible things
1046     if ( nFirst + nCount > nLen )
1047     {
1048         nCount = nLen - nFirst;
1049     }
1050
1051     if ( nFirst > nLen )
1052     {
1053         // AllocCopy() will return empty string
1054         return wxEmptyString;
1055     }
1056
1057     wxString dest(*this, nFirst, nCount);
1058     if ( dest.length() != nCount )
1059     {
1060         wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1061     }
1062
1063     return dest;
1064 }
1065
1066 // check that the string starts with prefix and return the rest of the string
1067 // in the provided pointer if it is not NULL, otherwise return false
1068 bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
1069 {
1070     if ( compare(0, prefix.length(), prefix) != 0 )
1071         return false;
1072
1073     if ( rest )
1074     {
1075         // put the rest of the string into provided pointer
1076         rest->assign(*this, prefix.length(), npos);
1077     }
1078
1079     return true;
1080 }
1081
1082
1083 // check that the string ends with suffix and return the rest of it in the
1084 // provided pointer if it is not NULL, otherwise return false
1085 bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
1086 {
1087     int start = length() - suffix.length();
1088
1089     if ( start < 0 || compare(start, npos, suffix) != 0 )
1090         return false;
1091
1092     if ( rest )
1093     {
1094         // put the rest of the string into provided pointer
1095         rest->assign(*this, 0, start);
1096     }
1097
1098     return true;
1099 }
1100
1101
1102 // extract nCount last (rightmost) characters
1103 wxString wxString::Right(size_t nCount) const
1104 {
1105   if ( nCount > length() )
1106     nCount = length();
1107
1108   wxString dest(*this, length() - nCount, nCount);
1109   if ( dest.length() != nCount ) {
1110     wxFAIL_MSG( _T("out of memory in wxString::Right") );
1111   }
1112   return dest;
1113 }
1114
1115 // get all characters after the last occurence of ch
1116 // (returns the whole string if ch not found)
1117 wxString wxString::AfterLast(wxUniChar ch) const
1118 {
1119   wxString str;
1120   int iPos = Find(ch, true);
1121   if ( iPos == wxNOT_FOUND )
1122     str = *this;
1123   else
1124     str = wx_str() + iPos + 1;
1125
1126   return str;
1127 }
1128
1129 // extract nCount first (leftmost) characters
1130 wxString wxString::Left(size_t nCount) const
1131 {
1132   if ( nCount > length() )
1133     nCount = length();
1134
1135   wxString dest(*this, 0, nCount);
1136   if ( dest.length() != nCount ) {
1137     wxFAIL_MSG( _T("out of memory in wxString::Left") );
1138   }
1139   return dest;
1140 }
1141
1142 // get all characters before the first occurence of ch
1143 // (returns the whole string if ch not found)
1144 wxString wxString::BeforeFirst(wxUniChar ch) const
1145 {
1146   int iPos = Find(ch);
1147   if ( iPos == wxNOT_FOUND ) iPos = length();
1148   return wxString(*this, 0, iPos);
1149 }
1150
1151 /// get all characters before the last occurence of ch
1152 /// (returns empty string if ch not found)
1153 wxString wxString::BeforeLast(wxUniChar ch) const
1154 {
1155   wxString str;
1156   int iPos = Find(ch, true);
1157   if ( iPos != wxNOT_FOUND && iPos != 0 )
1158     str = wxString(c_str(), iPos);
1159
1160   return str;
1161 }
1162
1163 /// get all characters after the first occurence of ch
1164 /// (returns empty string if ch not found)
1165 wxString wxString::AfterFirst(wxUniChar ch) const
1166 {
1167   wxString str;
1168   int iPos = Find(ch);
1169   if ( iPos != wxNOT_FOUND )
1170     str = wx_str() + iPos + 1;
1171
1172   return str;
1173 }
1174
1175 // replace first (or all) occurences of some substring with another one
1176 size_t wxString::Replace(const wxString& strOld,
1177                          const wxString& strNew, bool bReplaceAll)
1178 {
1179     // if we tried to replace an empty string we'd enter an infinite loop below
1180     wxCHECK_MSG( !strOld.empty(), 0,
1181                  _T("wxString::Replace(): invalid parameter") );
1182
1183     size_t uiCount = 0;   // count of replacements made
1184
1185     size_t uiOldLen = strOld.length();
1186     size_t uiNewLen = strNew.length();
1187
1188     size_t dwPos = 0;
1189
1190     while ( (*this)[dwPos] != wxT('\0') )
1191     {
1192         //DO NOT USE STRSTR HERE
1193         //this string can contain embedded null characters,
1194         //so strstr will function incorrectly
1195         dwPos = find(strOld, dwPos);
1196         if ( dwPos == npos )
1197             break;                  // exit the loop
1198         else
1199         {
1200             //replace this occurance of the old string with the new one
1201             replace(dwPos, uiOldLen, strNew, uiNewLen);
1202
1203             //move up pos past the string that was replaced
1204             dwPos += uiNewLen;
1205
1206             //increase replace count
1207             ++uiCount;
1208
1209             // stop now?
1210             if ( !bReplaceAll )
1211                 break;                  // exit the loop
1212         }
1213     }
1214
1215     return uiCount;
1216 }
1217
1218 bool wxString::IsAscii() const
1219 {
1220     for ( const_iterator i = begin(); i != end(); ++i )
1221     {
1222         if ( !(*i).IsAscii() )
1223             return false;
1224     }
1225
1226     return true;
1227 }
1228
1229 bool wxString::IsWord() const
1230 {
1231     for ( const_iterator i = begin(); i != end(); ++i )
1232     {
1233         if ( !wxIsalpha(*i) )
1234             return false;
1235     }
1236
1237     return true;
1238 }
1239
1240 bool wxString::IsNumber() const
1241 {
1242     if ( empty() )
1243         return true;
1244
1245     const_iterator i = begin();
1246
1247     if ( *i == _T('-') || *i == _T('+') )
1248         ++i;
1249
1250     for ( ; i != end(); ++i )
1251     {
1252         if ( !wxIsdigit(*i) )
1253             return false;
1254     }
1255
1256     return true;
1257 }
1258
1259 wxString wxString::Strip(stripType w) const
1260 {
1261     wxString s = *this;
1262     if ( w & leading ) s.Trim(false);
1263     if ( w & trailing ) s.Trim(true);
1264     return s;
1265 }
1266
1267 // ---------------------------------------------------------------------------
1268 // case conversion
1269 // ---------------------------------------------------------------------------
1270
1271 wxString& wxString::MakeUpper()
1272 {
1273   for ( iterator it = begin(), en = end(); it != en; ++it )
1274     *it = (wxChar)wxToupper(*it);
1275
1276   return *this;
1277 }
1278
1279 wxString& wxString::MakeLower()
1280 {
1281   for ( iterator it = begin(), en = end(); it != en; ++it )
1282     *it = (wxChar)wxTolower(*it);
1283
1284   return *this;
1285 }
1286
1287 // ---------------------------------------------------------------------------
1288 // trimming and padding
1289 // ---------------------------------------------------------------------------
1290
1291 // some compilers (VC++ 6.0 not to name them) return true for a call to
1292 // isspace('ê') in the C locale which seems to be broken to me, but we have to
1293 // live with this by checking that the character is a 7 bit one - even if this
1294 // may fail to detect some spaces (I don't know if Unicode doesn't have
1295 // space-like symbols somewhere except in the first 128 chars), it is arguably
1296 // still better than trimming away accented letters
1297 inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1298
1299 // trims spaces (in the sense of isspace) from left or right side
1300 wxString& wxString::Trim(bool bFromRight)
1301 {
1302     // first check if we're going to modify the string at all
1303     if ( !empty() &&
1304          (
1305           (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1306           (!bFromRight && wxSafeIsspace(GetChar(0u)))
1307          )
1308        )
1309     {
1310         if ( bFromRight )
1311         {
1312             // find last non-space character
1313             reverse_iterator psz = rbegin();
1314             while ( (psz != rend()) && wxSafeIsspace(*psz) )
1315                 psz++;
1316
1317             // truncate at trailing space start
1318             erase(psz.base(), end());
1319         }
1320         else
1321         {
1322             // find first non-space character
1323             iterator psz = begin();
1324             while ( (psz != end()) && wxSafeIsspace(*psz) )
1325                 psz++;
1326
1327             // fix up data and length
1328             erase(begin(), psz);
1329         }
1330     }
1331
1332     return *this;
1333 }
1334
1335 // adds nCount characters chPad to the string from either side
1336 wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1337 {
1338     wxString s(chPad, nCount);
1339
1340     if ( bFromRight )
1341         *this += s;
1342     else
1343     {
1344         s += *this;
1345         swap(s);
1346     }
1347
1348     return *this;
1349 }
1350
1351 // truncate the string
1352 wxString& wxString::Truncate(size_t uiLen)
1353 {
1354     if ( uiLen < length() )
1355     {
1356         erase(begin() + uiLen, end());
1357     }
1358     //else: nothing to do, string is already short enough
1359
1360     return *this;
1361 }
1362
1363 // ---------------------------------------------------------------------------
1364 // finding (return wxNOT_FOUND if not found and index otherwise)
1365 // ---------------------------------------------------------------------------
1366
1367 // find a character
1368 int wxString::Find(wxUniChar ch, bool bFromEnd) const
1369 {
1370     size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1371
1372     return (idx == npos) ? wxNOT_FOUND : (int)idx;
1373 }
1374
1375 // ----------------------------------------------------------------------------
1376 // conversion to numbers
1377 // ----------------------------------------------------------------------------
1378
1379 // the implementation of all the functions below is exactly the same so factor
1380 // it out
1381
1382 template <typename T, typename F>
1383 bool wxStringToIntType(const wxChar *start,
1384                        T *val,
1385                        int base,
1386                        F func)
1387 {
1388     wxCHECK_MSG( val, false, _T("NULL output pointer") );
1389     wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );
1390
1391 #ifndef __WXWINCE__
1392     errno = 0;
1393 #endif
1394
1395     wxChar *end;
1396     *val = (*func)(start, &end, base);
1397
1398     // return true only if scan was stopped by the terminating NUL and if the
1399     // string was not empty to start with and no under/overflow occurred
1400     return !*end && (end != start)
1401 #ifndef __WXWINCE__
1402         && (errno != ERANGE)
1403 #endif
1404     ;
1405 }
1406
1407 bool wxString::ToLong(long *val, int base) const
1408 {
1409     return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtol);
1410 }
1411
1412 bool wxString::ToULong(unsigned long *val, int base) const
1413 {
1414     return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtoul);
1415 }
1416
1417 bool wxString::ToLongLong(wxLongLong_t *val, int base) const
1418 {
1419     return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtoll);
1420 }
1421
1422 bool wxString::ToULongLong(wxULongLong_t *val, int base) const
1423 {
1424     return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtoull);
1425 }
1426
1427 bool wxString::ToDouble(double *val) const
1428 {
1429     wxCHECK_MSG( val, false, _T("NULL pointer in wxString::ToDouble") );
1430
1431 #ifndef __WXWINCE__
1432     errno = 0;
1433 #endif
1434
1435     const wxChar *start = c_str();
1436     wxChar *end;
1437     *val = wxStrtod(start, &end);
1438
1439     // return true only if scan was stopped by the terminating NUL and if the
1440     // string was not empty to start with and no under/overflow occurred
1441     return !*end && (end != start)
1442 #ifndef __WXWINCE__
1443         && (errno != ERANGE)
1444 #endif
1445     ;
1446 }
1447
1448 // ---------------------------------------------------------------------------
1449 // formatted output
1450 // ---------------------------------------------------------------------------
1451
1452 #if !wxUSE_UTF8_LOCALE_ONLY
1453 /* static */
1454 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1455 wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
1456 #else
1457 wxString wxString::DoFormatWchar(const wxChar *format, ...)
1458 #endif
1459 {
1460     va_list argptr;
1461     va_start(argptr, format);
1462
1463     wxString s;
1464     s.PrintfV(format, argptr);
1465
1466     va_end(argptr);
1467
1468     return s;
1469 }
1470 #endif // !wxUSE_UTF8_LOCALE_ONLY
1471
#if wxUSE_UNICODE_UTF8
// Return a new string containing the result of formatting the variable
// arguments according to the char format string using PrintfV().
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
1487
1488 /* static */
1489 wxString wxString::FormatV(const wxString& format, va_list argptr)
1490 {
1491     wxString s;
1492     s.PrintfV(format, argptr);
1493     return s;
1494 }
1495
1496 #if !wxUSE_UTF8_LOCALE_ONLY
1497 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1498 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
1499 #else
1500 int wxString::DoPrintfWchar(const wxChar *format, ...)
1501 #endif
1502 {
1503     va_list argptr;
1504     va_start(argptr, format);
1505
1506 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1507     // get a pointer to the wxString instance; we have to use dynamic_cast<>
1508     // because it's the only cast that works safely for downcasting when
1509     // multiple inheritance is used:
1510     wxString *str = static_cast<wxString*>(this);
1511 #else
1512     wxString *str = this;
1513 #endif
1514
1515     int iLen = str->PrintfV(format, argptr);
1516
1517     va_end(argptr);
1518
1519     return iLen;
1520 }
1521 #endif // !wxUSE_UTF8_LOCALE_ONLY
1522
#if wxUSE_UNICODE_UTF8
// Format the variable arguments according to the char format string into
// this string using PrintfV() and return the value returned by it.
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);

    const int nChars = PrintfV(format, args);

    va_end(args);

    return nChars;
}
#endif // wxUSE_UNICODE_UTF8
1536
// Format the arguments in argptr according to format into str.
//
// The output is produced by wxVsnprintf() into a buffer attached to str,
// starting with room for 1024 characters and retrying with a bigger buffer
// for as long as the output doesn't fit.
//
// Returns the length of str after formatting, or -1 if the buffer couldn't
// be obtained or (when wxUSE_WXVSNPRINTF is set) wxVsnprintf() reported an
// error.
//
// NOTE(review): when wxUSE_WXVSNPRINTF is not set, any negative return value
// of wxVsnprintf() is treated as "not enough space" and the size is doubled
// without any upper bound - confirm that the system function can't fail for
// another reason.
#if wxUSE_UNICODE_UTF8
template<typename BufferType>
#else
// we only need one version in non-UTF8 builds and at least two Windows
// compilers have problems with this function template, so use just one
// normal function here
#endif
static int DoStringPrintfV(wxString& str,
                           const wxString& format, va_list argptr)
{
    // current capacity of the buffer, not counting the terminating NUL
    int size = 1024;

    for ( ;; )
    {
        // a new buffer object is created for each attempt; it has room for
        // size characters plus the terminating NUL
#if wxUSE_UNICODE_UTF8
        BufferType tmp(str, size + 1);
        typename BufferType::CharType *buf = tmp;
#else
        wxStringBuffer tmp(str, size + 1);
        wxChar *buf = tmp;
#endif

        if ( !buf )
        {
            // out of memory
            return -1;
        }

        // wxVsnprintf() may modify the original arg pointer, so pass it
        // only a copy
        va_list argptrcopy;
        wxVaCopy(argptrcopy, argptr);
        int len = wxVsnprintf(buf, size, format, argptrcopy);
        va_end(argptrcopy);

        // some implementations of vsnprintf() don't NUL terminate
        // the string if there is not enough space for it so
        // always do it manually
        buf[size] = _T('\0');

        // vsnprintf() may return either -1 (traditional Unix behaviour) or the
        // total number of characters which would have been written if the
        // buffer were large enough (newer standards such as Unix98)
        if ( len < 0 )
        {
#if wxUSE_WXVSNPRINTF
            // we know that our own implementation of wxVsnprintf() returns -1
            // only for a format error - thus there's something wrong with
            // the user's format string
            return -1;
#else // assume that system version only returns error if not enough space
            // still not enough, as we don't know how much we need, double the
            // current size of the buffer
            size *= 2;
#endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
        }
        else if ( len >= size )
        {
#if wxUSE_WXVSNPRINTF
            // we know that our own implementation of wxVsnprintf() returns
            // size+1 when there's not enough space but that's not the size
            // of the required buffer!
            size *= 2;      // so we just double the current size of the buffer
#else
            // some vsnprintf() implementations NUL-terminate the buffer and
            // some don't in len == size case, to be safe always add 1
            size = len + 1;
#endif
        }
        else // ok, there was enough space
        {
            break;
        }
    }

    // we could have overshot
    str.Shrink();

    return str.length();
}
1617
// Format the arguments in argptr according to format into this string and
// return the value returned by DoStringPrintfV(), which does all the work.
//
// The only thing done here is selecting the version of DoStringPrintfV() to
// call depending on the build options and, in UTF-8 builds supporting other
// locales too, on the value of wxLocaleIsUtf8.
int wxString::PrintfV(const wxString& format, va_list argptr)
{
    // the buffer type used for formatting directly to a char buffer in
    // UTF-8 builds
#if wxUSE_UNICODE_UTF8
    #if wxUSE_STL_BASED_WXSTRING
        typedef wxStringTypeBuffer<char> Utf8Buffer;
    #else
        typedef wxImplStringBuffer Utf8Buffer;
    #endif
#endif

#if wxUSE_UTF8_LOCALE_ONLY
    return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
#else
    #if wxUSE_UNICODE_UTF8
    if ( wxLocaleIsUtf8 )
        return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
    else
        // wxChar* version
        return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
    #else
        return DoStringPrintfV(*this, format, argptr);
    #endif // UTF8/WCHAR
#endif
}
1642
1643 // ----------------------------------------------------------------------------
1644 // misc other operations
1645 // ----------------------------------------------------------------------------
1646
1647 // returns true if the string matches the pattern which may contain '*' and
1648 // '?' metacharacters (as usual, '?' matches any character and '*' any number
1649 // of them)
1650 bool wxString::Matches(const wxString& mask) const
1651 {
1652     // I disable this code as it doesn't seem to be faster (in fact, it seems
1653     // to be much slower) than the old, hand-written code below and using it
1654     // here requires always linking with libregex even if the user code doesn't
1655     // use it
1656 #if 0 // wxUSE_REGEX
1657     // first translate the shell-like mask into a regex
1658     wxString pattern;
1659     pattern.reserve(wxStrlen(pszMask));
1660
1661     pattern += _T('^');
1662     while ( *pszMask )
1663     {
1664         switch ( *pszMask )
1665         {
1666             case _T('?'):
1667                 pattern += _T('.');
1668                 break;
1669
1670             case _T('*'):
1671                 pattern += _T(".*");
1672                 break;
1673
1674             case _T('^'):
1675             case _T('.'):
1676             case _T('$'):
1677             case _T('('):
1678             case _T(')'):
1679             case _T('|'):
1680             case _T('+'):
1681             case _T('\\'):
1682                 // these characters are special in a RE, quote them
1683                 // (however note that we don't quote '[' and ']' to allow
1684                 // using them for Unix shell like matching)
1685                 pattern += _T('\\');
1686                 // fall through
1687
1688             default:
1689                 pattern += *pszMask;
1690         }
1691
1692         pszMask++;
1693     }
1694     pattern += _T('$');
1695
1696     // and now use it
1697     return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
1698 #else // !wxUSE_REGEX
1699   // TODO: this is, of course, awfully inefficient...
1700
1701   // FIXME-UTF8: implement using iterators, remove #if
1702 #if wxUSE_UNICODE_UTF8
1703   wxWCharBuffer maskBuf = mask.wc_str();
1704   wxWCharBuffer txtBuf = wc_str();
1705   const wxChar *pszMask = maskBuf.data();
1706   const wxChar *pszTxt = txtBuf.data();
1707 #else
1708   const wxChar *pszMask = mask.wx_str();
1709   // the char currently being checked
1710   const wxChar *pszTxt = wx_str();
1711 #endif
1712
1713   // the last location where '*' matched
1714   const wxChar *pszLastStarInText = NULL;
1715   const wxChar *pszLastStarInMask = NULL;
1716
1717 match:
1718   for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
1719     switch ( *pszMask ) {
1720       case wxT('?'):
1721         if ( *pszTxt == wxT('\0') )
1722           return false;
1723
1724         // pszTxt and pszMask will be incremented in the loop statement
1725
1726         break;
1727
1728       case wxT('*'):
1729         {
1730           // remember where we started to be able to backtrack later
1731           pszLastStarInText = pszTxt;
1732           pszLastStarInMask = pszMask;
1733
1734           // ignore special chars immediately following this one
1735           // (should this be an error?)
1736           while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
1737             pszMask++;
1738
1739           // if there is nothing more, match
1740           if ( *pszMask == wxT('\0') )
1741             return true;
1742
1743           // are there any other metacharacters in the mask?
1744           size_t uiLenMask;
1745           const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));
1746
1747           if ( pEndMask != NULL ) {
1748             // we have to match the string between two metachars
1749             uiLenMask = pEndMask - pszMask;
1750           }
1751           else {
1752             // we have to match the remainder of the string
1753             uiLenMask = wxStrlen(pszMask);
1754           }
1755
1756           wxString strToMatch(pszMask, uiLenMask);
1757           const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
1758           if ( pMatch == NULL )
1759             return false;
1760
1761           // -1 to compensate "++" in the loop
1762           pszTxt = pMatch + uiLenMask - 1;
1763           pszMask += uiLenMask - 1;
1764         }
1765         break;
1766
1767       default:
1768         if ( *pszMask != *pszTxt )
1769           return false;
1770         break;
1771     }
1772   }
1773
1774   // match only if nothing left
1775   if ( *pszTxt == wxT('\0') )
1776     return true;
1777
1778   // if we failed to match, backtrack if we can
1779   if ( pszLastStarInText ) {
1780     pszTxt = pszLastStarInText + 1;
1781     pszMask = pszLastStarInMask;
1782
1783     pszLastStarInText = NULL;
1784
1785     // don't bother resetting pszLastStarInMask, it's unnecessary
1786
1787     goto match;
1788   }
1789
1790   return false;
1791 #endif // wxUSE_REGEX/!wxUSE_REGEX
1792 }
1793
1794 // Count the number of chars
1795 int wxString::Freq(wxUniChar ch) const
1796 {
1797     int count = 0;
1798     for ( const_iterator i = begin(); i != end(); ++i )
1799     {
1800         if ( *i == ch )
1801             count ++;
1802     }
1803     return count;
1804 }
1805
1806 // convert to upper case, return the copy of the string
1807 wxString wxString::Upper() const
1808 { wxString s(*this); return s.MakeUpper(); }
1809
1810 // convert to lower case, return the copy of the string
1811 wxString wxString::Lower() const { wxString s(*this); return s.MakeLower(); }