src/common/string.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/string.cpp
   3 // Purpose:     wxString class
   4 // Author:      Vadim Zeitlin, Ryan Norton
   5 // Modified by:
   6 // Created:     29/01/98
   7 // RCS-ID:      $Id$
   8 // Copyright:   (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
   9 //              (c) 2004 Ryan Norton <wxprojects@comcast.net>
  10 // Licence:     wxWindows licence
  11 /////////////////////////////////////////////////////////////////////////////
  12
  13 // ===========================================================================
  14 // headers, declarations, constants
  15 // ===========================================================================
  16
  17 // For compilers that support precompilation, includes "wx.h".
  18 #include "wx/wxprec.h"
  19
  20 #ifdef __BORLANDC__
  21     #pragma hdrstop
  22 #endif
  23
  24 #ifndef WX_PRECOMP
  25     #include "wx/string.h"
  26     #include "wx/wxcrtvararg.h"
  27 #endif
  28
  29 #include <ctype.h>
  30
  31 #ifndef __WXWINCE__
  32     #include <errno.h>
  33 #endif
  34
  35 #include <string.h>
  36 #include <stdlib.h>
  37
  38 #ifdef __SALFORDC__
  39     #include <clib.h>
  40 #endif
  41
  42 #include "wx/hashmap.h"
  43
  44 // string handling functions used by wxString:
  45 #if wxUSE_UNICODE_UTF8
  46     #define wxStringMemcpy   memcpy
  47     #define wxStringMemcmp   memcmp
  48     #define wxStringMemchr   memchr
  49     #define wxStringStrlen   strlen
  50 #else
  51     #define wxStringMemcpy   wxTmemcpy
  52     #define wxStringMemcmp   wxTmemcmp
  53     #define wxStringMemchr   wxTmemchr
  54     #define wxStringStrlen   wxStrlen
  55 #endif
  56
  57
  58 // ---------------------------------------------------------------------------
  59 // static class variables definition
  60 // ---------------------------------------------------------------------------
  61
  62 //According to STL _must_ be a -1 size_t
  63 const size_t wxString::npos = (size_t) -1;
  64
  65 // ----------------------------------------------------------------------------
  66 // global functions
  67 // ----------------------------------------------------------------------------
  68
  69 #if wxUSE_STD_IOSTREAM
  70
  71 #include <iostream>
  72
  73 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
  74 {
  75 // FIXME-UTF8: always, not only if wxUSE_UNICODE
  76 #if wxUSE_UNICODE && !defined(__BORLANDC__)
  77     return os << (const wchar_t*)str.AsWCharBuf();
  78 #else
  79     return os << (const char*)str.AsCharBuf();
  80 #endif
  81 }
  82
  83 wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
  84 {
  85     return os << str.c_str();
  86 }
  87
  88 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str)
  89 {
  90     return os << str.data();
  91 }
  92
  93 #ifndef __BORLANDC__
  94 wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)
  95 {
  96     return os << str.data();
  97 }
  98 #endif
  99
 100 #endif // wxUSE_STD_IOSTREAM
 101
 102 // ===========================================================================
 103 // wxString class core
 104 // ===========================================================================
 105
 106 #if wxUSE_UNICODE_UTF8
 107
 108 void wxString::PosLenToImpl(size_t pos, size_t len,
 109                             size_t *implPos, size_t *implLen) const
 110 {
 111     if ( pos == npos )
 112         *implPos = npos;
 113     else
 114     {
 115         const_iterator i = begin() + pos;
 116         *implPos = wxStringImpl::const_iterator(i.impl()) - m_impl.begin();
 117         if ( len == npos )
 118             *implLen = npos;
 119         else
 120         {
 121             // too large length is interpreted as "to the end of the string"
 122             // FIXME-UTF8: verify this is the case in std::string, assert
 123             // otherwise
 124             if ( pos + len > length() )
 125                 len = length() - pos;
 126
 127             *implLen = (i + len).impl() - i.impl();
 128         }
 129     }
 130 }
 131
 132 #endif // wxUSE_UNICODE_UTF8
 133
 134 // ----------------------------------------------------------------------------
 135 // wxCStrData converted strings caching
 136 // ----------------------------------------------------------------------------
 137
 138 // FIXME-UTF8: temporarily disabled because it doesn't work with global
 139 //             string objects; re-enable after fixing this bug and benchmarking
 140 //             performance to see if using a hash is a good idea at all
 141 #if 0
 142
 143 // For backward compatibility reasons, it must be possible to assign the value
 144 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
 145 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
 146 // because the memory would be freed immediately, but it has to be valid as long
 147 // as the string is not modified, so that code like this still works:
 148 //
 149 // const wxChar *s = str.c_str();
 150 // while ( s ) { ... }
 151
 152 // FIXME-UTF8: not thread safe!
 153 // FIXME-UTF8: we currently clear the cached conversion only when the string is
 154 //             destroyed, but we should do it when the string is modified, to
 155 //             keep memory usage down
 156 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
 157 //             invalidated the cache on every change, we could keep the previous
 158 //             conversion
 159 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
 160 //             to use mb_str() or wc_str() instead of (const [w]char*)c_str()
 161
 162 template<typename T>
 163 static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
 164 {
 165     typename T::iterator i = hash.find(wxConstCast(s, wxString));
 166     if ( i != hash.end() )
 167     {
 168         free(i->second);
 169         hash.erase(i);
 170     }
 171 }
 172
 173 #if wxUSE_UNICODE
 174 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
 175 //     so we have to use wxString* here and const-cast when used
 176 WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
 177                     wxStringCharConversionCache);
 178 static wxStringCharConversionCache gs_stringsCharCache;
 179
 180 const char* wxCStrData::AsChar() const
 181 {
 182     // remove previously cache value, if any (see FIXMEs above):
 183     DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
 184
 185     // convert the string and keep it:
 186     const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
 187         m_str->mb_str().release();
 188
 189     return s + m_offset;
 190 }
 191 #endif // wxUSE_UNICODE
 192
 193 #if !wxUSE_UNICODE_WCHAR
 194 WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
 195                     wxStringWCharConversionCache);
 196 static wxStringWCharConversionCache gs_stringsWCharCache;
 197
 198 const wchar_t* wxCStrData::AsWChar() const
 199 {
 200     // remove previously cache value, if any (see FIXMEs above):
 201     DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
 202
 203     // convert the string and keep it:
 204     const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
 205         m_str->wc_str().release();
 206
 207     return s + m_offset;
 208 }
 209 #endif // !wxUSE_UNICODE_WCHAR
 210
 211 wxString::~wxString()
 212 {
 213 #if wxUSE_UNICODE
 214     // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
 215     DeleteStringFromConversionCache(gs_stringsCharCache, this);
 216 #endif
 217 #if !wxUSE_UNICODE_WCHAR
 218     DeleteStringFromConversionCache(gs_stringsWCharCache, this);
 219 #endif
 220 }
 221 #endif
 222
 223 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 224 const char* wxCStrData::AsChar() const
 225 {
 226 #if wxUSE_UNICODE_UTF8
 227     if ( wxLocaleIsUtf8 )
 228         return AsInternal();
 229 #endif
 230     // under non-UTF8 locales, we have to convert the internal UTF-8
 231     // representation using wxConvLibc and cache the result
 232
 233     wxString *str = wxConstCast(m_str, wxString);
 234
 235     // convert the string:
 236     wxCharBuffer buf(str->mb_str());
 237
 238     // if it failed, return empty string and not NULL to avoid crashes in code
 239     // written with either wxWidgets 2 wxString or std::string behaviour in
 240     // mind: neither of them ever returns NULL and so we shouldn't neither
 241     if ( !buf )
 242         return "";
 243
 244     // FIXME-UTF8: do the conversion in-place in the existing buffer
 245     if ( str->m_convertedToChar &&
 246          strlen(buf) == strlen(str->m_convertedToChar) )
 247     {
 248         // keep the same buffer for as long as possible, so that several calls
 249         // to c_str() in a row still work:
 250         strcpy(str->m_convertedToChar, buf);
 251     }
 252     else
 253     {
 254         str->m_convertedToChar = buf.release();
 255     }
 256
 257     // and keep it:
 258     return str->m_convertedToChar + m_offset;
 259 }
 260 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 261
 262 #if !wxUSE_UNICODE_WCHAR
 263 const wchar_t* wxCStrData::AsWChar() const
 264 {
 265     wxString *str = wxConstCast(m_str, wxString);
 266
 267     // convert the string:
 268     wxWCharBuffer buf(str->wc_str());
 269
 270     // notice that here, unlike above in AsChar(), conversion can't fail as our
 271     // internal UTF-8 is always well-formed -- or the string was corrupted and
 272     // all bets are off anyhow
 273
 274     // FIXME-UTF8: do the conversion in-place in the existing buffer
 275     if ( str->m_convertedToWChar &&
 276          wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
 277     {
 278         // keep the same buffer for as long as possible, so that several calls
 279         // to c_str() in a row still work:
 280         memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
 281     }
 282     else
 283     {
 284         str->m_convertedToWChar = buf.release();
 285     }
 286
 287     // and keep it:
 288     return str->m_convertedToWChar + m_offset;
 289 }
 290 #endif // !wxUSE_UNICODE_WCHAR
 291
 292 // ===========================================================================
 293 // wxString class core
 294 // ===========================================================================
 295
 296 // ---------------------------------------------------------------------------
 297 // construction and conversion
 298 // ---------------------------------------------------------------------------
 299
 300 #if wxUSE_UNICODE_WCHAR
 301 /* static */
 302 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 303                                                const wxMBConv& conv)
 304 {
 305     // anything to do?
 306     if ( !psz || nLength == 0 )
 307         return SubstrBufFromMB(L"", 0);
 308
 309     if ( nLength == npos )
 310         nLength = wxNO_LEN;
 311
 312     size_t wcLen;
 313     wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 314     if ( !wcLen )
 315         return SubstrBufFromMB(_T(""), 0);
 316     else
 317         return SubstrBufFromMB(wcBuf, wcLen);
 318 }
 319 #endif // wxUSE_UNICODE_WCHAR
 320
 321 #if wxUSE_UNICODE_UTF8
 322 /* static */
 323 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 324                                                const wxMBConv& conv)
 325 {
 326     // anything to do?
 327     if ( !psz || nLength == 0 )
 328         return SubstrBufFromMB("", 0);
 329
 330     // if psz is already in UTF-8, we don't have to do the roundtrip to
 331     // wchar_t* and back:
 332     if ( conv.IsUTF8() )
 333     {
 334         // we need to validate the input because UTF8 iterators assume valid
 335         // UTF-8 sequence and psz may be invalid:
 336         if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
 337         {
 338             return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz), nLength);
 339         }
 340         // else: do the roundtrip through wchar_t*
 341     }
 342
 343     if ( nLength == npos )
 344         nLength = wxNO_LEN;
 345
 346     // first convert to wide string:
 347     size_t wcLen;
 348     wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 349     if ( !wcLen )
 350         return SubstrBufFromMB("", 0);
 351
 352     // and then to UTF-8:
 353     SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
 354     // widechar -> UTF-8 conversion isn't supposed to ever fail:
 355     wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
 356
 357     return buf;
 358 }
 359 #endif // wxUSE_UNICODE_UTF8
 360
 361 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 362 /* static */
 363 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
 364                                                const wxMBConv& conv)
 365 {
 366     // anything to do?
 367     if ( !pwz || nLength == 0 )
 368         return SubstrBufFromWC("", 0);
 369
 370     if ( nLength == npos )
 371         nLength = wxNO_LEN;
 372
 373     size_t mbLen;
 374     wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
 375     if ( !mbLen )
 376         return SubstrBufFromWC("", 0);
 377     else
 378         return SubstrBufFromWC(mbBuf, mbLen);
 379 }
 380 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 381
 382
 383 #if wxUSE_UNICODE_WCHAR
 384
 385 //Convert wxString in Unicode mode to a multi-byte string
 386 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
 387 {
 388     return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL);
 389 }
 390
 391 #elif wxUSE_UNICODE_UTF8
 392
 393 const wxWCharBuffer wxString::wc_str() const
 394 {
 395     return wxMBConvStrictUTF8().cMB2WC
 396                                 (
 397                                     m_impl.c_str(),
 398                                     m_impl.length() + 1, // size, not length
 399                                     NULL
 400                                 );
 401 }
 402
 403 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
 404 {
 405     if ( conv.IsUTF8() )
 406         return wxCharBuffer::CreateNonOwned(m_impl.c_str());
 407
 408     // FIXME-UTF8: use wc_str() here once we have buffers with length
 409
 410     size_t wcLen;
 411     wxWCharBuffer wcBuf(wxMBConvStrictUTF8().cMB2WC
 412                                              (
 413                                                 m_impl.c_str(),
 414                                                 m_impl.length() + 1, // size
 415                                                 &wcLen
 416                                              ));
 417     if ( !wcLen )
 418         return wxCharBuffer("");
 419
 420     return conv.cWC2MB(wcBuf, wcLen+1, NULL);
 421 }
 422
 423 #else // ANSI
 424
 425 //Converts this string to a wide character string if unicode
 426 //mode is not enabled and wxUSE_WCHAR_T is enabled
 427 const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
 428 {
 429     return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL);
 430 }
 431
 432 #endif // Unicode/ANSI
 433
 434 // shrink to minimal size (releasing extra memory)
 435 bool wxString::Shrink()
 436 {
 437   wxString tmp(begin(), end());
 438   swap(tmp);
 439   return tmp.length() == length();
 440 }
 441
 442 // deprecated compatibility code:
 443 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
 444 wxStringCharType *wxString::GetWriteBuf(size_t nLen)
 445 {
 446     return DoGetWriteBuf(nLen);
 447 }
 448
 449 void wxString::UngetWriteBuf()
 450 {
 451     DoUngetWriteBuf();
 452 }
 453
 454 void wxString::UngetWriteBuf(size_t nLen)
 455 {
 456     DoUngetWriteBuf(nLen);
 457 }
 458 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
 459
 460
 461 // ---------------------------------------------------------------------------
 462 // data access
 463 // ---------------------------------------------------------------------------
 464
 465 // all functions are inline in string.h
 466
 467 // ---------------------------------------------------------------------------
 468 // concatenation operators
 469 // ---------------------------------------------------------------------------
 470
 471 /*
 472  * concatenation functions come in 5 flavours:
 473  *  string + string
 474  *  char   + string      and      string + char
 475  *  C str  + string      and      string + C str
 476  */
 477
 478 wxString operator+(const wxString& str1, const wxString& str2)
 479 {
 480 #if !wxUSE_STL_BASED_WXSTRING
 481     wxASSERT( str1.IsValid() );
 482     wxASSERT( str2.IsValid() );
 483 #endif
 484
 485     wxString s = str1;
 486     s += str2;
 487
 488     return s;
 489 }
 490
 491 wxString operator+(const wxString& str, wxUniChar ch)
 492 {
 493 #if !wxUSE_STL_BASED_WXSTRING
 494     wxASSERT( str.IsValid() );
 495 #endif
 496
 497     wxString s = str;
 498     s += ch;
 499
 500     return s;
 501 }
 502
 503 wxString operator+(wxUniChar ch, const wxString& str)
 504 {
 505 #if !wxUSE_STL_BASED_WXSTRING
 506     wxASSERT( str.IsValid() );
 507 #endif
 508
 509     wxString s = ch;
 510     s += str;
 511
 512     return s;
 513 }
 514
 515 wxString operator+(const wxString& str, const char *psz)
 516 {
 517 #if !wxUSE_STL_BASED_WXSTRING
 518     wxASSERT( str.IsValid() );
 519 #endif
 520
 521     wxString s;
 522     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 523         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 524     }
 525     s += str;
 526     s += psz;
 527
 528     return s;
 529 }
 530
 531 wxString operator+(const wxString& str, const wchar_t *pwz)
 532 {
 533 #if !wxUSE_STL_BASED_WXSTRING
 534     wxASSERT( str.IsValid() );
 535 #endif
 536
 537     wxString s;
 538     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 539         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 540     }
 541     s += str;
 542     s += pwz;
 543
 544     return s;
 545 }
 546
 547 wxString operator+(const char *psz, const wxString& str)
 548 {
 549 #if !wxUSE_STL_BASED_WXSTRING
 550     wxASSERT( str.IsValid() );
 551 #endif
 552
 553     wxString s;
 554     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 555         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 556     }
 557     s = psz;
 558     s += str;
 559
 560     return s;
 561 }
 562
 563 wxString operator+(const wchar_t *pwz, const wxString& str)
 564 {
 565 #if !wxUSE_STL_BASED_WXSTRING
 566     wxASSERT( str.IsValid() );
 567 #endif
 568
 569     wxString s;
 570     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 571         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 572     }
 573     s = pwz;
 574     s += str;
 575
 576     return s;
 577 }
 578
 579 // ---------------------------------------------------------------------------
 580 // string comparison
 581 // ---------------------------------------------------------------------------
 582
 583 bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
 584 {
 585     return (length() == 1) && (compareWithCase ? GetChar(0u) == c
 586                                : wxToupper(GetChar(0u)) == wxToupper(c));
 587 }
 588
 589 #ifdef HAVE_STD_STRING_COMPARE
 590
 591 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
 592 //     UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
 593 //     sort strings in characters code point order by sorting the byte sequence
 594 //     in byte values order (i.e. what strcmp() and memcmp() do).
 595
 596 int wxString::compare(const wxString& str) const
 597 {
 598     return m_impl.compare(str.m_impl);
 599 }
 600
 601 int wxString::compare(size_t nStart, size_t nLen,
 602                       const wxString& str) const
 603 {
 604     size_t pos, len;
 605     PosLenToImpl(nStart, nLen, &pos, &len);
 606     return m_impl.compare(pos, len, str.m_impl);
 607 }
 608
 609 int wxString::compare(size_t nStart, size_t nLen,
 610                       const wxString& str,
 611                       size_t nStart2, size_t nLen2) const
 612 {
 613     size_t pos, len;
 614     PosLenToImpl(nStart, nLen, &pos, &len);
 615
 616     size_t pos2, len2;
 617     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 618
 619     return m_impl.compare(pos, len, str.m_impl, pos2, len2);
 620 }
 621
 622 int wxString::compare(const char* sz) const
 623 {
 624     return m_impl.compare(ImplStr(sz));
 625 }
 626
 627 int wxString::compare(const wchar_t* sz) const
 628 {
 629     return m_impl.compare(ImplStr(sz));
 630 }
 631
 632 int wxString::compare(size_t nStart, size_t nLen,
 633                       const char* sz, size_t nCount) const
 634 {
 635     size_t pos, len;
 636     PosLenToImpl(nStart, nLen, &pos, &len);
 637
 638     SubstrBufFromMB str(ImplStr(sz, nCount));
 639
 640     return m_impl.compare(pos, len, str.data, str.len);
 641 }
 642
 643 int wxString::compare(size_t nStart, size_t nLen,
 644                       const wchar_t* sz, size_t nCount) const
 645 {
 646     size_t pos, len;
 647     PosLenToImpl(nStart, nLen, &pos, &len);
 648
 649     SubstrBufFromWC str(ImplStr(sz, nCount));
 650
 651     return m_impl.compare(pos, len, str.data, str.len);
 652 }
 653
 654 #else // !HAVE_STD_STRING_COMPARE
 655
 656 static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
 657                           const wxStringCharType* s2, size_t l2)
 658 {
 659     if( l1 == l2 )
 660         return wxStringMemcmp(s1, s2, l1);
 661     else if( l1 < l2 )
 662     {
 663         int ret = wxStringMemcmp(s1, s2, l1);
 664         return ret == 0 ? -1 : ret;
 665     }
 666     else
 667     {
 668         int ret = wxStringMemcmp(s1, s2, l2);
 669         return ret == 0 ? +1 : ret;
 670     }
 671 }
 672
 673 int wxString::compare(const wxString& str) const
 674 {
 675     return ::wxDoCmp(m_impl.data(), m_impl.length(),
 676                      str.m_impl.data(), str.m_impl.length());
 677 }
 678
 679 int wxString::compare(size_t nStart, size_t nLen,
 680                       const wxString& str) const
 681 {
 682     wxASSERT(nStart <= length());
 683     size_type strLen = length() - nStart;
 684     nLen = strLen < nLen ? strLen : nLen;
 685
 686     size_t pos, len;
 687     PosLenToImpl(nStart, nLen, &pos, &len);
 688
 689     return ::wxDoCmp(m_impl.data() + pos,  len,
 690                      str.m_impl.data(), str.m_impl.length());
 691 }
 692
 693 int wxString::compare(size_t nStart, size_t nLen,
 694                       const wxString& str,
 695                       size_t nStart2, size_t nLen2) const
 696 {
 697     wxASSERT(nStart <= length());
 698     wxASSERT(nStart2 <= str.length());
 699     size_type strLen  =     length() - nStart,
 700               strLen2 = str.length() - nStart2;
 701     nLen  = strLen  < nLen  ? strLen  : nLen;
 702     nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
 703
 704     size_t pos, len;
 705     PosLenToImpl(nStart, nLen, &pos, &len);
 706     size_t pos2, len2;
 707     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 708
 709     return ::wxDoCmp(m_impl.data() + pos, len,
 710                      str.m_impl.data() + pos2, len2);
 711 }
 712
 713 int wxString::compare(const char* sz) const
 714 {
 715     SubstrBufFromMB str(ImplStr(sz, npos));
 716     if ( str.len == npos )
 717         str.len = wxStringStrlen(str.data);
 718     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 719 }
 720
 721 int wxString::compare(const wchar_t* sz) const
 722 {
 723     SubstrBufFromWC str(ImplStr(sz, npos));
 724     if ( str.len == npos )
 725         str.len = wxStringStrlen(str.data);
 726     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 727 }
 728
 729 int wxString::compare(size_t nStart, size_t nLen,
 730                       const char* sz, size_t nCount) const
 731 {
 732     wxASSERT(nStart <= length());
 733     size_type strLen = length() - nStart;
 734     nLen = strLen < nLen ? strLen : nLen;
 735
 736     size_t pos, len;
 737     PosLenToImpl(nStart, nLen, &pos, &len);
 738
 739     SubstrBufFromMB str(ImplStr(sz, nCount));
 740     if ( str.len == npos )
 741         str.len = wxStringStrlen(str.data);
 742
 743     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 744 }
 745
 746 int wxString::compare(size_t nStart, size_t nLen,
 747                       const wchar_t* sz, size_t nCount) const
 748 {
 749     wxASSERT(nStart <= length());
 750     size_type strLen = length() - nStart;
 751     nLen = strLen < nLen ? strLen : nLen;
 752
 753     size_t pos, len;
 754     PosLenToImpl(nStart, nLen, &pos, &len);
 755
 756     SubstrBufFromWC str(ImplStr(sz, nCount));
 757     if ( str.len == npos )
 758         str.len = wxStringStrlen(str.data);
 759
 760     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 761 }
 762
 763 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
 764
 765
 766 // ---------------------------------------------------------------------------
 767 // find_{first,last}_[not]_of functions
 768 // ---------------------------------------------------------------------------
 769
 770 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
 771
 772 // NB: All these functions are implemented  with the argument being wxChar*,
 773 //     i.e. widechar string in any Unicode build, even though native string
 774 //     representation is char* in the UTF-8 build. This is because we couldn't
 775 //     use memchr() to determine if a character is in a set encoded as UTF-8.
 776
 777 size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
 778 {
 779     return find_first_of(sz, nStart, wxStrlen(sz));
 780 }
 781
 782 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
 783 {
 784     return find_first_not_of(sz, nStart, wxStrlen(sz));
 785 }
 786
 787 size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
 788 {
 789     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 790
 791     size_t idx = nStart;
 792     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 793     {
 794         if ( wxTmemchr(sz, *i, n) )
 795             return idx;
 796     }
 797
 798     return npos;
 799 }
 800
 801 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
 802 {
 803     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 804
 805     size_t idx = nStart;
 806     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 807     {
 808         if ( !wxTmemchr(sz, *i, n) )
 809             return idx;
 810     }
 811
 812     return npos;
 813 }
 814
 815
 816 size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
 817 {
 818     return find_last_of(sz, nStart, wxStrlen(sz));
 819 }
 820
 821 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
 822 {
 823     return find_last_not_of(sz, nStart, wxStrlen(sz));
 824 }
 825
 826 size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
 827 {
 828     size_t len = length();
 829
 830     if ( nStart == npos )
 831     {
 832         nStart = len - 1;
 833     }
 834     else
 835     {
 836         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 837     }
 838
 839     size_t idx = nStart;
 840     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 841           i != rend(); --idx, ++i )
 842     {
 843         if ( wxTmemchr(sz, *i, n) )
 844             return idx;
 845     }
 846
 847     return npos;
 848 }
 849
 850 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
 851 {
 852     size_t len = length();
 853
 854     if ( nStart == npos )
 855     {
 856         nStart = len - 1;
 857     }
 858     else
 859     {
 860         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 861     }
 862
 863     size_t idx = nStart;
 864     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 865           i != rend(); --idx, ++i )
 866     {
 867         if ( !wxTmemchr(sz, *i, n) )
 868             return idx;
 869     }
 870
 871     return npos;
 872 }
 873
 874 size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
 875 {
 876     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 877
 878     size_t idx = nStart;
 879     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 880     {
 881         if ( *i != ch )
 882             return idx;
 883     }
 884
 885     return npos;
 886 }
 887
 888 size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
 889 {
 890     size_t len = length();
 891
 892     if ( nStart == npos )
 893     {
 894         nStart = len - 1;
 895     }
 896     else
 897     {
 898         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 899     }
 900
 901     size_t idx = nStart;
 902     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 903           i != rend(); --idx, ++i )
 904     {
 905         if ( *i != ch )
 906             return idx;
 907     }
 908
 909     return npos;
 910 }
 911
 912 // the functions above were implemented for wchar_t* arguments in Unicode
 913 // build and char* in ANSI build; below are implementations for the other
 914 // version:
 915 #if wxUSE_UNICODE
 916     #define wxOtherCharType char
 917     #define STRCONV         (const wxChar*)wxConvLibc.cMB2WC
 918 #else
 919     #define wxOtherCharType wchar_t
 920     #define STRCONV         (const wxChar*)wxConvLibc.cWC2MB
 921 #endif
 922
 923 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
 924     { return find_first_of(STRCONV(sz), nStart); }
 925
 926 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
 927                                size_t n) const
 928     { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
 929 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
 930     { return find_last_of(STRCONV(sz), nStart); }
 931 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
 932                               size_t n) const
 933     { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
 934 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
 935     { return find_first_not_of(STRCONV(sz), nStart); }
 936 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
 937                                    size_t n) const
 938     { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
 939 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
 940     { return find_last_not_of(STRCONV(sz), nStart); }
 941 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
 942                                   size_t n) const
 943     { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
 944
 945 #undef wxOtherCharType
 946 #undef STRCONV
 947
 948 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
 949
 950 // ===========================================================================
 951 // other common string functions
 952 // ===========================================================================
 953
 954 int wxString::CmpNoCase(const wxString& s) const
 955 {
 956     // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
 957
 958     size_t idx = 0;
 959     const_iterator i1 = begin();
 960     const_iterator end1 = end();
 961     const_iterator i2 = s.begin();
 962     const_iterator end2 = s.end();
 963
 964     for ( ; i1 != end1 && i2 != end2; ++idx, ++i1, ++i2 )
 965     {
 966         wxUniChar lower1 = (wxChar)wxTolower(*i1);
 967         wxUniChar lower2 = (wxChar)wxTolower(*i2);
 968         if ( lower1 != lower2 )
 969             return lower1 < lower2 ? -1 : 1;
 970     }
 971
 972     size_t len1 = length();
 973     size_t len2 = s.length();
 974
 975     if ( len1 < len2 )
 976         return -1;
 977     else if ( len1 > len2 )
 978         return 1;
 979     return 0;
 980 }
 981
 982
 983 #if wxUSE_UNICODE
 984
 985 #ifdef __MWERKS__
 986 #ifndef __SCHAR_MAX__
 987 #define __SCHAR_MAX__ 127
 988 #endif
 989 #endif
 990
 991 wxString wxString::FromAscii(const char *ascii, size_t len)
 992 {
 993     if (!ascii || len == 0)
 994        return wxEmptyString;
 995
 996     wxString res;
 997
 998     {
 999         wxStringInternalBuffer buf(res, len);
1000         wxStringCharType *dest = buf;
1001
1002         for ( ; len > 0; --len )
1003         {
1004             unsigned char c = (unsigned char)*ascii++;
1005             wxASSERT_MSG( c < 0x80,
1006                           _T("Non-ASCII value passed to FromAscii().") );
1007
1008             *dest++ = (wchar_t)c;
1009         }
1010     }
1011
1012     return res;
1013 }
1014
1015 wxString wxString::FromAscii(const char *ascii)
1016 {
1017     return FromAscii(ascii, wxStrlen(ascii));
1018 }
1019
1020 wxString wxString::FromAscii(char ascii)
1021 {
1022     // What do we do with '\0' ?
1023
1024     unsigned char c = (unsigned char)ascii;
1025
1026     wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") );
1027
1028     // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1029     return wxString(wxUniChar((wchar_t)c));
1030 }
1031
1032 const wxCharBuffer wxString::ToAscii() const
1033 {
1034     // this will allocate enough space for the terminating NUL too
1035     wxCharBuffer buffer(length());
1036     char *dest = buffer.data();
1037
1038     for ( const_iterator i = begin(); i != end(); ++i )
1039     {
1040         wxUniChar c(*i);
1041         // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1042         *dest++ = c.IsAscii() ? (char)c : '_';
1043
1044         // the output string can't have embedded NULs anyhow, so we can safely
1045         // stop at first of them even if we do have any
1046         if ( !c )
1047             break;
1048     }
1049
1050     return buffer;
1051 }
1052
1053 #endif // wxUSE_UNICODE
1054
1055 // extract string of length nCount starting at nFirst
1056 wxString wxString::Mid(size_t nFirst, size_t nCount) const
1057 {
1058     size_t nLen = length();
1059
1060     // default value of nCount is npos and means "till the end"
1061     if ( nCount == npos )
1062     {
1063         nCount = nLen - nFirst;
1064     }
1065
1066     // out-of-bounds requests return sensible things
1067     if ( nFirst + nCount > nLen )
1068     {
1069         nCount = nLen - nFirst;
1070     }
1071
1072     if ( nFirst > nLen )
1073     {
1074         // AllocCopy() will return empty string
1075         return wxEmptyString;
1076     }
1077
1078     wxString dest(*this, nFirst, nCount);
1079     if ( dest.length() != nCount )
1080     {
1081         wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1082     }
1083
1084     return dest;
1085 }
1086
1087 // check that the string starts with prefix and return the rest of the string
1088 // in the provided pointer if it is not NULL, otherwise return false
1089 bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
1090 {
1091     if ( compare(0, prefix.length(), prefix) != 0 )
1092         return false;
1093
1094     if ( rest )
1095     {
1096         // put the rest of the string into provided pointer
1097         rest->assign(*this, prefix.length(), npos);
1098     }
1099
1100     return true;
1101 }
1102
1103
1104 // check that the string ends with suffix and return the rest of it in the
1105 // provided pointer if it is not NULL, otherwise return false
1106 bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
1107 {
1108     int start = length() - suffix.length();
1109
1110     if ( start < 0 || compare(start, npos, suffix) != 0 )
1111         return false;
1112
1113     if ( rest )
1114     {
1115         // put the rest of the string into provided pointer
1116         rest->assign(*this, 0, start);
1117     }
1118
1119     return true;
1120 }
1121
1122
1123 // extract nCount last (rightmost) characters
1124 wxString wxString::Right(size_t nCount) const
1125 {
1126   if ( nCount > length() )
1127     nCount = length();
1128
1129   wxString dest(*this, length() - nCount, nCount);
1130   if ( dest.length() != nCount ) {
1131     wxFAIL_MSG( _T("out of memory in wxString::Right") );
1132   }
1133   return dest;
1134 }
1135
1136 // get all characters after the last occurence of ch
1137 // (returns the whole string if ch not found)
1138 wxString wxString::AfterLast(wxUniChar ch) const
1139 {
1140   wxString str;
1141   int iPos = Find(ch, true);
1142   if ( iPos == wxNOT_FOUND )
1143     str = *this;
1144   else
1145     str = wx_str() + iPos + 1;
1146
1147   return str;
1148 }
1149
1150 // extract nCount first (leftmost) characters
1151 wxString wxString::Left(size_t nCount) const
1152 {
1153   if ( nCount > length() )
1154     nCount = length();
1155
1156   wxString dest(*this, 0, nCount);
1157   if ( dest.length() != nCount ) {
1158     wxFAIL_MSG( _T("out of memory in wxString::Left") );
1159   }
1160   return dest;
1161 }
1162
1163 // get all characters before the first occurence of ch
1164 // (returns the whole string if ch not found)
1165 wxString wxString::BeforeFirst(wxUniChar ch) const
1166 {
1167   int iPos = Find(ch);
1168   if ( iPos == wxNOT_FOUND ) iPos = length();
1169   return wxString(*this, 0, iPos);
1170 }
1171
1172 /// get all characters before the last occurence of ch
1173 /// (returns empty string if ch not found)
1174 wxString wxString::BeforeLast(wxUniChar ch) const
1175 {
1176   wxString str;
1177   int iPos = Find(ch, true);
1178   if ( iPos != wxNOT_FOUND && iPos != 0 )
1179     str = wxString(c_str(), iPos);
1180
1181   return str;
1182 }
1183
1184 /// get all characters after the first occurence of ch
1185 /// (returns empty string if ch not found)
1186 wxString wxString::AfterFirst(wxUniChar ch) const
1187 {
1188   wxString str;
1189   int iPos = Find(ch);
1190   if ( iPos != wxNOT_FOUND )
1191     str = wx_str() + iPos + 1;
1192
1193   return str;
1194 }
1195
1196 // replace first (or all) occurences of some substring with another one
1197 size_t wxString::Replace(const wxString& strOld,
1198                          const wxString& strNew, bool bReplaceAll)
1199 {
1200     // if we tried to replace an empty string we'd enter an infinite loop below
1201     wxCHECK_MSG( !strOld.empty(), 0,
1202                  _T("wxString::Replace(): invalid parameter") );
1203
1204     size_t uiCount = 0;   // count of replacements made
1205
1206     size_t uiOldLen = strOld.length();
1207     size_t uiNewLen = strNew.length();
1208
1209     size_t dwPos = 0;
1210
1211     while ( (*this)[dwPos] != wxT('\0') )
1212     {
1213         //DO NOT USE STRSTR HERE
1214         //this string can contain embedded null characters,
1215         //so strstr will function incorrectly
1216         dwPos = find(strOld, dwPos);
1217         if ( dwPos == npos )
1218             break;                  // exit the loop
1219         else
1220         {
1221             //replace this occurance of the old string with the new one
1222             replace(dwPos, uiOldLen, strNew, uiNewLen);
1223
1224             //move up pos past the string that was replaced
1225             dwPos += uiNewLen;
1226
1227             //increase replace count
1228             ++uiCount;
1229
1230             // stop now?
1231             if ( !bReplaceAll )
1232                 break;                  // exit the loop
1233         }
1234     }
1235
1236     return uiCount;
1237 }
1238
1239 bool wxString::IsAscii() const
1240 {
1241     for ( const_iterator i = begin(); i != end(); ++i )
1242     {
1243         if ( !(*i).IsAscii() )
1244             return false;
1245     }
1246
1247     return true;
1248 }
1249
1250 bool wxString::IsWord() const
1251 {
1252     for ( const_iterator i = begin(); i != end(); ++i )
1253     {
1254         if ( !wxIsalpha(*i) )
1255             return false;
1256     }
1257
1258     return true;
1259 }
1260
1261 bool wxString::IsNumber() const
1262 {
1263     if ( empty() )
1264         return true;
1265
1266     const_iterator i = begin();
1267
1268     if ( *i == _T('-') || *i == _T('+') )
1269         ++i;
1270
1271     for ( ; i != end(); ++i )
1272     {
1273         if ( !wxIsdigit(*i) )
1274             return false;
1275     }
1276
1277     return true;
1278 }
1279
1280 wxString wxString::Strip(stripType w) const
1281 {
1282     wxString s = *this;
1283     if ( w & leading ) s.Trim(false);
1284     if ( w & trailing ) s.Trim(true);
1285     return s;
1286 }
1287
1288 // ---------------------------------------------------------------------------
1289 // case conversion
1290 // ---------------------------------------------------------------------------
1291
1292 wxString& wxString::MakeUpper()
1293 {
1294   for ( iterator it = begin(), en = end(); it != en; ++it )
1295     *it = (wxChar)wxToupper(*it);
1296
1297   return *this;
1298 }
1299
1300 wxString& wxString::MakeLower()
1301 {
1302   for ( iterator it = begin(), en = end(); it != en; ++it )
1303     *it = (wxChar)wxTolower(*it);
1304
1305   return *this;
1306 }
1307
1308 // ---------------------------------------------------------------------------
1309 // trimming and padding
1310 // ---------------------------------------------------------------------------
1311
1312 // some compilers (VC++ 6.0 not to name them) return true for a call to
1313 // isspace('ê') in the C locale which seems to be broken to me, but we have to
1314 // live with this by checking that the character is a 7 bit one - even if this
1315 // may fail to detect some spaces (I don't know if Unicode doesn't have
1316 // space-like symbols somewhere except in the first 128 chars), it is arguably
1317 // still better than trimming away accented letters
1318 inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1319
1320 // trims spaces (in the sense of isspace) from left or right side
1321 wxString& wxString::Trim(bool bFromRight)
1322 {
1323     // first check if we're going to modify the string at all
1324     if ( !empty() &&
1325          (
1326           (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1327           (!bFromRight && wxSafeIsspace(GetChar(0u)))
1328          )
1329        )
1330     {
1331         if ( bFromRight )
1332         {
1333             // find last non-space character
1334             reverse_iterator psz = rbegin();
1335             while ( (psz != rend()) && wxSafeIsspace(*psz) )
1336                 psz++;
1337
1338             // truncate at trailing space start
1339             erase(psz.base(), end());
1340         }
1341         else
1342         {
1343             // find first non-space character
1344             iterator psz = begin();
1345             while ( (psz != end()) && wxSafeIsspace(*psz) )
1346                 psz++;
1347
1348             // fix up data and length
1349             erase(begin(), psz);
1350         }
1351     }
1352
1353     return *this;
1354 }
1355
1356 // adds nCount characters chPad to the string from either side
1357 wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1358 {
1359     wxString s(chPad, nCount);
1360
1361     if ( bFromRight )
1362         *this += s;
1363     else
1364     {
1365         s += *this;
1366         swap(s);
1367     }
1368
1369     return *this;
1370 }
1371
1372 // truncate the string
1373 wxString& wxString::Truncate(size_t uiLen)
1374 {
1375     if ( uiLen < length() )
1376     {
1377         erase(begin() + uiLen, end());
1378     }
1379     //else: nothing to do, string is already short enough
1380
1381     return *this;
1382 }
1383
1384 // ---------------------------------------------------------------------------
1385 // finding (return wxNOT_FOUND if not found and index otherwise)
1386 // ---------------------------------------------------------------------------
1387
1388 // find a character
1389 int wxString::Find(wxUniChar ch, bool bFromEnd) const
1390 {
1391     size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1392
1393     return (idx == npos) ? wxNOT_FOUND : (int)idx;
1394 }
1395
1396 // ----------------------------------------------------------------------------
1397 // conversion to numbers
1398 // ----------------------------------------------------------------------------
1399
1400 // The implementation of all the functions below is exactly the same so factor
1401 // it out. Note that number extraction works correctly on UTF-8 strings, so
1402 // we can use wxStringCharType and wx_str() for maximum efficiency.
1403
1404 #ifndef __WXWINCE__
1405     #define DO_IF_NOT_WINCE(x) x
1406 #else
1407     #define DO_IF_NOT_WINCE(x)
1408 #endif
1409
1410 #define WX_STRING_TO_INT_TYPE(val, base, func)                              \
1411     wxCHECK_MSG( val, false, _T("NULL output pointer") );                   \
1412     wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );  \
1413                                                                             \
1414     DO_IF_NOT_WINCE( errno = 0; )                                           \
1415                                                                             \
1416     const wxStringCharType *start = wx_str();                               \
1417     wxStringCharType *end;                                                  \
1418     *val = func(start, &end, base);                                         \
1419                                                                             \
1420     /* return true only if scan was stopped by the terminating NUL and */   \
1421     /* if the string was not empty to start with and no under/overflow */   \
1422     /* occurred: */                                                         \
1423     return !*end && (end != start)                                          \
1424         DO_IF_NOT_WINCE( && (errno != ERANGE) )
1425
1426 bool wxString::ToLong(long *val, int base) const
1427 {
1428     WX_STRING_TO_INT_TYPE(val, base, wxStrtol);
1429 }
1430
1431 bool wxString::ToULong(unsigned long *val, int base) const
1432 {
1433     WX_STRING_TO_INT_TYPE(val, base, wxStrtoul);
1434 }
1435
1436 bool wxString::ToLongLong(wxLongLong_t *val, int base) const
1437 {
1438     WX_STRING_TO_INT_TYPE(val, base, wxStrtoll);
1439 }
1440
1441 bool wxString::ToULongLong(wxULongLong_t *val, int base) const
1442 {
1443     WX_STRING_TO_INT_TYPE(val, base, wxStrtoull);
1444 }
1445
1446 bool wxString::ToDouble(double *val) const
1447 {
1448     wxCHECK_MSG( val, false, _T("NULL pointer in wxString::ToDouble") );
1449
1450 #ifndef __WXWINCE__
1451     errno = 0;
1452 #endif
1453
1454     const wxChar *start = c_str();
1455     wxChar *end;
1456     *val = wxStrtod(start, &end);
1457
1458     // return true only if scan was stopped by the terminating NUL and if the
1459     // string was not empty to start with and no under/overflow occurred
1460     return !*end && (end != start)
1461 #ifndef __WXWINCE__
1462         && (errno != ERANGE)
1463 #endif
1464     ;
1465 }
1466
1467 // ---------------------------------------------------------------------------
1468 // formatted output
1469 // ---------------------------------------------------------------------------
1470
1471 #if !wxUSE_UTF8_LOCALE_ONLY
1472 /* static */
1473 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1474 wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
1475 #else
1476 wxString wxString::DoFormatWchar(const wxChar *format, ...)
1477 #endif
1478 {
1479     va_list argptr;
1480     va_start(argptr, format);
1481
1482     wxString s;
1483     s.PrintfV(format, argptr);
1484
1485     va_end(argptr);
1486
1487     return s;
1488 }
1489 #endif // !wxUSE_UTF8_LOCALE_ONLY
1490
#if wxUSE_UNICODE_UTF8
// Same as DoFormatWchar() but takes the format as a char string, only used in
// the UTF-8 build.
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
1506
1507 /* static */
1508 wxString wxString::FormatV(const wxString& format, va_list argptr)
1509 {
1510     wxString s;
1511     s.PrintfV(format, argptr);
1512     return s;
1513 }
1514
1515 #if !wxUSE_UTF8_LOCALE_ONLY
1516 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1517 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
1518 #else
1519 int wxString::DoPrintfWchar(const wxChar *format, ...)
1520 #endif
1521 {
1522     va_list argptr;
1523     va_start(argptr, format);
1524
1525 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1526     // get a pointer to the wxString instance; we have to use dynamic_cast<>
1527     // because it's the only cast that works safely for downcasting when
1528     // multiple inheritance is used:
1529     wxString *str = static_cast<wxString*>(this);
1530 #else
1531     wxString *str = this;
1532 #endif
1533
1534     int iLen = str->PrintfV(format, argptr);
1535
1536     va_end(argptr);
1537
1538     return iLen;
1539 }
1540 #endif // !wxUSE_UTF8_LOCALE_ONLY
1541
#if wxUSE_UNICODE_UTF8
// Same as DoPrintfWchar() but takes the format as a char string, only used in
// the UTF-8 build.
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);
    const int iLen = PrintfV(format, args);
    va_end(args);

    return iLen;
}
#endif // wxUSE_UNICODE_UTF8
1555
1556 #if wxUSE_UNICODE_UTF8
1557 template<typename BufferType>
1558 #else
1559 // we only need one version in non-UTF8 builds and at least two Windows
1560 // compilers have problems with this function template, so use just one
1561 // normal function here
1562 #endif
1563 static int DoStringPrintfV(wxString& str,
1564                            const wxString& format, va_list argptr)
1565 {
1566     int size = 1024;
1567
1568     for ( ;; )
1569     {
1570 #if wxUSE_UNICODE_UTF8
1571         BufferType tmp(str, size + 1);
1572         typename BufferType::CharType *buf = tmp;
1573 #else
1574         wxStringBuffer tmp(str, size + 1);
1575         wxChar *buf = tmp;
1576 #endif
1577
1578         if ( !buf )
1579         {
1580             // out of memory
1581
1582             // in UTF-8 build, leaving uninitialized junk in the buffer
1583             // could result in invalid non-empty UTF-8 string, so just
1584             // reset the string to empty on failure:
1585             buf[0] = '\0';
1586             return -1;
1587         }
1588
1589         // wxVsnprintf() may modify the original arg pointer, so pass it
1590         // only a copy
1591         va_list argptrcopy;
1592         wxVaCopy(argptrcopy, argptr);
1593         int len = wxVsnprintf(buf, size, format, argptrcopy);
1594         va_end(argptrcopy);
1595
1596         // some implementations of vsnprintf() don't NUL terminate
1597         // the string if there is not enough space for it so
1598         // always do it manually
1599         buf[size] = _T('\0');
1600
1601         // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1602         // total number of characters which would have been written if the
1603         // buffer were large enough (newer standards such as Unix98)
1604         if ( len < 0 )
1605         {
1606             // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1607             //     wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1608             //     is true if *both* of them use our own implementation,
1609             //     otherwise we can't be sure
1610 #if wxUSE_WXVSNPRINTF
1611             // we know that our own implementation of wxVsnprintf() returns -1
1612             // only for a format error - thus there's something wrong with
1613             // the user's format string
1614             buf[0] = '\0';
1615             return -1;
1616 #else // possibly using system version
1617             // assume it only returns error if there is not enough space, but
1618             // as we don't know how much we need, double the current size of
1619             // the buffer
1620             size *= 2;
1621 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1622         }
1623         else if ( len >= size )
1624         {
1625 #if wxUSE_WXVSNPRINTF
1626             // we know that our own implementation of wxVsnprintf() returns
1627             // size+1 when there's not enough space but that's not the size
1628             // of the required buffer!
1629             size *= 2;      // so we just double the current size of the buffer
1630 #else
1631             // some vsnprintf() implementations NUL-terminate the buffer and
1632             // some don't in len == size case, to be safe always add 1
1633             size = len + 1;
1634 #endif
1635         }
1636         else // ok, there was enough space
1637         {
1638             break;
1639         }
1640     }
1641
1642     // we could have overshot
1643     str.Shrink();
1644
1645     return str.length();
1646 }
1647
1648 int wxString::PrintfV(const wxString& format, va_list argptr)
1649 {
1650 #if wxUSE_UNICODE_UTF8
1651     #if wxUSE_STL_BASED_WXSTRING
1652         typedef wxStringTypeBuffer<char> Utf8Buffer;
1653     #else
1654         typedef wxStringInternalBuffer Utf8Buffer;
1655     #endif
1656 #endif
1657
1658 #if wxUSE_UTF8_LOCALE_ONLY
1659     return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
1660 #else
1661     #if wxUSE_UNICODE_UTF8
1662     if ( wxLocaleIsUtf8 )
1663         return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
1664     else
1665         // wxChar* version
1666         return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
1667     #else
1668         return DoStringPrintfV(*this, format, argptr);
1669     #endif // UTF8/WCHAR
1670 #endif
1671 }
1672
1673 // ----------------------------------------------------------------------------
1674 // misc other operations
1675 // ----------------------------------------------------------------------------
1676
// returns true if the string matches the pattern which may contain '*' and
// '?' metacharacters (as usual, '?' matches any character and '*' any number
// of them)
//
// The matching is done by walking the mask and the text in parallel: a
// literal character of the mask must be equal to the current character of
// the text, '?' consumes any one character of the text and for '*' the
// literal part of the mask following it is searched for in the text. The
// position of the last '*' is remembered to allow retrying the match from
// the next text position if it fails later.
bool wxString::Matches(const wxString& mask) const
{
    // I disable this code as it doesn't seem to be faster (in fact, it seems
    // to be much slower) than the old, hand-written code below and using it
    // here requires always linking with libregex even if the user code doesn't
    // use it
    //
    // NOTE(review): the disabled code below refers to pszMask which is not
    // declared at this point, so it would need to be updated before being
    // enabled again
#if 0 // wxUSE_REGEX
    // first translate the shell-like mask into a regex
    wxString pattern;
    pattern.reserve(wxStrlen(pszMask));

    pattern += _T('^');
    while ( *pszMask )
    {
        switch ( *pszMask )
        {
            case _T('?'):
                pattern += _T('.');
                break;

            case _T('*'):
                pattern += _T(".*");
                break;

            case _T('^'):
            case _T('.'):
            case _T('$'):
            case _T('('):
            case _T(')'):
            case _T('|'):
            case _T('+'):
            case _T('\\'):
                // these characters are special in a RE, quote them
                // (however note that we don't quote '[' and ']' to allow
                // using them for Unix shell like matching)
                pattern += _T('\\');
                // fall through

            default:
                pattern += *pszMask;
        }

        pszMask++;
    }
    pattern += _T('$');

    // and now use it
    return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
#else // !wxUSE_REGEX
  // TODO: this is, of course, awfully inefficient...

  // FIXME-UTF8: implement using iterators, remove #if
#if wxUSE_UNICODE_UTF8
  // in the UTF-8 build both strings are first converted to wide character
  // buffers; the buffer objects must remain alive for as long as the pointers
  // into them are used, i.e. until the end of the function
  wxWCharBuffer maskBuf = mask.wc_str();
  wxWCharBuffer txtBuf = wc_str();
  const wxChar *pszMask = maskBuf.data();
  const wxChar *pszTxt = txtBuf.data();
#else
  const wxChar *pszMask = mask.wx_str();
  // the char currently being checked
  const wxChar *pszTxt = wx_str();
#endif

  // the last location where '*' matched
  const wxChar *pszLastStarInText = NULL;
  const wxChar *pszLastStarInMask = NULL;

  // we jump back here when backtracking to the last '*' (see the end of the
  // function)
match:
  for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
    switch ( *pszMask ) {
      case wxT('?'):
        // '?' matches any character but there must be one
        if ( *pszTxt == wxT('\0') )
          return false;

        // pszTxt and pszMask will be incremented in the loop statement

        break;

      case wxT('*'):
        {
          // remember where we started to be able to backtrack later
          pszLastStarInText = pszTxt;
          pszLastStarInMask = pszMask;

          // ignore special chars immediately following this one
          // (should this be an error?)
          //
          // NB: this means that any '?' directly following a '*' is skipped
          //     as well and so is not required to match a character
          while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
            pszMask++;

          // if there is nothing more, match
          if ( *pszMask == wxT('\0') )
            return true;

          // are there any other metacharacters in the mask?
          size_t uiLenMask;
          const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));

          if ( pEndMask != NULL ) {
            // we have to match the string between two metachars
            uiLenMask = pEndMask - pszMask;
          }
          else {
            // we have to match the remainder of the string
            uiLenMask = wxStrlen(pszMask);
          }

          // search for the literal part of the mask following the '*' in
          // the not yet matched part of the text: if it doesn't occur there
          // at all, no match is possible
          wxString strToMatch(pszMask, uiLenMask);
          const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
          if ( pMatch == NULL )
            return false;

          // continue after the literal part in both the text and the mask
          // -1 to compensate "++" in the loop
          pszTxt = pMatch + uiLenMask - 1;
          pszMask += uiLenMask - 1;
        }
        break;

      default:
        // literal character: it must be the same in the text (this also
        // fails if the end of the text was reached as *pszTxt is NUL then
        // while *pszMask is not)
        if ( *pszMask != *pszTxt )
          return false;
        break;
    }
  }

  // the entire mask was consumed here

  // match only if nothing left
  if ( *pszTxt == wxT('\0') )
    return true;

  // if we failed to match, backtrack if we can
  if ( pszLastStarInText ) {
    // retry from the last '*' of the mask but starting one character further
    // in the text than the last time
    pszTxt = pszLastStarInText + 1;
    pszMask = pszLastStarInMask;

    // this will be set again when the '*' is processed after the jump
    pszLastStarInText = NULL;

    // don't bother resetting pszLastStarInMask, it's unnecessary

    goto match;
  }

  return false;
#endif // wxUSE_REGEX/!wxUSE_REGEX
}
1823
1824 // Count the number of chars
1825 int wxString::Freq(wxUniChar ch) const
1826 {
1827     int count = 0;
1828     for ( const_iterator i = begin(); i != end(); ++i )
1829     {
1830         if ( *i == ch )
1831             count ++;
1832     }
1833     return count;
1834 }
1835
1836 // convert to upper case, return the copy of the string
1837 wxString wxString::Upper() const
1838 { wxString s(*this); return s.MakeUpper(); }
1839
1840 // convert to lower case, return the copy of the string
1841 wxString wxString::Lower() const { wxString s(*this); return s.MakeLower(); }
1842
1843 // ----------------------------------------------------------------------------
1844 // wxUTF8StringBuffer
1845 // ----------------------------------------------------------------------------
1846
1847 #if wxUSE_UNICODE_WCHAR
1848 wxUTF8StringBuffer::~wxUTF8StringBuffer()
1849 {
1850     wxMBConvStrictUTF8 conv;
1851     size_t wlen = conv.ToWChar(NULL, 0, m_buf);
1852     wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
1853
1854     wxStringInternalBuffer wbuf(m_str, wlen);
1855     conv.ToWChar(wbuf, wlen, m_buf);
1856 }
1857
1858 wxUTF8StringBufferLength::~wxUTF8StringBufferLength()
1859 {
1860     wxCHECK_RET(m_lenSet, "length not set");
1861
1862     wxMBConvStrictUTF8 conv;
1863     size_t wlen = conv.ToWChar(NULL, 0, m_buf, m_len);
1864     wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
1865
1866     wxStringInternalBufferLength wbuf(m_str, wlen);
1867     conv.ToWChar(wbuf, wlen, m_buf, m_len);
1868     wbuf.SetLength(wlen);
1869 }
1870 #endif // wxUSE_UNICODE_WCHAR