src/common/string.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/string.cpp
   3 // Purpose:     wxString class
   4 // Author:      Vadim Zeitlin, Ryan Norton
   5 // Modified by:
   6 // Created:     29/01/98
   7 // RCS-ID:      $Id$
   8 // Copyright:   (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
   9 //              (c) 2004 Ryan Norton <wxprojects@comcast.net>
  10 // Licence:     wxWindows licence
  11 /////////////////////////////////////////////////////////////////////////////
  12
  13 // ===========================================================================
  14 // headers, declarations, constants
  15 // ===========================================================================
  16
  17 // For compilers that support precompilation, includes "wx.h".
  18 #include "wx/wxprec.h"
  19
  20 #ifdef __BORLANDC__
  21     #pragma hdrstop
  22 #endif
  23
  24 #ifndef WX_PRECOMP
  25     #include "wx/string.h"
  26     #include "wx/wxcrtvararg.h"
  27 #endif
  28
  29 #include <ctype.h>
  30
  31 #ifndef __WXWINCE__
  32     #include <errno.h>
  33 #endif
  34
  35 #include <string.h>
  36 #include <stdlib.h>
  37
  38 #ifdef __SALFORDC__
  39     #include <clib.h>
  40 #endif
  41
  42 #include "wx/hashmap.h"
  43
  44 // string handling functions used by wxString:
  45 #if wxUSE_UNICODE_UTF8
  46     #define wxStringMemcpy   memcpy
  47     #define wxStringMemcmp   memcmp
  48     #define wxStringMemchr   memchr
  49     #define wxStringStrlen   strlen
  50 #else
  51     #define wxStringMemcpy   wxTmemcpy
  52     #define wxStringMemcmp   wxTmemcmp
  53     #define wxStringMemchr   wxTmemchr
  54     #define wxStringStrlen   wxStrlen
  55 #endif
  56
  57
  58 // ---------------------------------------------------------------------------
  59 // static class variables definition
  60 // ---------------------------------------------------------------------------
  61
  62 //According to STL _must_ be a -1 size_t
  63 const size_t wxString::npos = (size_t) -1;
  64
  65 // ----------------------------------------------------------------------------
  66 // global functions
  67 // ----------------------------------------------------------------------------
  68
  69 #if wxUSE_STD_IOSTREAM
  70
  71 #include <iostream>
  72
  73 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
  74 {
  75 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
  76     return os << (const char *)str.AsCharBuf();
  77 #else
  78     return os << str.AsInternal();
  79 #endif
  80 }
  81
  82 wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
  83 {
  84     return os << str.c_str();
  85 }
  86
  87 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str)
  88 {
  89     return os << str.data();
  90 }
  91
  92 #ifndef __BORLANDC__
  93 wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)
  94 {
  95     return os << str.data();
  96 }
  97 #endif
  98
  99 #endif // wxUSE_STD_IOSTREAM
 100
 101 // ===========================================================================
 102 // wxString class core
 103 // ===========================================================================
 104
 105 #if wxUSE_UNICODE_UTF8
 106
 107 void wxString::PosLenToImpl(size_t pos, size_t len,
 108                             size_t *implPos, size_t *implLen) const
 109 {
 110     if ( pos == npos )
 111         *implPos = npos;
 112     else
 113     {
 114         const_iterator i = begin() + pos;
 115         *implPos = wxStringImpl::const_iterator(i.impl()) - m_impl.begin();
 116         if ( len == npos )
 117             *implLen = npos;
 118         else
 119         {
 120             // too large length is interpreted as "to the end of the string"
 121             // FIXME-UTF8: verify this is the case in std::string, assert
 122             // otherwise
 123             if ( pos + len > length() )
 124                 len = length() - pos;
 125
 126             *implLen = (i + len).impl() - i.impl();
 127         }
 128     }
 129 }
 130
 131 #endif // wxUSE_UNICODE_UTF8
 132
 133 // ----------------------------------------------------------------------------
 134 // wxCStrData converted strings caching
 135 // ----------------------------------------------------------------------------
 136
 137 // FIXME-UTF8: temporarily disabled because it doesn't work with global
 138 //             string objects; re-enable after fixing this bug and benchmarking
 139 //             performance to see if using a hash is a good idea at all
 140 #if 0
 141
 142 // For backward compatibility reasons, it must be possible to assign the value
 143 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
 144 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
 145 // because the memory would be freed immediately, but it has to be valid as long
 146 // as the string is not modified, so that code like this still works:
 147 //
 148 // const wxChar *s = str.c_str();
 149 // while ( s ) { ... }
 150
 151 // FIXME-UTF8: not thread safe!
 152 // FIXME-UTF8: we currently clear the cached conversion only when the string is
 153 //             destroyed, but we should do it when the string is modified, to
 154 //             keep memory usage down
 155 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
 156 //             invalidated the cache on every change, we could keep the previous
 157 //             conversion
 158 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
 159 //             to use mb_str() or wc_str() instead of (const [w]char*)c_str()
 160
 161 template<typename T>
 162 static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
 163 {
 164     typename T::iterator i = hash.find(wxConstCast(s, wxString));
 165     if ( i != hash.end() )
 166     {
 167         free(i->second);
 168         hash.erase(i);
 169     }
 170 }
 171
 172 #if wxUSE_UNICODE
 173 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
 174 //     so we have to use wxString* here and const-cast when used
 175 WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
 176                     wxStringCharConversionCache);
 177 static wxStringCharConversionCache gs_stringsCharCache;
 178
 179 const char* wxCStrData::AsChar() const
 180 {
 181     // remove previously cache value, if any (see FIXMEs above):
 182     DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
 183
 184     // convert the string and keep it:
 185     const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
 186         m_str->mb_str().release();
 187
 188     return s + m_offset;
 189 }
 190 #endif // wxUSE_UNICODE
 191
 192 #if !wxUSE_UNICODE_WCHAR
 193 WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
 194                     wxStringWCharConversionCache);
 195 static wxStringWCharConversionCache gs_stringsWCharCache;
 196
 197 const wchar_t* wxCStrData::AsWChar() const
 198 {
 199     // remove previously cache value, if any (see FIXMEs above):
 200     DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
 201
 202     // convert the string and keep it:
 203     const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
 204         m_str->wc_str().release();
 205
 206     return s + m_offset;
 207 }
 208 #endif // !wxUSE_UNICODE_WCHAR
 209
 210 wxString::~wxString()
 211 {
 212 #if wxUSE_UNICODE
 213     // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
 214     DeleteStringFromConversionCache(gs_stringsCharCache, this);
 215 #endif
 216 #if !wxUSE_UNICODE_WCHAR
 217     DeleteStringFromConversionCache(gs_stringsWCharCache, this);
 218 #endif
 219 }
 220 #endif
 221
 222 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 223 const char* wxCStrData::AsChar() const
 224 {
 225 #if wxUSE_UNICODE_UTF8
 226     if ( wxLocaleIsUtf8 )
 227         return AsInternal();
 228 #endif
 229     // under non-UTF8 locales, we have to convert the internal UTF-8
 230     // representation using wxConvLibc and cache the result
 231
 232     wxString *str = wxConstCast(m_str, wxString);
 233
 234     // convert the string:
 235     //
 236     // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
 237     //             have it) but it's unfortunately not obvious to implement
 238     //             because we don't know how big buffer do we need for the
 239     //             given string length (in case of multibyte encodings, e.g.
 240     //             ISO-2022-JP or UTF-8 when internal representation is wchar_t)
 241     //
 242     //             One idea would be to store more than just m_convertedToChar
 243     //             in wxString: then we could record the length of the string
 244     //             which was converted the last time and try to reuse the same
 245     //             buffer if the current length is not greater than it (this
 246     //             could still fail because string could have been modified in
 247     //             place but it would work most of the time, so we'd do it and
 248     //             only allocate the new buffer if in-place conversion returned
 249     //             an error). We could also store a bit saying if the string
 250     //             was modified since the last conversion (and update it in all
 251     //             operation modifying the string, of course) to avoid unneeded
 252     //             consequential conversions. But both of these ideas require
 253     //             adding more fields to wxString and require profiling results
 254     //             to be sure that we really gain enough from them to justify
 255     //             doing it.
 256     wxCharBuffer buf(str->mb_str());
 257
 258     // if it failed, return empty string and not NULL to avoid crashes in code
 259     // written with either wxWidgets 2 wxString or std::string behaviour in
 260     // mind: neither of them ever returns NULL and so we shouldn't neither
 261     if ( !buf )
 262         return "";
 263
 264     if ( str->m_convertedToChar &&
 265          strlen(buf) == strlen(str->m_convertedToChar) )
 266     {
 267         // keep the same buffer for as long as possible, so that several calls
 268         // to c_str() in a row still work:
 269         strcpy(str->m_convertedToChar, buf);
 270     }
 271     else
 272     {
 273         str->m_convertedToChar = buf.release();
 274     }
 275
 276     // and keep it:
 277     return str->m_convertedToChar + m_offset;
 278 }
 279 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 280
 281 #if !wxUSE_UNICODE_WCHAR
 282 const wchar_t* wxCStrData::AsWChar() const
 283 {
 284     wxString *str = wxConstCast(m_str, wxString);
 285
 286     // convert the string:
 287     wxWCharBuffer buf(str->wc_str());
 288
 289     // notice that here, unlike above in AsChar(), conversion can't fail as our
 290     // internal UTF-8 is always well-formed -- or the string was corrupted and
 291     // all bets are off anyhow
 292
 293     // FIXME-UTF8: do the conversion in-place in the existing buffer
 294     if ( str->m_convertedToWChar &&
 295          wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
 296     {
 297         // keep the same buffer for as long as possible, so that several calls
 298         // to c_str() in a row still work:
 299         memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
 300     }
 301     else
 302     {
 303         str->m_convertedToWChar = buf.release();
 304     }
 305
 306     // and keep it:
 307     return str->m_convertedToWChar + m_offset;
 308 }
 309 #endif // !wxUSE_UNICODE_WCHAR
 310
 311 // ===========================================================================
 312 // wxString class core
 313 // ===========================================================================
 314
 315 // ---------------------------------------------------------------------------
 316 // construction and conversion
 317 // ---------------------------------------------------------------------------
 318
 319 #if wxUSE_UNICODE_WCHAR
 320 /* static */
 321 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 322                                                const wxMBConv& conv)
 323 {
 324     // anything to do?
 325     if ( !psz || nLength == 0 )
 326         return SubstrBufFromMB(L"", 0);
 327
 328     if ( nLength == npos )
 329         nLength = wxNO_LEN;
 330
 331     size_t wcLen;
 332     wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 333     if ( !wcLen )
 334         return SubstrBufFromMB(_T(""), 0);
 335     else
 336         return SubstrBufFromMB(wcBuf, wcLen);
 337 }
 338 #endif // wxUSE_UNICODE_WCHAR
 339
 340 #if wxUSE_UNICODE_UTF8
 341 /* static */
 342 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 343                                                const wxMBConv& conv)
 344 {
 345     // anything to do?
 346     if ( !psz || nLength == 0 )
 347         return SubstrBufFromMB("", 0);
 348
 349     // if psz is already in UTF-8, we don't have to do the roundtrip to
 350     // wchar_t* and back:
 351     if ( conv.IsUTF8() )
 352     {
 353         // we need to validate the input because UTF8 iterators assume valid
 354         // UTF-8 sequence and psz may be invalid:
 355         if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
 356         {
 357             return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz), nLength);
 358         }
 359         // else: do the roundtrip through wchar_t*
 360     }
 361
 362     if ( nLength == npos )
 363         nLength = wxNO_LEN;
 364
 365     // first convert to wide string:
 366     size_t wcLen;
 367     wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 368     if ( !wcLen )
 369         return SubstrBufFromMB("", 0);
 370
 371     // and then to UTF-8:
 372     SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
 373     // widechar -> UTF-8 conversion isn't supposed to ever fail:
 374     wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
 375
 376     return buf;
 377 }
 378 #endif // wxUSE_UNICODE_UTF8
 379
 380 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 381 /* static */
 382 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
 383                                                const wxMBConv& conv)
 384 {
 385     // anything to do?
 386     if ( !pwz || nLength == 0 )
 387         return SubstrBufFromWC("", 0);
 388
 389     if ( nLength == npos )
 390         nLength = wxNO_LEN;
 391
 392     size_t mbLen;
 393     wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
 394     if ( !mbLen )
 395         return SubstrBufFromWC("", 0);
 396     else
 397         return SubstrBufFromWC(mbBuf, mbLen);
 398 }
 399 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 400
 401
 402 #if wxUSE_UNICODE_WCHAR
 403
 404 //Convert wxString in Unicode mode to a multi-byte string
 405 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
 406 {
 407     return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL);
 408 }
 409
 410 #elif wxUSE_UNICODE_UTF8
 411
 412 const wxWCharBuffer wxString::wc_str() const
 413 {
 414     return wxMBConvStrictUTF8().cMB2WC
 415                                 (
 416                                     m_impl.c_str(),
 417                                     m_impl.length() + 1, // size, not length
 418                                     NULL
 419                                 );
 420 }
 421
 422 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
 423 {
 424     if ( conv.IsUTF8() )
 425         return wxCharBuffer::CreateNonOwned(m_impl.c_str());
 426
 427     // FIXME-UTF8: use wc_str() here once we have buffers with length
 428
 429     size_t wcLen;
 430     wxWCharBuffer wcBuf(wxMBConvStrictUTF8().cMB2WC
 431                                              (
 432                                                 m_impl.c_str(),
 433                                                 m_impl.length() + 1, // size
 434                                                 &wcLen
 435                                              ));
 436     if ( !wcLen )
 437         return wxCharBuffer("");
 438
 439     return conv.cWC2MB(wcBuf, wcLen+1, NULL);
 440 }
 441
 442 #else // ANSI
 443
 444 //Converts this string to a wide character string if unicode
 445 //mode is not enabled and wxUSE_WCHAR_T is enabled
 446 const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
 447 {
 448     return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL);
 449 }
 450
 451 #endif // Unicode/ANSI
 452
 453 // shrink to minimal size (releasing extra memory)
 454 bool wxString::Shrink()
 455 {
 456   wxString tmp(begin(), end());
 457   swap(tmp);
 458   return tmp.length() == length();
 459 }
 460
 461 // deprecated compatibility code:
 462 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
 463 wxStringCharType *wxString::GetWriteBuf(size_t nLen)
 464 {
 465     return DoGetWriteBuf(nLen);
 466 }
 467
 468 void wxString::UngetWriteBuf()
 469 {
 470     DoUngetWriteBuf();
 471 }
 472
 473 void wxString::UngetWriteBuf(size_t nLen)
 474 {
 475     DoUngetWriteBuf(nLen);
 476 }
 477 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
 478
 479
 480 // ---------------------------------------------------------------------------
 481 // data access
 482 // ---------------------------------------------------------------------------
 483
 484 // all functions are inline in string.h
 485
 486 // ---------------------------------------------------------------------------
 487 // concatenation operators
 488 // ---------------------------------------------------------------------------
 489
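  /*
   * concatenation functions come in 5 flavours (each "C str" flavour is
   * provided for both narrow "const char *" and wide "const wchar_t *"
   * strings, so there are 7 overloads in total):
   *  string + string
   *  char   + string      and      string + char
   *  C str  + string      and      string + C str
   */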
 496
 497 wxString operator+(const wxString& str1, const wxString& str2)
 498 {
 499 #if !wxUSE_STL_BASED_WXSTRING
 500     wxASSERT( str1.IsValid() );
 501     wxASSERT( str2.IsValid() );
 502 #endif
 503
 504     wxString s = str1;
 505     s += str2;
 506
 507     return s;
 508 }
 509
 510 wxString operator+(const wxString& str, wxUniChar ch)
 511 {
 512 #if !wxUSE_STL_BASED_WXSTRING
 513     wxASSERT( str.IsValid() );
 514 #endif
 515
 516     wxString s = str;
 517     s += ch;
 518
 519     return s;
 520 }
 521
 522 wxString operator+(wxUniChar ch, const wxString& str)
 523 {
 524 #if !wxUSE_STL_BASED_WXSTRING
 525     wxASSERT( str.IsValid() );
 526 #endif
 527
 528     wxString s = ch;
 529     s += str;
 530
 531     return s;
 532 }
 533
 534 wxString operator+(const wxString& str, const char *psz)
 535 {
 536 #if !wxUSE_STL_BASED_WXSTRING
 537     wxASSERT( str.IsValid() );
 538 #endif
 539
 540     wxString s;
 541     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 542         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 543     }
 544     s += str;
 545     s += psz;
 546
 547     return s;
 548 }
 549
 550 wxString operator+(const wxString& str, const wchar_t *pwz)
 551 {
 552 #if !wxUSE_STL_BASED_WXSTRING
 553     wxASSERT( str.IsValid() );
 554 #endif
 555
 556     wxString s;
 557     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 558         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 559     }
 560     s += str;
 561     s += pwz;
 562
 563     return s;
 564 }
 565
 566 wxString operator+(const char *psz, const wxString& str)
 567 {
 568 #if !wxUSE_STL_BASED_WXSTRING
 569     wxASSERT( str.IsValid() );
 570 #endif
 571
 572     wxString s;
 573     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 574         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 575     }
 576     s = psz;
 577     s += str;
 578
 579     return s;
 580 }
 581
 582 wxString operator+(const wchar_t *pwz, const wxString& str)
 583 {
 584 #if !wxUSE_STL_BASED_WXSTRING
 585     wxASSERT( str.IsValid() );
 586 #endif
 587
 588     wxString s;
 589     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 590         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 591     }
 592     s = pwz;
 593     s += str;
 594
 595     return s;
 596 }
 597
 598 // ---------------------------------------------------------------------------
 599 // string comparison
 600 // ---------------------------------------------------------------------------
 601
 602 bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
 603 {
 604     return (length() == 1) && (compareWithCase ? GetChar(0u) == c
 605                                : wxToupper(GetChar(0u)) == wxToupper(c));
 606 }
 607
 608 #ifdef HAVE_STD_STRING_COMPARE
 609
 610 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
 611 //     UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
 612 //     sort strings in characters code point order by sorting the byte sequence
 613 //     in byte values order (i.e. what strcmp() and memcmp() do).
 614
 615 int wxString::compare(const wxString& str) const
 616 {
 617     return m_impl.compare(str.m_impl);
 618 }
 619
 620 int wxString::compare(size_t nStart, size_t nLen,
 621                       const wxString& str) const
 622 {
 623     size_t pos, len;
 624     PosLenToImpl(nStart, nLen, &pos, &len);
 625     return m_impl.compare(pos, len, str.m_impl);
 626 }
 627
 628 int wxString::compare(size_t nStart, size_t nLen,
 629                       const wxString& str,
 630                       size_t nStart2, size_t nLen2) const
 631 {
 632     size_t pos, len;
 633     PosLenToImpl(nStart, nLen, &pos, &len);
 634
 635     size_t pos2, len2;
 636     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 637
 638     return m_impl.compare(pos, len, str.m_impl, pos2, len2);
 639 }
 640
 641 int wxString::compare(const char* sz) const
 642 {
 643     return m_impl.compare(ImplStr(sz));
 644 }
 645
 646 int wxString::compare(const wchar_t* sz) const
 647 {
 648     return m_impl.compare(ImplStr(sz));
 649 }
 650
 651 int wxString::compare(size_t nStart, size_t nLen,
 652                       const char* sz, size_t nCount) const
 653 {
 654     size_t pos, len;
 655     PosLenToImpl(nStart, nLen, &pos, &len);
 656
 657     SubstrBufFromMB str(ImplStr(sz, nCount));
 658
 659     return m_impl.compare(pos, len, str.data, str.len);
 660 }
 661
 662 int wxString::compare(size_t nStart, size_t nLen,
 663                       const wchar_t* sz, size_t nCount) const
 664 {
 665     size_t pos, len;
 666     PosLenToImpl(nStart, nLen, &pos, &len);
 667
 668     SubstrBufFromWC str(ImplStr(sz, nCount));
 669
 670     return m_impl.compare(pos, len, str.data, str.len);
 671 }
 672
 673 #else // !HAVE_STD_STRING_COMPARE
 674
 675 static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
 676                           const wxStringCharType* s2, size_t l2)
 677 {
 678     if( l1 == l2 )
 679         return wxStringMemcmp(s1, s2, l1);
 680     else if( l1 < l2 )
 681     {
 682         int ret = wxStringMemcmp(s1, s2, l1);
 683         return ret == 0 ? -1 : ret;
 684     }
 685     else
 686     {
 687         int ret = wxStringMemcmp(s1, s2, l2);
 688         return ret == 0 ? +1 : ret;
 689     }
 690 }
 691
 692 int wxString::compare(const wxString& str) const
 693 {
 694     return ::wxDoCmp(m_impl.data(), m_impl.length(),
 695                      str.m_impl.data(), str.m_impl.length());
 696 }
 697
 698 int wxString::compare(size_t nStart, size_t nLen,
 699                       const wxString& str) const
 700 {
 701     wxASSERT(nStart <= length());
 702     size_type strLen = length() - nStart;
 703     nLen = strLen < nLen ? strLen : nLen;
 704
 705     size_t pos, len;
 706     PosLenToImpl(nStart, nLen, &pos, &len);
 707
 708     return ::wxDoCmp(m_impl.data() + pos,  len,
 709                      str.m_impl.data(), str.m_impl.length());
 710 }
 711
 712 int wxString::compare(size_t nStart, size_t nLen,
 713                       const wxString& str,
 714                       size_t nStart2, size_t nLen2) const
 715 {
 716     wxASSERT(nStart <= length());
 717     wxASSERT(nStart2 <= str.length());
 718     size_type strLen  =     length() - nStart,
 719               strLen2 = str.length() - nStart2;
 720     nLen  = strLen  < nLen  ? strLen  : nLen;
 721     nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
 722
 723     size_t pos, len;
 724     PosLenToImpl(nStart, nLen, &pos, &len);
 725     size_t pos2, len2;
 726     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 727
 728     return ::wxDoCmp(m_impl.data() + pos, len,
 729                      str.m_impl.data() + pos2, len2);
 730 }
 731
 732 int wxString::compare(const char* sz) const
 733 {
 734     SubstrBufFromMB str(ImplStr(sz, npos));
 735     if ( str.len == npos )
 736         str.len = wxStringStrlen(str.data);
 737     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 738 }
 739
 740 int wxString::compare(const wchar_t* sz) const
 741 {
 742     SubstrBufFromWC str(ImplStr(sz, npos));
 743     if ( str.len == npos )
 744         str.len = wxStringStrlen(str.data);
 745     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 746 }
 747
 748 int wxString::compare(size_t nStart, size_t nLen,
 749                       const char* sz, size_t nCount) const
 750 {
 751     wxASSERT(nStart <= length());
 752     size_type strLen = length() - nStart;
 753     nLen = strLen < nLen ? strLen : nLen;
 754
 755     size_t pos, len;
 756     PosLenToImpl(nStart, nLen, &pos, &len);
 757
 758     SubstrBufFromMB str(ImplStr(sz, nCount));
 759     if ( str.len == npos )
 760         str.len = wxStringStrlen(str.data);
 761
 762     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 763 }
 764
 765 int wxString::compare(size_t nStart, size_t nLen,
 766                       const wchar_t* sz, size_t nCount) const
 767 {
 768     wxASSERT(nStart <= length());
 769     size_type strLen = length() - nStart;
 770     nLen = strLen < nLen ? strLen : nLen;
 771
 772     size_t pos, len;
 773     PosLenToImpl(nStart, nLen, &pos, &len);
 774
 775     SubstrBufFromWC str(ImplStr(sz, nCount));
 776     if ( str.len == npos )
 777         str.len = wxStringStrlen(str.data);
 778
 779     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 780 }
 781
 782 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
 783
 784
 785 // ---------------------------------------------------------------------------
 786 // find_{first,last}_[not]_of functions
 787 // ---------------------------------------------------------------------------
 788
 789 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
 790
 791 // NB: All these functions are implemented  with the argument being wxChar*,
 792 //     i.e. widechar string in any Unicode build, even though native string
 793 //     representation is char* in the UTF-8 build. This is because we couldn't
 794 //     use memchr() to determine if a character is in a set encoded as UTF-8.
 795
 796 size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
 797 {
 798     return find_first_of(sz, nStart, wxStrlen(sz));
 799 }
 800
 801 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
 802 {
 803     return find_first_not_of(sz, nStart, wxStrlen(sz));
 804 }
 805
 806 size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
 807 {
 808     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 809
 810     size_t idx = nStart;
 811     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 812     {
 813         if ( wxTmemchr(sz, *i, n) )
 814             return idx;
 815     }
 816
 817     return npos;
 818 }
 819
 820 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
 821 {
 822     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 823
 824     size_t idx = nStart;
 825     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 826     {
 827         if ( !wxTmemchr(sz, *i, n) )
 828             return idx;
 829     }
 830
 831     return npos;
 832 }
 833
 834
 835 size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
 836 {
 837     return find_last_of(sz, nStart, wxStrlen(sz));
 838 }
 839
 840 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
 841 {
 842     return find_last_not_of(sz, nStart, wxStrlen(sz));
 843 }
 844
 845 size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
 846 {
 847     size_t len = length();
 848
 849     if ( nStart == npos )
 850     {
 851         nStart = len - 1;
 852     }
 853     else
 854     {
 855         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 856     }
 857
 858     size_t idx = nStart;
 859     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 860           i != rend(); --idx, ++i )
 861     {
 862         if ( wxTmemchr(sz, *i, n) )
 863             return idx;
 864     }
 865
 866     return npos;
 867 }
 868
 869 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
 870 {
 871     size_t len = length();
 872
 873     if ( nStart == npos )
 874     {
 875         nStart = len - 1;
 876     }
 877     else
 878     {
 879         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 880     }
 881
 882     size_t idx = nStart;
 883     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 884           i != rend(); --idx, ++i )
 885     {
 886         if ( !wxTmemchr(sz, *i, n) )
 887             return idx;
 888     }
 889
 890     return npos;
 891 }
 892
 893 size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
 894 {
 895     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 896
 897     size_t idx = nStart;
 898     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 899     {
 900         if ( *i != ch )
 901             return idx;
 902     }
 903
 904     return npos;
 905 }
 906
 907 size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
 908 {
 909     size_t len = length();
 910
 911     if ( nStart == npos )
 912     {
 913         nStart = len - 1;
 914     }
 915     else
 916     {
 917         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 918     }
 919
 920     size_t idx = nStart;
 921     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 922           i != rend(); --idx, ++i )
 923     {
 924         if ( *i != ch )
 925             return idx;
 926     }
 927
 928     return npos;
 929 }
 930
 931 // the functions above were implemented for wchar_t* arguments in Unicode
 932 // build and char* in ANSI build; below are implementations for the other
 933 // version:
 934 #if wxUSE_UNICODE
 935     #define wxOtherCharType char
 936     #define STRCONV         (const wxChar*)wxConvLibc.cMB2WC
 937 #else
 938     #define wxOtherCharType wchar_t
 939     #define STRCONV         (const wxChar*)wxConvLibc.cWC2MB
 940 #endif
 941
 942 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
 943     { return find_first_of(STRCONV(sz), nStart); }
 944
 945 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
 946                                size_t n) const
 947     { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
 948 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
 949     { return find_last_of(STRCONV(sz), nStart); }
 950 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
 951                               size_t n) const
 952     { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
 953 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
 954     { return find_first_not_of(STRCONV(sz), nStart); }
 955 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
 956                                    size_t n) const
 957     { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
 958 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
 959     { return find_last_not_of(STRCONV(sz), nStart); }
 960 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
 961                                   size_t n) const
 962     { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
 963
 964 #undef wxOtherCharType
 965 #undef STRCONV
 966
 967 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
 968
 969 // ===========================================================================
 970 // other common string functions
 971 // ===========================================================================
 972
 973 int wxString::CmpNoCase(const wxString& s) const
 974 {
 975     // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
 976
 977     const_iterator i1 = begin();
 978     const_iterator end1 = end();
 979     const_iterator i2 = s.begin();
 980     const_iterator end2 = s.end();
 981
 982     for ( ; i1 != end1 && i2 != end2; ++i1, ++i2 )
 983     {
 984         wxUniChar lower1 = (wxChar)wxTolower(*i1);
 985         wxUniChar lower2 = (wxChar)wxTolower(*i2);
 986         if ( lower1 != lower2 )
 987             return lower1 < lower2 ? -1 : 1;
 988     }
 989
 990     size_t len1 = length();
 991     size_t len2 = s.length();
 992
 993     if ( len1 < len2 )
 994         return -1;
 995     else if ( len1 > len2 )
 996         return 1;
 997     return 0;
 998 }
 999
1000
1001 #if wxUSE_UNICODE
1002
1003 #ifdef __MWERKS__
1004 #ifndef __SCHAR_MAX__
1005 #define __SCHAR_MAX__ 127
1006 #endif
1007 #endif
1008
1009 wxString wxString::FromAscii(const char *ascii, size_t len)
1010 {
1011     if (!ascii || len == 0)
1012        return wxEmptyString;
1013
1014     wxString res;
1015
1016     {
1017         wxStringInternalBuffer buf(res, len);
1018         wxStringCharType *dest = buf;
1019
1020         for ( ; len > 0; --len )
1021         {
1022             unsigned char c = (unsigned char)*ascii++;
1023             wxASSERT_MSG( c < 0x80,
1024                           _T("Non-ASCII value passed to FromAscii().") );
1025
1026             *dest++ = (wchar_t)c;
1027         }
1028     }
1029
1030     return res;
1031 }
1032
1033 wxString wxString::FromAscii(const char *ascii)
1034 {
1035     return FromAscii(ascii, wxStrlen(ascii));
1036 }
1037
1038 wxString wxString::FromAscii(char ascii)
1039 {
1040     // What do we do with '\0' ?
1041
1042     unsigned char c = (unsigned char)ascii;
1043
1044     wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") );
1045
1046     // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1047     return wxString(wxUniChar((wchar_t)c));
1048 }
1049
1050 const wxCharBuffer wxString::ToAscii() const
1051 {
1052     // this will allocate enough space for the terminating NUL too
1053     wxCharBuffer buffer(length());
1054     char *dest = buffer.data();
1055
1056     for ( const_iterator i = begin(); i != end(); ++i )
1057     {
1058         wxUniChar c(*i);
1059         // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1060         *dest++ = c.IsAscii() ? (char)c : '_';
1061
1062         // the output string can't have embedded NULs anyhow, so we can safely
1063         // stop at first of them even if we do have any
1064         if ( !c )
1065             break;
1066     }
1067
1068     return buffer;
1069 }
1070
1071 #endif // wxUSE_UNICODE
1072
1073 // extract string of length nCount starting at nFirst
1074 wxString wxString::Mid(size_t nFirst, size_t nCount) const
1075 {
1076     size_t nLen = length();
1077
1078     // default value of nCount is npos and means "till the end"
1079     if ( nCount == npos )
1080     {
1081         nCount = nLen - nFirst;
1082     }
1083
1084     // out-of-bounds requests return sensible things
1085     if ( nFirst + nCount > nLen )
1086     {
1087         nCount = nLen - nFirst;
1088     }
1089
1090     if ( nFirst > nLen )
1091     {
1092         // AllocCopy() will return empty string
1093         return wxEmptyString;
1094     }
1095
1096     wxString dest(*this, nFirst, nCount);
1097     if ( dest.length() != nCount )
1098     {
1099         wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1100     }
1101
1102     return dest;
1103 }
1104
1105 // check that the string starts with prefix and return the rest of the string
1106 // in the provided pointer if it is not NULL, otherwise return false
1107 bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
1108 {
1109     if ( compare(0, prefix.length(), prefix) != 0 )
1110         return false;
1111
1112     if ( rest )
1113     {
1114         // put the rest of the string into provided pointer
1115         rest->assign(*this, prefix.length(), npos);
1116     }
1117
1118     return true;
1119 }
1120
1121
1122 // check that the string ends with suffix and return the rest of it in the
1123 // provided pointer if it is not NULL, otherwise return false
1124 bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
1125 {
1126     int start = length() - suffix.length();
1127
1128     if ( start < 0 || compare(start, npos, suffix) != 0 )
1129         return false;
1130
1131     if ( rest )
1132     {
1133         // put the rest of the string into provided pointer
1134         rest->assign(*this, 0, start);
1135     }
1136
1137     return true;
1138 }
1139
1140
1141 // extract nCount last (rightmost) characters
1142 wxString wxString::Right(size_t nCount) const
1143 {
1144   if ( nCount > length() )
1145     nCount = length();
1146
1147   wxString dest(*this, length() - nCount, nCount);
1148   if ( dest.length() != nCount ) {
1149     wxFAIL_MSG( _T("out of memory in wxString::Right") );
1150   }
1151   return dest;
1152 }
1153
1154 // get all characters after the last occurence of ch
1155 // (returns the whole string if ch not found)
1156 wxString wxString::AfterLast(wxUniChar ch) const
1157 {
1158   wxString str;
1159   int iPos = Find(ch, true);
1160   if ( iPos == wxNOT_FOUND )
1161     str = *this;
1162   else
1163     str = wx_str() + iPos + 1;
1164
1165   return str;
1166 }
1167
1168 // extract nCount first (leftmost) characters
1169 wxString wxString::Left(size_t nCount) const
1170 {
1171   if ( nCount > length() )
1172     nCount = length();
1173
1174   wxString dest(*this, 0, nCount);
1175   if ( dest.length() != nCount ) {
1176     wxFAIL_MSG( _T("out of memory in wxString::Left") );
1177   }
1178   return dest;
1179 }
1180
1181 // get all characters before the first occurence of ch
1182 // (returns the whole string if ch not found)
1183 wxString wxString::BeforeFirst(wxUniChar ch) const
1184 {
1185   int iPos = Find(ch);
1186   if ( iPos == wxNOT_FOUND ) iPos = length();
1187   return wxString(*this, 0, iPos);
1188 }
1189
1190 /// get all characters before the last occurence of ch
1191 /// (returns empty string if ch not found)
1192 wxString wxString::BeforeLast(wxUniChar ch) const
1193 {
1194   wxString str;
1195   int iPos = Find(ch, true);
1196   if ( iPos != wxNOT_FOUND && iPos != 0 )
1197     str = wxString(c_str(), iPos);
1198
1199   return str;
1200 }
1201
1202 /// get all characters after the first occurence of ch
1203 /// (returns empty string if ch not found)
1204 wxString wxString::AfterFirst(wxUniChar ch) const
1205 {
1206   wxString str;
1207   int iPos = Find(ch);
1208   if ( iPos != wxNOT_FOUND )
1209     str = wx_str() + iPos + 1;
1210
1211   return str;
1212 }
1213
1214 // replace first (or all) occurences of some substring with another one
1215 size_t wxString::Replace(const wxString& strOld,
1216                          const wxString& strNew, bool bReplaceAll)
1217 {
1218     // if we tried to replace an empty string we'd enter an infinite loop below
1219     wxCHECK_MSG( !strOld.empty(), 0,
1220                  _T("wxString::Replace(): invalid parameter") );
1221
1222     size_t uiCount = 0;   // count of replacements made
1223
1224     size_t uiOldLen = strOld.length();
1225     size_t uiNewLen = strNew.length();
1226
1227     size_t dwPos = 0;
1228
1229     while ( (*this)[dwPos] != wxT('\0') )
1230     {
1231         //DO NOT USE STRSTR HERE
1232         //this string can contain embedded null characters,
1233         //so strstr will function incorrectly
1234         dwPos = find(strOld, dwPos);
1235         if ( dwPos == npos )
1236             break;                  // exit the loop
1237         else
1238         {
1239             //replace this occurance of the old string with the new one
1240             replace(dwPos, uiOldLen, strNew, uiNewLen);
1241
1242             //move up pos past the string that was replaced
1243             dwPos += uiNewLen;
1244
1245             //increase replace count
1246             ++uiCount;
1247
1248             // stop now?
1249             if ( !bReplaceAll )
1250                 break;                  // exit the loop
1251         }
1252     }
1253
1254     return uiCount;
1255 }
1256
1257 bool wxString::IsAscii() const
1258 {
1259     for ( const_iterator i = begin(); i != end(); ++i )
1260     {
1261         if ( !(*i).IsAscii() )
1262             return false;
1263     }
1264
1265     return true;
1266 }
1267
1268 bool wxString::IsWord() const
1269 {
1270     for ( const_iterator i = begin(); i != end(); ++i )
1271     {
1272         if ( !wxIsalpha(*i) )
1273             return false;
1274     }
1275
1276     return true;
1277 }
1278
1279 bool wxString::IsNumber() const
1280 {
1281     if ( empty() )
1282         return true;
1283
1284     const_iterator i = begin();
1285
1286     if ( *i == _T('-') || *i == _T('+') )
1287         ++i;
1288
1289     for ( ; i != end(); ++i )
1290     {
1291         if ( !wxIsdigit(*i) )
1292             return false;
1293     }
1294
1295     return true;
1296 }
1297
1298 wxString wxString::Strip(stripType w) const
1299 {
1300     wxString s = *this;
1301     if ( w & leading ) s.Trim(false);
1302     if ( w & trailing ) s.Trim(true);
1303     return s;
1304 }
1305
1306 // ---------------------------------------------------------------------------
1307 // case conversion
1308 // ---------------------------------------------------------------------------
1309
1310 wxString& wxString::MakeUpper()
1311 {
1312   for ( iterator it = begin(), en = end(); it != en; ++it )
1313     *it = (wxChar)wxToupper(*it);
1314
1315   return *this;
1316 }
1317
1318 wxString& wxString::MakeLower()
1319 {
1320   for ( iterator it = begin(), en = end(); it != en; ++it )
1321     *it = (wxChar)wxTolower(*it);
1322
1323   return *this;
1324 }
1325
1326 // ---------------------------------------------------------------------------
1327 // trimming and padding
1328 // ---------------------------------------------------------------------------
1329
1330 // some compilers (VC++ 6.0 not to name them) return true for a call to
1331 // isspace('ê') in the C locale which seems to be broken to me, but we have to
1332 // live with this by checking that the character is a 7 bit one - even if this
1333 // may fail to detect some spaces (I don't know if Unicode doesn't have
1334 // space-like symbols somewhere except in the first 128 chars), it is arguably
1335 // still better than trimming away accented letters
1336 inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1337
1338 // trims spaces (in the sense of isspace) from left or right side
1339 wxString& wxString::Trim(bool bFromRight)
1340 {
1341     // first check if we're going to modify the string at all
1342     if ( !empty() &&
1343          (
1344           (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1345           (!bFromRight && wxSafeIsspace(GetChar(0u)))
1346          )
1347        )
1348     {
1349         if ( bFromRight )
1350         {
1351             // find last non-space character
1352             reverse_iterator psz = rbegin();
1353             while ( (psz != rend()) && wxSafeIsspace(*psz) )
1354                 ++psz;
1355
1356             // truncate at trailing space start
1357             erase(psz.base(), end());
1358         }
1359         else
1360         {
1361             // find first non-space character
1362             iterator psz = begin();
1363             while ( (psz != end()) && wxSafeIsspace(*psz) )
1364                 ++psz;
1365
1366             // fix up data and length
1367             erase(begin(), psz);
1368         }
1369     }
1370
1371     return *this;
1372 }
1373
1374 // adds nCount characters chPad to the string from either side
1375 wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1376 {
1377     wxString s(chPad, nCount);
1378
1379     if ( bFromRight )
1380         *this += s;
1381     else
1382     {
1383         s += *this;
1384         swap(s);
1385     }
1386
1387     return *this;
1388 }
1389
1390 // truncate the string
1391 wxString& wxString::Truncate(size_t uiLen)
1392 {
1393     if ( uiLen < length() )
1394     {
1395         erase(begin() + uiLen, end());
1396     }
1397     //else: nothing to do, string is already short enough
1398
1399     return *this;
1400 }
1401
1402 // ---------------------------------------------------------------------------
1403 // finding (return wxNOT_FOUND if not found and index otherwise)
1404 // ---------------------------------------------------------------------------
1405
1406 // find a character
1407 int wxString::Find(wxUniChar ch, bool bFromEnd) const
1408 {
1409     size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1410
1411     return (idx == npos) ? wxNOT_FOUND : (int)idx;
1412 }
1413
1414 // ----------------------------------------------------------------------------
1415 // conversion to numbers
1416 // ----------------------------------------------------------------------------
1417
// The implementation of all the functions below is exactly the same so factor
// it out. Note that number extraction works correctly on UTF-8 strings, so
// we can use wxStringCharType and wx_str() for maximum efficiency.

// DO_IF_NOT_WINCE(x) expands to x, except when __WXWINCE__ is defined, in
// which case it expands to nothing. It is used below to leave out the code
// dealing with errno in that case.
#ifndef __WXWINCE__
    #define DO_IF_NOT_WINCE(x) x
#else
    #define DO_IF_NOT_WINCE(x)
#endif

// WX_STRING_TO_INT_TYPE(val, base, func) expands to the complete body of a
// bool-returning conversion member function:
//  - val is the output pointer; if it is NULL, the expansion returns false
//    via wxCHECK_MSG
//  - base must be 0 or in the 2..36 range, this is checked by an assert only
//  - func is the strtol()-like function called as func(start, &end, base)
//
// As the expansion ends with a return statement (without the terminating
// semicolon, which is supplied where the macro is used), it must be the last
// thing in the function using it.
#define WX_STRING_TO_INT_TYPE(val, base, func)                              \
    wxCHECK_MSG( val, false, _T("NULL output pointer") );                   \
    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );  \
                                                                            \
    DO_IF_NOT_WINCE( errno = 0; )                                           \
                                                                            \
    const wxStringCharType *start = wx_str();                               \
    wxStringCharType *end;                                                  \
    *val = func(start, &end, base);                                         \
                                                                            \
    /* return true only if scan was stopped by the terminating NUL and */   \
    /* if the string was not empty to start with and no under/overflow */   \
    /* occurred: */                                                         \
    return !*end && (end != start)                                          \
        DO_IF_NOT_WINCE( && (errno != ERANGE) )
1443
// Converts the string to a long using wxStrtol() with the given base and
// stores the result in *val. Returns true only if the string was not empty,
// was consumed entirely and (when errno is checked, i.e. not under WinCE)
// the conversion didn't set errno to ERANGE. Returns false if val is NULL.
bool wxString::ToLong(long *val, int base) const
{
    WX_STRING_TO_INT_TYPE(val, base, wxStrtol);
}
1448
// Converts the string to an unsigned long using wxStrtoul() with the given
// base and stores the result in *val. Returns true only if the string was
// not empty, was consumed entirely and (when errno is checked, i.e. not
// under WinCE) the conversion didn't set errno to ERANGE. Returns false if
// val is NULL.
bool wxString::ToULong(unsigned long *val, int base) const
{
    WX_STRING_TO_INT_TYPE(val, base, wxStrtoul);
}
1453
// Converts the string to a wxLongLong_t using wxStrtoll() with the given
// base and stores the result in *val. Returns true only if the string was
// not empty, was consumed entirely and (when errno is checked, i.e. not
// under WinCE) the conversion didn't set errno to ERANGE. Returns false if
// val is NULL.
bool wxString::ToLongLong(wxLongLong_t *val, int base) const
{
    WX_STRING_TO_INT_TYPE(val, base, wxStrtoll);
}
1458
// Converts the string to a wxULongLong_t using wxStrtoull() with the given
// base and stores the result in *val. Returns true only if the string was
// not empty, was consumed entirely and (when errno is checked, i.e. not
// under WinCE) the conversion didn't set errno to ERANGE. Returns false if
// val is NULL.
bool wxString::ToULongLong(wxULongLong_t *val, int base) const
{
    WX_STRING_TO_INT_TYPE(val, base, wxStrtoull);
}
1463
1464 bool wxString::ToDouble(double *val) const
1465 {
1466     wxCHECK_MSG( val, false, _T("NULL pointer in wxString::ToDouble") );
1467
1468 #ifndef __WXWINCE__
1469     errno = 0;
1470 #endif
1471
1472     const wxChar *start = c_str();
1473     wxChar *end;
1474     *val = wxStrtod(start, &end);
1475
1476     // return true only if scan was stopped by the terminating NUL and if the
1477     // string was not empty to start with and no under/overflow occurred
1478     return !*end && (end != start)
1479 #ifndef __WXWINCE__
1480         && (errno != ERANGE)
1481 #endif
1482     ;
1483 }
1484
1485 // ---------------------------------------------------------------------------
1486 // formatted output
1487 // ---------------------------------------------------------------------------
1488
1489 #if !wxUSE_UTF8_LOCALE_ONLY
1490 /* static */
1491 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1492 wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
1493 #else
1494 wxString wxString::DoFormatWchar(const wxChar *format, ...)
1495 #endif
1496 {
1497     va_list argptr;
1498     va_start(argptr, format);
1499
1500     wxString s;
1501     s.PrintfV(format, argptr);
1502
1503     va_end(argptr);
1504
1505     return s;
1506 }
1507 #endif // !wxUSE_UTF8_LOCALE_ONLY
1508
#if wxUSE_UNICODE_UTF8
// Returns a new string formatted printf()-style according to the char format
// string and the variable arguments following it (static function).
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
1524
1525 /* static */
1526 wxString wxString::FormatV(const wxString& format, va_list argptr)
1527 {
1528     wxString s;
1529     s.PrintfV(format, argptr);
1530     return s;
1531 }
1532
1533 #if !wxUSE_UTF8_LOCALE_ONLY
1534 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1535 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
1536 #else
1537 int wxString::DoPrintfWchar(const wxChar *format, ...)
1538 #endif
1539 {
1540     va_list argptr;
1541     va_start(argptr, format);
1542
1543 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1544     // get a pointer to the wxString instance; we have to use dynamic_cast<>
1545     // because it's the only cast that works safely for downcasting when
1546     // multiple inheritance is used:
1547     wxString *str = static_cast<wxString*>(this);
1548 #else
1549     wxString *str = this;
1550 #endif
1551
1552     int iLen = str->PrintfV(format, argptr);
1553
1554     va_end(argptr);
1555
1556     return iLen;
1557 }
1558 #endif // !wxUSE_UTF8_LOCALE_ONLY
1559
#if wxUSE_UNICODE_UTF8
// Replaces the contents of the string with the result of formatting the
// variable arguments according to the char format string and returns
// whatever PrintfV() returns.
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);

    const int nWritten = PrintfV(format, args);

    va_end(args);

    return nWritten;
}
#endif // wxUSE_UNICODE_UTF8
1573
1574 #if wxUSE_UNICODE_UTF8
1575 template<typename BufferType>
1576 #else
1577 // we only need one version in non-UTF8 builds and at least two Windows
1578 // compilers have problems with this function template, so use just one
1579 // normal function here
1580 #endif
1581 static int DoStringPrintfV(wxString& str,
1582                            const wxString& format, va_list argptr)
1583 {
1584     int size = 1024;
1585
1586     for ( ;; )
1587     {
1588 #if wxUSE_UNICODE_UTF8
1589         BufferType tmp(str, size + 1);
1590         typename BufferType::CharType *buf = tmp;
1591 #else
1592         wxStringBuffer tmp(str, size + 1);
1593         wxChar *buf = tmp;
1594 #endif
1595
1596         if ( !buf )
1597         {
1598             // out of memory
1599
1600             // in UTF-8 build, leaving uninitialized junk in the buffer
1601             // could result in invalid non-empty UTF-8 string, so just
1602             // reset the string to empty on failure:
1603             buf[0] = '\0';
1604             return -1;
1605         }
1606
1607         // wxVsnprintf() may modify the original arg pointer, so pass it
1608         // only a copy
1609         va_list argptrcopy;
1610         wxVaCopy(argptrcopy, argptr);
1611         int len = wxVsnprintf(buf, size, format, argptrcopy);
1612         va_end(argptrcopy);
1613
1614         // some implementations of vsnprintf() don't NUL terminate
1615         // the string if there is not enough space for it so
1616         // always do it manually
1617         buf[size] = _T('\0');
1618
1619         // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1620         // total number of characters which would have been written if the
1621         // buffer were large enough (newer standards such as Unix98)
1622         if ( len < 0 )
1623         {
1624             // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1625             //     wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1626             //     is true if *both* of them use our own implementation,
1627             //     otherwise we can't be sure
1628 #if wxUSE_WXVSNPRINTF
1629             // we know that our own implementation of wxVsnprintf() returns -1
1630             // only for a format error - thus there's something wrong with
1631             // the user's format string
1632             buf[0] = '\0';
1633             return -1;
1634 #else // possibly using system version
1635             // assume it only returns error if there is not enough space, but
1636             // as we don't know how much we need, double the current size of
1637             // the buffer
1638             size *= 2;
1639 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1640         }
1641         else if ( len >= size )
1642         {
1643 #if wxUSE_WXVSNPRINTF
1644             // we know that our own implementation of wxVsnprintf() returns
1645             // size+1 when there's not enough space but that's not the size
1646             // of the required buffer!
1647             size *= 2;      // so we just double the current size of the buffer
1648 #else
1649             // some vsnprintf() implementations NUL-terminate the buffer and
1650             // some don't in len == size case, to be safe always add 1
1651             size = len + 1;
1652 #endif
1653         }
1654         else // ok, there was enough space
1655         {
1656             break;
1657         }
1658     }
1659
1660     // we could have overshot
1661     str.Shrink();
1662
1663     return str.length();
1664 }
1665
// Replaces the contents of the string with the result of formatting argptr
// according to format and returns the value returned by DoStringPrintfV().
//
// Which DoStringPrintfV() variant is used depends on the build:
//  - wxUSE_UTF8_LOCALE_ONLY: always the one using the UTF-8 buffer
//  - other wxUSE_UNICODE_UTF8 builds: chosen at run-time, the UTF-8 buffer
//    is used if wxLocaleIsUtf8 is true and wxStringBuffer otherwise
//  - all the other builds: the only, non-template, version
int wxString::PrintfV(const wxString& format, va_list argptr)
{
#if wxUSE_UNICODE_UTF8
    // the buffer type giving access to the string contents as char*
    #if wxUSE_STL_BASED_WXSTRING
        typedef wxStringTypeBuffer<char> Utf8Buffer;
    #else
        typedef wxStringInternalBuffer Utf8Buffer;
    #endif
#endif

#if wxUSE_UTF8_LOCALE_ONLY
    return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
#else
    #if wxUSE_UNICODE_UTF8
    if ( wxLocaleIsUtf8 )
        return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
    else
        // wxChar* version
        return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
    #else
        return DoStringPrintfV(*this, format, argptr);
    #endif // UTF8/WCHAR
#endif
}
1690
1691 // ----------------------------------------------------------------------------
1692 // misc other operations
1693 // ----------------------------------------------------------------------------
1694
// returns true if the string matches the pattern which may contain '*' and
// '?' metacharacters (as usual, '?' matches any character and '*' any number
// of them)
//
// The matching is done by walking the mask and the text in parallel. When a
// '*' is met, the literal run following it in the mask is searched for in
// the text with wxStrstr() and matching continues after it. If the end of
// the mask is reached while some text remains, matching is retried from the
// last '*' seen, starting one character further in the text.
bool wxString::Matches(const wxString& mask) const
{
    // I disable this code as it doesn't seem to be faster (in fact, it seems
    // to be much slower) than the old, hand-written code below and using it
    // here requires always linking with libregex even if the user code doesn't
    // use it
    //
    // NB: the disabled code uses pszMask which is only declared in the #else
    //     branch below, so it would need to be updated before being enabled
#if 0 // wxUSE_REGEX
    // first translate the shell-like mask into a regex
    wxString pattern;
    pattern.reserve(wxStrlen(pszMask));

    pattern += _T('^');
    while ( *pszMask )
    {
        switch ( *pszMask )
        {
            case _T('?'):
                pattern += _T('.');
                break;

            case _T('*'):
                pattern += _T(".*");
                break;

            case _T('^'):
            case _T('.'):
            case _T('$'):
            case _T('('):
            case _T(')'):
            case _T('|'):
            case _T('+'):
            case _T('\\'):
                // these characters are special in a RE, quote them
                // (however note that we don't quote '[' and ']' to allow
                // using them for Unix shell like matching)
                pattern += _T('\\');
                // fall through

            default:
                pattern += *pszMask;
        }

        pszMask++;
    }
    pattern += _T('$');

    // and now use it
    return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
#else // !wxUSE_REGEX
  // TODO: this is, of course, awfully inefficient...

  // FIXME-UTF8: implement using iterators, remove #if
#if wxUSE_UNICODE_UTF8
  // work on wchar_t copies of both strings; the buffer objects must stay
  // alive for as long as the pointers into them are used
  wxWCharBuffer maskBuf = mask.wc_str();
  wxWCharBuffer txtBuf = wc_str();
  const wxChar *pszMask = maskBuf.data();
  const wxChar *pszTxt = txtBuf.data();
#else
  const wxChar *pszMask = mask.wx_str();
  // the char currently being checked
  const wxChar *pszTxt = wx_str();
#endif

  // the last location where '*' matched
  const wxChar *pszLastStarInText = NULL;
  const wxChar *pszLastStarInMask = NULL;

  // (re)start of the matching loop, jumped to when backtracking below
match:
  for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
    switch ( *pszMask ) {
      case wxT('?'):
        // '?' needs exactly one character, so fail if the text is exhausted
        if ( *pszTxt == wxT('\0') )
          return false;

        // pszTxt and pszMask will be incremented in the loop statement

        break;

      case wxT('*'):
        {
          // remember where we started to be able to backtrack later
          pszLastStarInText = pszTxt;
          pszLastStarInMask = pszMask;

          // ignore special chars immediately following this one
          // (should this be an error?)
          //
          // NOTE(review): as written, a '?' directly following a '*' is
          // skipped without consuming any character of the text
          while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
            pszMask++;

          // if there is nothing more, match
          if ( *pszMask == wxT('\0') )
            return true;

          // are there any other metacharacters in the mask?
          size_t uiLenMask;
          const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));

          if ( pEndMask != NULL ) {
            // we have to match the string between two metachars
            uiLenMask = pEndMask - pszMask;
          }
          else {
            // we have to match the remainder of the string
            uiLenMask = wxStrlen(pszMask);
          }

          // look for the first occurrence of the literal part of the mask
          // in the remaining text
          wxString strToMatch(pszMask, uiLenMask);
          const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
          if ( pMatch == NULL )
            return false;

          // -1 to compensate "++" in the loop
          pszTxt = pMatch + uiLenMask - 1;
          pszMask += uiLenMask - 1;
        }
        break;

      default:
        // an ordinary character must match exactly (this also fails if the
        // text is exhausted as the mask character can't be NUL here)
        if ( *pszMask != *pszTxt )
          return false;
        break;
    }
  }

  // match only if nothing left
  if ( *pszTxt == wxT('\0') )
    return true;

  // if we failed to match, backtrack if we can
  if ( pszLastStarInText ) {
    // retry from the last '*', one character further in the text
    pszTxt = pszLastStarInText + 1;
    pszMask = pszLastStarInMask;

    pszLastStarInText = NULL;

    // don't bother resetting pszLastStarInMask, it's unnecessary

    goto match;
  }

  return false;
#endif // wxUSE_REGEX/!wxUSE_REGEX
}
1841
1842 // Count the number of chars
1843 int wxString::Freq(wxUniChar ch) const
1844 {
1845     int count = 0;
1846     for ( const_iterator i = begin(); i != end(); ++i )
1847     {
1848         if ( *i == ch )
1849             count ++;
1850     }
1851     return count;
1852 }
1853
1854 // convert to upper case, return the copy of the string
1855 wxString wxString::Upper() const
1856 { wxString s(*this); return s.MakeUpper(); }
1857
1858 // convert to lower case, return the copy of the string
1859 wxString wxString::Lower() const { wxString s(*this); return s.MakeLower(); }
1860
1861 // ----------------------------------------------------------------------------
1862 // wxUTF8StringBuffer
1863 // ----------------------------------------------------------------------------
1864
1865 #if wxUSE_UNICODE_WCHAR
1866 wxUTF8StringBuffer::~wxUTF8StringBuffer()
1867 {
1868     wxMBConvStrictUTF8 conv;
1869     size_t wlen = conv.ToWChar(NULL, 0, m_buf);
1870     wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
1871
1872     wxStringInternalBuffer wbuf(m_str, wlen);
1873     conv.ToWChar(wbuf, wlen, m_buf);
1874 }
1875
1876 wxUTF8StringBufferLength::~wxUTF8StringBufferLength()
1877 {
1878     wxCHECK_RET(m_lenSet, "length not set");
1879
1880     wxMBConvStrictUTF8 conv;
1881     size_t wlen = conv.ToWChar(NULL, 0, m_buf, m_len);
1882     wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
1883
1884     wxStringInternalBufferLength wbuf(m_str, wlen);
1885     conv.ToWChar(wbuf, wlen, m_buf, m_len);
1886     wbuf.SetLength(wlen);
1887 }
1888 #endif // wxUSE_UNICODE_WCHAR