src/common/string.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/string.cpp
   3 // Purpose:     wxString class
   4 // Author:      Vadim Zeitlin, Ryan Norton
   5 // Modified by:
   6 // Created:     29/01/98
   7 // RCS-ID:      $Id$
   8 // Copyright:   (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
   9 //              (c) 2004 Ryan Norton <wxprojects@comcast.net>
  10 // Licence:     wxWindows licence
  11 /////////////////////////////////////////////////////////////////////////////
  12
  13 // ===========================================================================
  14 // headers, declarations, constants
  15 // ===========================================================================
  16
  17 // For compilers that support precompilation, includes "wx.h".
  18 #include "wx/wxprec.h"
  19
  20 #ifdef __BORLANDC__
  21     #pragma hdrstop
  22 #endif
  23
  24 #ifndef WX_PRECOMP
  25     #include "wx/string.h"
  26     #include "wx/wxcrtvararg.h"
  27 #endif
  28
  29 #include <ctype.h>
  30
  31 #ifndef __WXWINCE__
  32     #include <errno.h>
  33 #endif
  34
  35 #include <string.h>
  36 #include <stdlib.h>
  37
  38 #ifdef __SALFORDC__
  39     #include <clib.h>
  40 #endif
  41
  42 #include "wx/hashmap.h"
  43
  44 // string handling functions used by wxString:
  45 #if wxUSE_UNICODE_UTF8
  46     #define wxStringMemcpy   memcpy
  47     #define wxStringMemcmp   memcmp
  48     #define wxStringMemchr   memchr
  49     #define wxStringStrlen   strlen
  50 #else
  51     #define wxStringMemcpy   wxTmemcpy
  52     #define wxStringMemcmp   wxTmemcmp
  53     #define wxStringMemchr   wxTmemchr
  54     #define wxStringStrlen   wxStrlen
  55 #endif
  56
  57
  58 // ---------------------------------------------------------------------------
  59 // static class variables definition
  60 // ---------------------------------------------------------------------------
  61
// According to the STL, npos _must_ be (size_t)-1
  63 const size_t wxString::npos = (size_t) -1;
  64
  65 // ----------------------------------------------------------------------------
  66 // global functions
  67 // ----------------------------------------------------------------------------
  68
  69 #if wxUSE_STD_IOSTREAM
  70
  71 #include <iostream>
  72
  73 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
  74 {
  75 // FIXME-UTF8: always, not only if wxUSE_UNICODE
  76 #if wxUSE_UNICODE && !defined(__BORLANDC__)
  77     return os << (const wchar_t*)str.AsWCharBuf();
  78 #else
  79     return os << (const char*)str.AsCharBuf();
  80 #endif
  81 }
  82
  83 wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
  84 {
  85     return os << str.c_str();
  86 }
  87
  88 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str)
  89 {
  90     return os << str.data();
  91 }
  92
  93 #ifndef __BORLANDC__
  94 wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)
  95 {
  96     return os << str.data();
  97 }
  98 #endif
  99
 100 #endif // wxUSE_STD_IOSTREAM
 101
 102 // ===========================================================================
 103 // wxString class core
 104 // ===========================================================================
 105
 106 #if wxUSE_UNICODE_UTF8
 107
 108 void wxString::PosLenToImpl(size_t pos, size_t len,
 109                             size_t *implPos, size_t *implLen) const
 110 {
 111     if ( pos == npos )
 112         *implPos = npos;
 113     else
 114     {
 115         const_iterator i = begin() + pos;
 116         *implPos = wxStringImpl::const_iterator(i.impl()) - m_impl.begin();
 117         if ( len == npos )
 118             *implLen = npos;
 119         else
 120         {
 121             // too large length is interpreted as "to the end of the string"
 122             // FIXME-UTF8: verify this is the case in std::string, assert
 123             // otherwise
 124             if ( pos + len > length() )
 125                 len = length() - pos;
 126
 127             *implLen = (i + len).impl() - i.impl();
 128         }
 129     }
 130 }
 131
 132 #endif // wxUSE_UNICODE_UTF8
 133
 134 // ----------------------------------------------------------------------------
 135 // wxCStrData converted strings caching
 136 // ----------------------------------------------------------------------------
 137
 138 // FIXME-UTF8: temporarily disabled because it doesn't work with global
 139 //             string objects; re-enable after fixing this bug and benchmarking
 140 //             performance to see if using a hash is a good idea at all
 141 #if 0
 142
 143 // For backward compatibility reasons, it must be possible to assign the value
 144 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
 145 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
 146 // because the memory would be freed immediately, but it has to be valid as long
 147 // as the string is not modified, so that code like this still works:
 148 //
 149 // const wxChar *s = str.c_str();
 150 // while ( s ) { ... }
 151
 152 // FIXME-UTF8: not thread safe!
 153 // FIXME-UTF8: we currently clear the cached conversion only when the string is
 154 //             destroyed, but we should do it when the string is modified, to
 155 //             keep memory usage down
 156 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
 157 //             invalidated the cache on every change, we could keep the previous
 158 //             conversion
 159 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
 160 //             to use mb_str() or wc_str() instead of (const [w]char*)c_str()
 161
 162 template<typename T>
 163 static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
 164 {
 165     typename T::iterator i = hash.find(wxConstCast(s, wxString));
 166     if ( i != hash.end() )
 167     {
 168         free(i->second);
 169         hash.erase(i);
 170     }
 171 }
 172
 173 #if wxUSE_UNICODE
 174 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
 175 //     so we have to use wxString* here and const-cast when used
 176 WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
 177                     wxStringCharConversionCache);
 178 static wxStringCharConversionCache gs_stringsCharCache;
 179
 180 const char* wxCStrData::AsChar() const
 181 {
 182     // remove previously cache value, if any (see FIXMEs above):
 183     DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
 184
 185     // convert the string and keep it:
 186     const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
 187         m_str->mb_str().release();
 188
 189     return s + m_offset;
 190 }
 191 #endif // wxUSE_UNICODE
 192
 193 #if !wxUSE_UNICODE_WCHAR
 194 WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
 195                     wxStringWCharConversionCache);
 196 static wxStringWCharConversionCache gs_stringsWCharCache;
 197
 198 const wchar_t* wxCStrData::AsWChar() const
 199 {
 200     // remove previously cache value, if any (see FIXMEs above):
 201     DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
 202
 203     // convert the string and keep it:
 204     const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
 205         m_str->wc_str().release();
 206
 207     return s + m_offset;
 208 }
 209 #endif // !wxUSE_UNICODE_WCHAR
 210
// Removes this string's entries from the conversion caches so that the
// converted buffers stored there are freed together with the string.
wxString::~wxString()
{
#if wxUSE_UNICODE
    // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
    DeleteStringFromConversionCache(gs_stringsCharCache, this);
#endif
#if !wxUSE_UNICODE_WCHAR
    // the wchar_t cache only exists when wchar_t isn't the native form
    DeleteStringFromConversionCache(gs_stringsWCharCache, this);
#endif
}
 221 #endif
 222
 223 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 224 const char* wxCStrData::AsChar() const
 225 {
 226 #if wxUSE_UNICODE_UTF8
 227     if ( wxLocaleIsUtf8 )
 228         return AsInternal();
 229 #endif
 230     // under non-UTF8 locales, we have to convert the internal UTF-8
 231     // representation using wxConvLibc and cache the result
 232
 233     wxString *str = wxConstCast(m_str, wxString);
 234
 235     // convert the string:
 236     wxCharBuffer buf(str->mb_str());
 237
 238     // FIXME-UTF8: do the conversion in-place in the existing buffer
 239     if ( str->m_convertedToChar &&
 240          strlen(buf) == strlen(str->m_convertedToChar) )
 241     {
 242         // keep the same buffer for as long as possible, so that several calls
 243         // to c_str() in a row still work:
 244         strcpy(str->m_convertedToChar, buf);
 245     }
 246     else
 247     {
 248         str->m_convertedToChar = buf.release();
 249     }
 250
 251     // and keep it:
 252     return str->m_convertedToChar + m_offset;
 253 }
 254 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 255
 256 #if !wxUSE_UNICODE_WCHAR
 257 const wchar_t* wxCStrData::AsWChar() const
 258 {
 259     wxString *str = wxConstCast(m_str, wxString);
 260
 261     // convert the string:
 262     wxWCharBuffer buf(str->wc_str());
 263
 264     // FIXME-UTF8: do the conversion in-place in the existing buffer
 265     if ( str->m_convertedToWChar &&
 266          wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
 267     {
 268         // keep the same buffer for as long as possible, so that several calls
 269         // to c_str() in a row still work:
 270         memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
 271     }
 272     else
 273     {
 274         str->m_convertedToWChar = buf.release();
 275     }
 276
 277     // and keep it:
 278     return str->m_convertedToWChar + m_offset;
 279 }
 280 #endif // !wxUSE_UNICODE_WCHAR
 281
 282 // ===========================================================================
 283 // wxString class core
 284 // ===========================================================================
 285
 286 // ---------------------------------------------------------------------------
 287 // construction and conversion
 288 // ---------------------------------------------------------------------------
 289
 290 #if wxUSE_UNICODE_WCHAR
 291 /* static */
 292 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 293                                                const wxMBConv& conv)
 294 {
 295     // anything to do?
 296     if ( !psz || nLength == 0 )
 297         return SubstrBufFromMB(L"", 0);
 298
 299     if ( nLength == npos )
 300         nLength = wxNO_LEN;
 301
 302     size_t wcLen;
 303     wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 304     if ( !wcLen )
 305         return SubstrBufFromMB(_T(""), 0);
 306     else
 307         return SubstrBufFromMB(wcBuf, wcLen);
 308 }
 309 #endif // wxUSE_UNICODE_WCHAR
 310
 311 #if wxUSE_UNICODE_UTF8
 312 /* static */
 313 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 314                                                const wxMBConv& conv)
 315 {
 316     // anything to do?
 317     if ( !psz || nLength == 0 )
 318         return SubstrBufFromMB("", 0);
 319
 320     // if psz is already in UTF-8, we don't have to do the roundtrip to
 321     // wchar_t* and back:
 322     if ( conv.IsUTF8() )
 323     {
 324         // we need to validate the input because UTF8 iterators assume valid
 325         // UTF-8 sequence and psz may be invalid:
 326         if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
 327         {
 328             return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz), nLength);
 329         }
 330         // else: do the roundtrip through wchar_t*
 331     }
 332
 333     if ( nLength == npos )
 334         nLength = wxNO_LEN;
 335
 336     // first convert to wide string:
 337     size_t wcLen;
 338     wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 339     if ( !wcLen )
 340         return SubstrBufFromMB("", 0);
 341
 342     // and then to UTF-8:
 343     SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
 344     // widechar -> UTF-8 conversion isn't supposed to ever fail:
 345     wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
 346
 347     return buf;
 348 }
 349 #endif // wxUSE_UNICODE_UTF8
 350
 351 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 352 /* static */
 353 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
 354                                                const wxMBConv& conv)
 355 {
 356     // anything to do?
 357     if ( !pwz || nLength == 0 )
 358         return SubstrBufFromWC("", 0);
 359
 360     if ( nLength == npos )
 361         nLength = wxNO_LEN;
 362
 363     size_t mbLen;
 364     wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
 365     if ( !mbLen )
 366         return SubstrBufFromWC("", 0);
 367     else
 368         return SubstrBufFromWC(mbBuf, mbLen);
 369 }
 370 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 371
 372
 373 #if wxUSE_UNICODE_WCHAR
 374
 375 //Convert wxString in Unicode mode to a multi-byte string
 376 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
 377 {
 378     return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL);
 379 }
 380
 381 #elif wxUSE_UNICODE_UTF8
 382
 383 const wxWCharBuffer wxString::wc_str() const
 384 {
 385     return wxMBConvStrictUTF8().cMB2WC
 386                                 (
 387                                     m_impl.c_str(),
 388                                     m_impl.length() + 1, // size, not length
 389                                     NULL
 390                                 );
 391 }
 392
 393 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
 394 {
 395     if ( conv.IsUTF8() )
 396         return wxCharBuffer::CreateNonOwned(m_impl.c_str());
 397
 398     // FIXME-UTF8: use wc_str() here once we have buffers with length
 399
 400     size_t wcLen;
 401     wxWCharBuffer wcBuf(wxMBConvStrictUTF8().cMB2WC
 402                                              (
 403                                                 m_impl.c_str(),
 404                                                 m_impl.length() + 1, // size
 405                                                 &wcLen
 406                                              ));
 407     if ( !wcLen )
 408         return wxCharBuffer("");
 409
 410     return conv.cWC2MB(wcBuf, wcLen+1, NULL);
 411 }
 412
 413 #else // ANSI
 414
 415 //Converts this string to a wide character string if unicode
 416 //mode is not enabled and wxUSE_WCHAR_T is enabled
 417 const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
 418 {
 419     return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL);
 420 }
 421
 422 #endif // Unicode/ANSI
 423
 424 // shrink to minimal size (releasing extra memory)
 425 bool wxString::Shrink()
 426 {
 427   wxString tmp(begin(), end());
 428   swap(tmp);
 429   return tmp.length() == length();
 430 }
 431
 432 // deprecated compatibility code:
 433 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
// Deprecated public entry point: forwards to DoGetWriteBuf() and returns its
// result unchanged.
wxStringCharType *wxString::GetWriteBuf(size_t nLen)
{
    return DoGetWriteBuf(nLen);
}
 438
// Deprecated public entry point: forwards to DoUngetWriteBuf().
void wxString::UngetWriteBuf()
{
    DoUngetWriteBuf();
}
 443
// Deprecated public entry point: forwards to DoUngetWriteBuf(nLen).
void wxString::UngetWriteBuf(size_t nLen)
{
    DoUngetWriteBuf(nLen);
}
 448 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
 449
 450
 451 // ---------------------------------------------------------------------------
 452 // data access
 453 // ---------------------------------------------------------------------------
 454
 455 // all functions are inline in string.h
 456
 457 // ---------------------------------------------------------------------------
 458 // concatenation operators
 459 // ---------------------------------------------------------------------------
 460
 461 /*
 462  * concatenation functions come in 5 flavours:
 463  *  string + string
 464  *  char   + string      and      string + char
 465  *  C str  + string      and      string + C str
 466  */
 467
 468 wxString operator+(const wxString& str1, const wxString& str2)
 469 {
 470 #if !wxUSE_STL_BASED_WXSTRING
 471     wxASSERT( str1.IsValid() );
 472     wxASSERT( str2.IsValid() );
 473 #endif
 474
 475     wxString s = str1;
 476     s += str2;
 477
 478     return s;
 479 }
 480
 481 wxString operator+(const wxString& str, wxUniChar ch)
 482 {
 483 #if !wxUSE_STL_BASED_WXSTRING
 484     wxASSERT( str.IsValid() );
 485 #endif
 486
 487     wxString s = str;
 488     s += ch;
 489
 490     return s;
 491 }
 492
 493 wxString operator+(wxUniChar ch, const wxString& str)
 494 {
 495 #if !wxUSE_STL_BASED_WXSTRING
 496     wxASSERT( str.IsValid() );
 497 #endif
 498
 499     wxString s = ch;
 500     s += str;
 501
 502     return s;
 503 }
 504
 505 wxString operator+(const wxString& str, const char *psz)
 506 {
 507 #if !wxUSE_STL_BASED_WXSTRING
 508     wxASSERT( str.IsValid() );
 509 #endif
 510
 511     wxString s;
 512     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 513         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 514     }
 515     s += str;
 516     s += psz;
 517
 518     return s;
 519 }
 520
 521 wxString operator+(const wxString& str, const wchar_t *pwz)
 522 {
 523 #if !wxUSE_STL_BASED_WXSTRING
 524     wxASSERT( str.IsValid() );
 525 #endif
 526
 527     wxString s;
 528     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 529         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 530     }
 531     s += str;
 532     s += pwz;
 533
 534     return s;
 535 }
 536
 537 wxString operator+(const char *psz, const wxString& str)
 538 {
 539 #if !wxUSE_STL_BASED_WXSTRING
 540     wxASSERT( str.IsValid() );
 541 #endif
 542
 543     wxString s;
 544     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 545         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 546     }
 547     s = psz;
 548     s += str;
 549
 550     return s;
 551 }
 552
 553 wxString operator+(const wchar_t *pwz, const wxString& str)
 554 {
 555 #if !wxUSE_STL_BASED_WXSTRING
 556     wxASSERT( str.IsValid() );
 557 #endif
 558
 559     wxString s;
 560     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 561         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 562     }
 563     s = pwz;
 564     s += str;
 565
 566     return s;
 567 }
 568
 569 // ---------------------------------------------------------------------------
 570 // string comparison
 571 // ---------------------------------------------------------------------------
 572
 573 bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
 574 {
 575     return (length() == 1) && (compareWithCase ? GetChar(0u) == c
 576                                : wxToupper(GetChar(0u)) == wxToupper(c));
 577 }
 578
 579 #ifdef HAVE_STD_STRING_COMPARE
 580
 581 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
 582 //     UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
 583 //     sort strings in characters code point order by sorting the byte sequence
 584 //     in byte values order (i.e. what strcmp() and memcmp() do).
 585
 586 int wxString::compare(const wxString& str) const
 587 {
 588     return m_impl.compare(str.m_impl);
 589 }
 590
 591 int wxString::compare(size_t nStart, size_t nLen,
 592                       const wxString& str) const
 593 {
 594     size_t pos, len;
 595     PosLenToImpl(nStart, nLen, &pos, &len);
 596     return m_impl.compare(pos, len, str.m_impl);
 597 }
 598
 599 int wxString::compare(size_t nStart, size_t nLen,
 600                       const wxString& str,
 601                       size_t nStart2, size_t nLen2) const
 602 {
 603     size_t pos, len;
 604     PosLenToImpl(nStart, nLen, &pos, &len);
 605
 606     size_t pos2, len2;
 607     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 608
 609     return m_impl.compare(pos, len, str.m_impl, pos2, len2);
 610 }
 611
 612 int wxString::compare(const char* sz) const
 613 {
 614     return m_impl.compare(ImplStr(sz));
 615 }
 616
 617 int wxString::compare(const wchar_t* sz) const
 618 {
 619     return m_impl.compare(ImplStr(sz));
 620 }
 621
 622 int wxString::compare(size_t nStart, size_t nLen,
 623                       const char* sz, size_t nCount) const
 624 {
 625     size_t pos, len;
 626     PosLenToImpl(nStart, nLen, &pos, &len);
 627
 628     SubstrBufFromMB str(ImplStr(sz, nCount));
 629
 630     return m_impl.compare(pos, len, str.data, str.len);
 631 }
 632
 633 int wxString::compare(size_t nStart, size_t nLen,
 634                       const wchar_t* sz, size_t nCount) const
 635 {
 636     size_t pos, len;
 637     PosLenToImpl(nStart, nLen, &pos, &len);
 638
 639     SubstrBufFromWC str(ImplStr(sz, nCount));
 640
 641     return m_impl.compare(pos, len, str.data, str.len);
 642 }
 643
 644 #else // !HAVE_STD_STRING_COMPARE
 645
 646 static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
 647                           const wxStringCharType* s2, size_t l2)
 648 {
 649     if( l1 == l2 )
 650         return wxStringMemcmp(s1, s2, l1);
 651     else if( l1 < l2 )
 652     {
 653         int ret = wxStringMemcmp(s1, s2, l1);
 654         return ret == 0 ? -1 : ret;
 655     }
 656     else
 657     {
 658         int ret = wxStringMemcmp(s1, s2, l2);
 659         return ret == 0 ? +1 : ret;
 660     }
 661 }
 662
 663 int wxString::compare(const wxString& str) const
 664 {
 665     return ::wxDoCmp(m_impl.data(), m_impl.length(),
 666                      str.m_impl.data(), str.m_impl.length());
 667 }
 668
 669 int wxString::compare(size_t nStart, size_t nLen,
 670                       const wxString& str) const
 671 {
 672     wxASSERT(nStart <= length());
 673     size_type strLen = length() - nStart;
 674     nLen = strLen < nLen ? strLen : nLen;
 675
 676     size_t pos, len;
 677     PosLenToImpl(nStart, nLen, &pos, &len);
 678
 679     return ::wxDoCmp(m_impl.data() + pos,  len,
 680                      str.m_impl.data(), str.m_impl.length());
 681 }
 682
 683 int wxString::compare(size_t nStart, size_t nLen,
 684                       const wxString& str,
 685                       size_t nStart2, size_t nLen2) const
 686 {
 687     wxASSERT(nStart <= length());
 688     wxASSERT(nStart2 <= str.length());
 689     size_type strLen  =     length() - nStart,
 690               strLen2 = str.length() - nStart2;
 691     nLen  = strLen  < nLen  ? strLen  : nLen;
 692     nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
 693
 694     size_t pos, len;
 695     PosLenToImpl(nStart, nLen, &pos, &len);
 696     size_t pos2, len2;
 697     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 698
 699     return ::wxDoCmp(m_impl.data() + pos, len,
 700                      str.m_impl.data() + pos2, len2);
 701 }
 702
 703 int wxString::compare(const char* sz) const
 704 {
 705     SubstrBufFromMB str(ImplStr(sz, npos));
 706     if ( str.len == npos )
 707         str.len = wxStringStrlen(str.data);
 708     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 709 }
 710
 711 int wxString::compare(const wchar_t* sz) const
 712 {
 713     SubstrBufFromWC str(ImplStr(sz, npos));
 714     if ( str.len == npos )
 715         str.len = wxStringStrlen(str.data);
 716     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 717 }
 718
 719 int wxString::compare(size_t nStart, size_t nLen,
 720                       const char* sz, size_t nCount) const
 721 {
 722     wxASSERT(nStart <= length());
 723     size_type strLen = length() - nStart;
 724     nLen = strLen < nLen ? strLen : nLen;
 725
 726     size_t pos, len;
 727     PosLenToImpl(nStart, nLen, &pos, &len);
 728
 729     SubstrBufFromMB str(ImplStr(sz, nCount));
 730     if ( str.len == npos )
 731         str.len = wxStringStrlen(str.data);
 732
 733     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 734 }
 735
 736 int wxString::compare(size_t nStart, size_t nLen,
 737                       const wchar_t* sz, size_t nCount) const
 738 {
 739     wxASSERT(nStart <= length());
 740     size_type strLen = length() - nStart;
 741     nLen = strLen < nLen ? strLen : nLen;
 742
 743     size_t pos, len;
 744     PosLenToImpl(nStart, nLen, &pos, &len);
 745
 746     SubstrBufFromWC str(ImplStr(sz, nCount));
 747     if ( str.len == npos )
 748         str.len = wxStringStrlen(str.data);
 749
 750     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 751 }
 752
 753 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
 754
 755
 756 // ---------------------------------------------------------------------------
 757 // find_{first,last}_[not]_of functions
 758 // ---------------------------------------------------------------------------
 759
 760 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
 761
 762 // NB: All these functions are implemented  with the argument being wxChar*,
 763 //     i.e. widechar string in any Unicode build, even though native string
 764 //     representation is char* in the UTF-8 build. This is because we couldn't
 765 //     use memchr() to determine if a character is in a set encoded as UTF-8.
 766
 767 size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
 768 {
 769     return find_first_of(sz, nStart, wxStrlen(sz));
 770 }
 771
 772 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
 773 {
 774     return find_first_not_of(sz, nStart, wxStrlen(sz));
 775 }
 776
 777 size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
 778 {
 779     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 780
 781     size_t idx = nStart;
 782     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 783     {
 784         if ( wxTmemchr(sz, *i, n) )
 785             return idx;
 786     }
 787
 788     return npos;
 789 }
 790
 791 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
 792 {
 793     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 794
 795     size_t idx = nStart;
 796     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 797     {
 798         if ( !wxTmemchr(sz, *i, n) )
 799             return idx;
 800     }
 801
 802     return npos;
 803 }
 804
 805
 806 size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
 807 {
 808     return find_last_of(sz, nStart, wxStrlen(sz));
 809 }
 810
 811 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
 812 {
 813     return find_last_not_of(sz, nStart, wxStrlen(sz));
 814 }
 815
 816 size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
 817 {
 818     size_t len = length();
 819
 820     if ( nStart == npos )
 821     {
 822         nStart = len - 1;
 823     }
 824     else
 825     {
 826         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 827     }
 828
 829     size_t idx = nStart;
 830     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 831           i != rend(); --idx, ++i )
 832     {
 833         if ( wxTmemchr(sz, *i, n) )
 834             return idx;
 835     }
 836
 837     return npos;
 838 }
 839
 840 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
 841 {
 842     size_t len = length();
 843
 844     if ( nStart == npos )
 845     {
 846         nStart = len - 1;
 847     }
 848     else
 849     {
 850         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 851     }
 852
 853     size_t idx = nStart;
 854     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 855           i != rend(); --idx, ++i )
 856     {
 857         if ( !wxTmemchr(sz, *i, n) )
 858             return idx;
 859     }
 860
 861     return npos;
 862 }
 863
 864 size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
 865 {
 866     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 867
 868     size_t idx = nStart;
 869     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 870     {
 871         if ( *i != ch )
 872             return idx;
 873     }
 874
 875     return npos;
 876 }
 877
 878 size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
 879 {
 880     size_t len = length();
 881
 882     if ( nStart == npos )
 883     {
 884         nStart = len - 1;
 885     }
 886     else
 887     {
 888         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 889     }
 890
 891     size_t idx = nStart;
 892     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 893           i != rend(); --idx, ++i )
 894     {
 895         if ( *i != ch )
 896             return idx;
 897     }
 898
 899     return npos;
 900 }
 901
 902 // the functions above were implemented for wchar_t* arguments in Unicode
 903 // build and char* in ANSI build; below are implementations for the other
 904 // version:
 905 #if wxUSE_UNICODE
 906     #define wxOtherCharType char
 907     #define STRCONV         (const wxChar*)wxConvLibc.cMB2WC
 908 #else
 909     #define wxOtherCharType wchar_t
 910     #define STRCONV         (const wxChar*)wxConvLibc.cWC2MB
 911 #endif
 912
 913 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
 914     { return find_first_of(STRCONV(sz), nStart); }
 915
 916 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
 917                                size_t n) const
 918     { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
 919 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
 920     { return find_last_of(STRCONV(sz), nStart); }
 921 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
 922                               size_t n) const
 923     { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
 924 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
 925     { return find_first_not_of(STRCONV(sz), nStart); }
 926 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
 927                                    size_t n) const
 928     { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
 929 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
 930     { return find_last_not_of(STRCONV(sz), nStart); }
 931 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
 932                                   size_t n) const
 933     { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
 934
 935 #undef wxOtherCharType
 936 #undef STRCONV
 937
 938 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
 939
 940 // ===========================================================================
 941 // other common string functions
 942 // ===========================================================================
 943
 944 int wxString::CmpNoCase(const wxString& s) const
 945 {
 946     // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
 947
 948     size_t idx = 0;
 949     const_iterator i1 = begin();
 950     const_iterator end1 = end();
 951     const_iterator i2 = s.begin();
 952     const_iterator end2 = s.end();
 953
 954     for ( ; i1 != end1 && i2 != end2; ++idx, ++i1, ++i2 )
 955     {
 956         wxUniChar lower1 = (wxChar)wxTolower(*i1);
 957         wxUniChar lower2 = (wxChar)wxTolower(*i2);
 958         if ( lower1 != lower2 )
 959             return lower1 < lower2 ? -1 : 1;
 960     }
 961
 962     size_t len1 = length();
 963     size_t len2 = s.length();
 964
 965     if ( len1 < len2 )
 966         return -1;
 967     else if ( len1 > len2 )
 968         return 1;
 969     return 0;
 970 }
 971
 972
 973 #if wxUSE_UNICODE
 974
 975 #ifdef __MWERKS__
 976 #ifndef __SCHAR_MAX__
 977 #define __SCHAR_MAX__ 127
 978 #endif
 979 #endif
 980
 981 wxString wxString::FromAscii(const char *ascii, size_t len)
 982 {
 983     if (!ascii || len == 0)
 984        return wxEmptyString;
 985
 986     wxString res;
 987
 988     {
 989         wxStringInternalBuffer buf(res, len);
 990         wxStringCharType *dest = buf;
 991
 992         for ( ; len > 0; --len )
 993         {
 994             unsigned char c = (unsigned char)*ascii++;
 995             wxASSERT_MSG( c < 0x80,
 996                           _T("Non-ASCII value passed to FromAscii().") );
 997
 998             *dest++ = (wchar_t)c;
 999         }
1000     }
1001
1002     return res;
1003 }
1004
1005 wxString wxString::FromAscii(const char *ascii)
1006 {
1007     return FromAscii(ascii, wxStrlen(ascii));
1008 }
1009
1010 wxString wxString::FromAscii(char ascii)
1011 {
1012     // What do we do with '\0' ?
1013
1014     unsigned char c = (unsigned char)ascii;
1015
1016     wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") );
1017
1018     // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1019     return wxString(wxUniChar((wchar_t)c));
1020 }
1021
1022 const wxCharBuffer wxString::ToAscii() const
1023 {
1024     // this will allocate enough space for the terminating NUL too
1025     wxCharBuffer buffer(length());
1026     char *dest = buffer.data();
1027
1028     for ( const_iterator i = begin(); i != end(); ++i )
1029     {
1030         wxUniChar c(*i);
1031         // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1032         *dest++ = c.IsAscii() ? (char)c : '_';
1033
1034         // the output string can't have embedded NULs anyhow, so we can safely
1035         // stop at first of them even if we do have any
1036         if ( !c )
1037             break;
1038     }
1039
1040     return buffer;
1041 }
1042
1043 #endif // wxUSE_UNICODE
1044
1045 // extract string of length nCount starting at nFirst
1046 wxString wxString::Mid(size_t nFirst, size_t nCount) const
1047 {
1048     size_t nLen = length();
1049
1050     // default value of nCount is npos and means "till the end"
1051     if ( nCount == npos )
1052     {
1053         nCount = nLen - nFirst;
1054     }
1055
1056     // out-of-bounds requests return sensible things
1057     if ( nFirst + nCount > nLen )
1058     {
1059         nCount = nLen - nFirst;
1060     }
1061
1062     if ( nFirst > nLen )
1063     {
1064         // AllocCopy() will return empty string
1065         return wxEmptyString;
1066     }
1067
1068     wxString dest(*this, nFirst, nCount);
1069     if ( dest.length() != nCount )
1070     {
1071         wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1072     }
1073
1074     return dest;
1075 }
1076
1077 // check that the string starts with prefix and return the rest of the string
1078 // in the provided pointer if it is not NULL, otherwise return false
1079 bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
1080 {
1081     if ( compare(0, prefix.length(), prefix) != 0 )
1082         return false;
1083
1084     if ( rest )
1085     {
1086         // put the rest of the string into provided pointer
1087         rest->assign(*this, prefix.length(), npos);
1088     }
1089
1090     return true;
1091 }
1092
1093
1094 // check that the string ends with suffix and return the rest of it in the
1095 // provided pointer if it is not NULL, otherwise return false
1096 bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
1097 {
1098     int start = length() - suffix.length();
1099
1100     if ( start < 0 || compare(start, npos, suffix) != 0 )
1101         return false;
1102
1103     if ( rest )
1104     {
1105         // put the rest of the string into provided pointer
1106         rest->assign(*this, 0, start);
1107     }
1108
1109     return true;
1110 }
1111
1112
1113 // extract nCount last (rightmost) characters
1114 wxString wxString::Right(size_t nCount) const
1115 {
1116   if ( nCount > length() )
1117     nCount = length();
1118
1119   wxString dest(*this, length() - nCount, nCount);
1120   if ( dest.length() != nCount ) {
1121     wxFAIL_MSG( _T("out of memory in wxString::Right") );
1122   }
1123   return dest;
1124 }
1125
1126 // get all characters after the last occurence of ch
1127 // (returns the whole string if ch not found)
1128 wxString wxString::AfterLast(wxUniChar ch) const
1129 {
1130   wxString str;
1131   int iPos = Find(ch, true);
1132   if ( iPos == wxNOT_FOUND )
1133     str = *this;
1134   else
1135     str = wx_str() + iPos + 1;
1136
1137   return str;
1138 }
1139
1140 // extract nCount first (leftmost) characters
1141 wxString wxString::Left(size_t nCount) const
1142 {
1143   if ( nCount > length() )
1144     nCount = length();
1145
1146   wxString dest(*this, 0, nCount);
1147   if ( dest.length() != nCount ) {
1148     wxFAIL_MSG( _T("out of memory in wxString::Left") );
1149   }
1150   return dest;
1151 }
1152
1153 // get all characters before the first occurence of ch
1154 // (returns the whole string if ch not found)
1155 wxString wxString::BeforeFirst(wxUniChar ch) const
1156 {
1157   int iPos = Find(ch);
1158   if ( iPos == wxNOT_FOUND ) iPos = length();
1159   return wxString(*this, 0, iPos);
1160 }
1161
1162 /// get all characters before the last occurence of ch
1163 /// (returns empty string if ch not found)
1164 wxString wxString::BeforeLast(wxUniChar ch) const
1165 {
1166   wxString str;
1167   int iPos = Find(ch, true);
1168   if ( iPos != wxNOT_FOUND && iPos != 0 )
1169     str = wxString(c_str(), iPos);
1170
1171   return str;
1172 }
1173
1174 /// get all characters after the first occurence of ch
1175 /// (returns empty string if ch not found)
1176 wxString wxString::AfterFirst(wxUniChar ch) const
1177 {
1178   wxString str;
1179   int iPos = Find(ch);
1180   if ( iPos != wxNOT_FOUND )
1181     str = wx_str() + iPos + 1;
1182
1183   return str;
1184 }
1185
1186 // replace first (or all) occurences of some substring with another one
1187 size_t wxString::Replace(const wxString& strOld,
1188                          const wxString& strNew, bool bReplaceAll)
1189 {
1190     // if we tried to replace an empty string we'd enter an infinite loop below
1191     wxCHECK_MSG( !strOld.empty(), 0,
1192                  _T("wxString::Replace(): invalid parameter") );
1193
1194     size_t uiCount = 0;   // count of replacements made
1195
1196     size_t uiOldLen = strOld.length();
1197     size_t uiNewLen = strNew.length();
1198
1199     size_t dwPos = 0;
1200
1201     while ( (*this)[dwPos] != wxT('\0') )
1202     {
1203         //DO NOT USE STRSTR HERE
1204         //this string can contain embedded null characters,
1205         //so strstr will function incorrectly
1206         dwPos = find(strOld, dwPos);
1207         if ( dwPos == npos )
1208             break;                  // exit the loop
1209         else
1210         {
1211             //replace this occurance of the old string with the new one
1212             replace(dwPos, uiOldLen, strNew, uiNewLen);
1213
1214             //move up pos past the string that was replaced
1215             dwPos += uiNewLen;
1216
1217             //increase replace count
1218             ++uiCount;
1219
1220             // stop now?
1221             if ( !bReplaceAll )
1222                 break;                  // exit the loop
1223         }
1224     }
1225
1226     return uiCount;
1227 }
1228
1229 bool wxString::IsAscii() const
1230 {
1231     for ( const_iterator i = begin(); i != end(); ++i )
1232     {
1233         if ( !(*i).IsAscii() )
1234             return false;
1235     }
1236
1237     return true;
1238 }
1239
1240 bool wxString::IsWord() const
1241 {
1242     for ( const_iterator i = begin(); i != end(); ++i )
1243     {
1244         if ( !wxIsalpha(*i) )
1245             return false;
1246     }
1247
1248     return true;
1249 }
1250
1251 bool wxString::IsNumber() const
1252 {
1253     if ( empty() )
1254         return true;
1255
1256     const_iterator i = begin();
1257
1258     if ( *i == _T('-') || *i == _T('+') )
1259         ++i;
1260
1261     for ( ; i != end(); ++i )
1262     {
1263         if ( !wxIsdigit(*i) )
1264             return false;
1265     }
1266
1267     return true;
1268 }
1269
1270 wxString wxString::Strip(stripType w) const
1271 {
1272     wxString s = *this;
1273     if ( w & leading ) s.Trim(false);
1274     if ( w & trailing ) s.Trim(true);
1275     return s;
1276 }
1277
1278 // ---------------------------------------------------------------------------
1279 // case conversion
1280 // ---------------------------------------------------------------------------
1281
1282 wxString& wxString::MakeUpper()
1283 {
1284   for ( iterator it = begin(), en = end(); it != en; ++it )
1285     *it = (wxChar)wxToupper(*it);
1286
1287   return *this;
1288 }
1289
1290 wxString& wxString::MakeLower()
1291 {
1292   for ( iterator it = begin(), en = end(); it != en; ++it )
1293     *it = (wxChar)wxTolower(*it);
1294
1295   return *this;
1296 }
1297
1298 // ---------------------------------------------------------------------------
1299 // trimming and padding
1300 // ---------------------------------------------------------------------------
1301
1302 // some compilers (VC++ 6.0 not to name them) return true for a call to
1303 // isspace('ê') in the C locale which seems to be broken to me, but we have to
1304 // live with this by checking that the character is a 7 bit one - even if this
1305 // may fail to detect some spaces (I don't know if Unicode doesn't have
1306 // space-like symbols somewhere except in the first 128 chars), it is arguably
1307 // still better than trimming away accented letters
1308 inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1309
1310 // trims spaces (in the sense of isspace) from left or right side
1311 wxString& wxString::Trim(bool bFromRight)
1312 {
1313     // first check if we're going to modify the string at all
1314     if ( !empty() &&
1315          (
1316           (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1317           (!bFromRight && wxSafeIsspace(GetChar(0u)))
1318          )
1319        )
1320     {
1321         if ( bFromRight )
1322         {
1323             // find last non-space character
1324             reverse_iterator psz = rbegin();
1325             while ( (psz != rend()) && wxSafeIsspace(*psz) )
1326                 psz++;
1327
1328             // truncate at trailing space start
1329             erase(psz.base(), end());
1330         }
1331         else
1332         {
1333             // find first non-space character
1334             iterator psz = begin();
1335             while ( (psz != end()) && wxSafeIsspace(*psz) )
1336                 psz++;
1337
1338             // fix up data and length
1339             erase(begin(), psz);
1340         }
1341     }
1342
1343     return *this;
1344 }
1345
1346 // adds nCount characters chPad to the string from either side
1347 wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1348 {
1349     wxString s(chPad, nCount);
1350
1351     if ( bFromRight )
1352         *this += s;
1353     else
1354     {
1355         s += *this;
1356         swap(s);
1357     }
1358
1359     return *this;
1360 }
1361
1362 // truncate the string
1363 wxString& wxString::Truncate(size_t uiLen)
1364 {
1365     if ( uiLen < length() )
1366     {
1367         erase(begin() + uiLen, end());
1368     }
1369     //else: nothing to do, string is already short enough
1370
1371     return *this;
1372 }
1373
1374 // ---------------------------------------------------------------------------
1375 // finding (return wxNOT_FOUND if not found and index otherwise)
1376 // ---------------------------------------------------------------------------
1377
1378 // find a character
1379 int wxString::Find(wxUniChar ch, bool bFromEnd) const
1380 {
1381     size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1382
1383     return (idx == npos) ? wxNOT_FOUND : (int)idx;
1384 }
1385
1386 // ----------------------------------------------------------------------------
1387 // conversion to numbers
1388 // ----------------------------------------------------------------------------
1389
1390 // The implementation of all the functions below is exactly the same so factor
1391 // it out. Note that number extraction works correctly on UTF-8 strings, so
1392 // we can use wxStringCharType and wx_str() for maximum efficiency.
1393
// DO_IF_NOT_WINCE(x) expands to x, except when __WXWINCE__ is defined in
// which case it expands to nothing. It is used below to leave out all the
// code using errno under Windows CE (<errno.h> is not included there).
#ifndef __WXWINCE__
    #define DO_IF_NOT_WINCE(x) x
#else
    #define DO_IF_NOT_WINCE(x)
#endif

// WX_STRING_TO_INT_TYPE(val, base, func) expands to the entire body of a
// string to integer conversion method:
//  - val is the output pointer, false is returned if it is NULL
//  - base is the numeric base, it must be 0 or in 2..36 range (asserted)
//  - func is the strtol()-like function doing the conversion
//
// The macro ends with an unterminated return statement, the terminating
// semicolon is provided at the place where the macro is used.
#define WX_STRING_TO_INT_TYPE(val, base, func)                              \
    wxCHECK_MSG( val, false, _T("NULL output pointer") );                   \
    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );  \
                                                                            \
    DO_IF_NOT_WINCE( errno = 0; )                                           \
                                                                            \
    const wxStringCharType *start = wx_str();                               \
    wxStringCharType *end;                                                  \
    *val = func(start, &end, base);                                         \
                                                                            \
    /* return true only if scan was stopped by the terminating NUL and */   \
    /* if the string was not empty to start with and no under/overflow */   \
    /* occurred: */                                                         \
    return !*end && (end != start)                                          \
        DO_IF_NOT_WINCE( && (errno != ERANGE) )
1415
// Convert the string to a long using wxStrtol() in the given base.
//
// Returns true and stores the result in *val only if the entire non-empty
// string was consumed by the conversion and no range error was reported; see
// WX_STRING_TO_INT_TYPE for the details.
bool wxString::ToLong(long *val, int base) const
{
    WX_STRING_TO_INT_TYPE(val, base, wxStrtol);
}
1420
// Convert the string to an unsigned long using wxStrtoul() in the given
// base; the return value follows the same rules as for ToLong(), see
// WX_STRING_TO_INT_TYPE for the details.
bool wxString::ToULong(unsigned long *val, int base) const
{
    WX_STRING_TO_INT_TYPE(val, base, wxStrtoul);
}
1425
// Convert the string to a wxLongLong_t using wxStrtoll() in the given base;
// the return value follows the same rules as for ToLong(), see
// WX_STRING_TO_INT_TYPE for the details.
bool wxString::ToLongLong(wxLongLong_t *val, int base) const
{
    WX_STRING_TO_INT_TYPE(val, base, wxStrtoll);
}
1430
// Convert the string to a wxULongLong_t using wxStrtoull() in the given
// base; the return value follows the same rules as for ToLong(), see
// WX_STRING_TO_INT_TYPE for the details.
bool wxString::ToULongLong(wxULongLong_t *val, int base) const
{
    WX_STRING_TO_INT_TYPE(val, base, wxStrtoull);
}
1435
1436 bool wxString::ToDouble(double *val) const
1437 {
1438     wxCHECK_MSG( val, false, _T("NULL pointer in wxString::ToDouble") );
1439
1440 #ifndef __WXWINCE__
1441     errno = 0;
1442 #endif
1443
1444     const wxChar *start = c_str();
1445     wxChar *end;
1446     *val = wxStrtod(start, &end);
1447
1448     // return true only if scan was stopped by the terminating NUL and if the
1449     // string was not empty to start with and no under/overflow occurred
1450     return !*end && (end != start)
1451 #ifndef __WXWINCE__
1452         && (errno != ERANGE)
1453 #endif
1454     ;
1455 }
1456
1457 // ---------------------------------------------------------------------------
1458 // formatted output
1459 // ---------------------------------------------------------------------------
1460
1461 #if !wxUSE_UTF8_LOCALE_ONLY
1462 /* static */
1463 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1464 wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
1465 #else
1466 wxString wxString::DoFormatWchar(const wxChar *format, ...)
1467 #endif
1468 {
1469     va_list argptr;
1470     va_start(argptr, format);
1471
1472     wxString s;
1473     s.PrintfV(format, argptr);
1474
1475     va_end(argptr);
1476
1477     return s;
1478 }
1479 #endif // !wxUSE_UTF8_LOCALE_ONLY
1480
#if wxUSE_UNICODE_UTF8
// Return a new string containing the result of formatting the variable
// arguments according to the char format string; the real work is done by
// PrintfV().
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
1496
1497 /* static */
1498 wxString wxString::FormatV(const wxString& format, va_list argptr)
1499 {
1500     wxString s;
1501     s.PrintfV(format, argptr);
1502     return s;
1503 }
1504
1505 #if !wxUSE_UTF8_LOCALE_ONLY
1506 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1507 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
1508 #else
1509 int wxString::DoPrintfWchar(const wxChar *format, ...)
1510 #endif
1511 {
1512     va_list argptr;
1513     va_start(argptr, format);
1514
1515 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1516     // get a pointer to the wxString instance; we have to use dynamic_cast<>
1517     // because it's the only cast that works safely for downcasting when
1518     // multiple inheritance is used:
1519     wxString *str = static_cast<wxString*>(this);
1520 #else
1521     wxString *str = this;
1522 #endif
1523
1524     int iLen = str->PrintfV(format, argptr);
1525
1526     va_end(argptr);
1527
1528     return iLen;
1529 }
1530 #endif // !wxUSE_UTF8_LOCALE_ONLY
1531
#if wxUSE_UNICODE_UTF8
// Replace the contents of the string with the result of formatting the
// variable arguments according to the char format string and return the
// value returned by PrintfV().
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);
    const int written = PrintfV(format, args);
    va_end(args);

    return written;
}
#endif // wxUSE_UNICODE_UTF8
1545
1546 #if wxUSE_UNICODE_UTF8
1547 template<typename BufferType>
1548 #else
1549 // we only need one version in non-UTF8 builds and at least two Windows
1550 // compilers have problems with this function template, so use just one
1551 // normal function here
1552 #endif
1553 static int DoStringPrintfV(wxString& str,
1554                            const wxString& format, va_list argptr)
1555 {
1556     int size = 1024;
1557
1558     for ( ;; )
1559     {
1560 #if wxUSE_UNICODE_UTF8
1561         BufferType tmp(str, size + 1);
1562         typename BufferType::CharType *buf = tmp;
1563 #else
1564         wxStringBuffer tmp(str, size + 1);
1565         wxChar *buf = tmp;
1566 #endif
1567
1568         if ( !buf )
1569         {
1570             // out of memory
1571
1572             // in UTF-8 build, leaving uninitialized junk in the buffer
1573             // could result in invalid non-empty UTF-8 string, so just
1574             // reset the string to empty on failure:
1575             buf[0] = '\0';
1576             return -1;
1577         }
1578
1579         // wxVsnprintf() may modify the original arg pointer, so pass it
1580         // only a copy
1581         va_list argptrcopy;
1582         wxVaCopy(argptrcopy, argptr);
1583         int len = wxVsnprintf(buf, size, format, argptrcopy);
1584         va_end(argptrcopy);
1585
1586         // some implementations of vsnprintf() don't NUL terminate
1587         // the string if there is not enough space for it so
1588         // always do it manually
1589         buf[size] = _T('\0');
1590
1591         // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1592         // total number of characters which would have been written if the
1593         // buffer were large enough (newer standards such as Unix98)
1594         if ( len < 0 )
1595         {
1596             // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1597             //     wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1598             //     is true if *both* of them use our own implementation,
1599             //     otherwise we can't be sure
1600 #if wxUSE_WXVSNPRINTF
1601             // we know that our own implementation of wxVsnprintf() returns -1
1602             // only for a format error - thus there's something wrong with
1603             // the user's format string
1604             buf[0] = '\0';
1605             return -1;
1606 #else // possibly using system version
1607             // assume it only returns error if there is not enough space, but
1608             // as we don't know how much we need, double the current size of
1609             // the buffer
1610             size *= 2;
1611 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1612         }
1613         else if ( len >= size )
1614         {
1615 #if wxUSE_WXVSNPRINTF
1616             // we know that our own implementation of wxVsnprintf() returns
1617             // size+1 when there's not enough space but that's not the size
1618             // of the required buffer!
1619             size *= 2;      // so we just double the current size of the buffer
1620 #else
1621             // some vsnprintf() implementations NUL-terminate the buffer and
1622             // some don't in len == size case, to be safe always add 1
1623             size = len + 1;
1624 #endif
1625         }
1626         else // ok, there was enough space
1627         {
1628             break;
1629         }
1630     }
1631
1632     // we could have overshot
1633     str.Shrink();
1634
1635     return str.length();
1636 }
1637
// Replace the contents of the string with the result of formatting the
// arguments in argptr according to format and return whatever
// DoStringPrintfV() returns.
//
// This function only selects, depending on the build options (and, in UTF-8
// builds not restricted to UTF-8 locales, on the value of wxLocaleIsUtf8),
// the variant of DoStringPrintfV() to use.
int wxString::PrintfV(const wxString& format, va_list argptr)
{
#if wxUSE_UNICODE_UTF8
    // buffer type used for writing char data to the string
    #if wxUSE_STL_BASED_WXSTRING
        typedef wxStringTypeBuffer<char> Utf8Buffer;
    #else
        typedef wxStringInternalBuffer Utf8Buffer;
    #endif
#endif

#if wxUSE_UTF8_LOCALE_ONLY
    return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
#else
    #if wxUSE_UNICODE_UTF8
    if ( wxLocaleIsUtf8 )
        return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
    else
        // wxChar* version
        return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
    #else
        // non-template version, see DoStringPrintfV() definition
        return DoStringPrintfV(*this, format, argptr);
    #endif // UTF8/WCHAR
#endif
}
1662
1663 // ----------------------------------------------------------------------------
1664 // misc other operations
1665 // ----------------------------------------------------------------------------
1666
// returns true if the string matches the pattern which may contain '*' and
// '?' metacharacters (as usual, '?' matches any character and '*' any number
// of them)
//
// The matching is done on NUL-terminated wxChar buffers (which are temporary
// copies in UTF-8 build). When the match fails after a '*' has been seen,
// the search is restarted from the position of that '*' in the mask and one
// character further in the text.
bool wxString::Matches(const wxString& mask) const
{
    // I disable this code as it doesn't seem to be faster (in fact, it seems
    // to be much slower) than the old, hand-written code below and using it
    // here requires always linking with libregex even if the user code doesn't
    // use it
    //
    // NB: the disabled code uses pszMask which is only declared in the other
    //     branch of this #if, so it would need updating before being enabled
#if 0 // wxUSE_REGEX
    // first translate the shell-like mask into a regex
    wxString pattern;
    pattern.reserve(wxStrlen(pszMask));

    pattern += _T('^');
    while ( *pszMask )
    {
        switch ( *pszMask )
        {
            case _T('?'):
                pattern += _T('.');
                break;

            case _T('*'):
                pattern += _T(".*");
                break;

            case _T('^'):
            case _T('.'):
            case _T('$'):
            case _T('('):
            case _T(')'):
            case _T('|'):
            case _T('+'):
            case _T('\\'):
                // these characters are special in a RE, quote them
                // (however note that we don't quote '[' and ']' to allow
                // using them for Unix shell like matching)
                pattern += _T('\\');
                // fall through

            default:
                pattern += *pszMask;
        }

        pszMask++;
    }
    pattern += _T('$');

    // and now use it
    return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
#else // !wxUSE_REGEX
  // TODO: this is, of course, awfully inefficient...

  // FIXME-UTF8: implement using iterators, remove #if
#if wxUSE_UNICODE_UTF8
  // the buffer objects must stay alive for as long as the pointers are used
  wxWCharBuffer maskBuf = mask.wc_str();
  wxWCharBuffer txtBuf = wc_str();
  const wxChar *pszMask = maskBuf.data();
  const wxChar *pszTxt = txtBuf.data();
#else
  const wxChar *pszMask = mask.wx_str();
  // the char currently being checked
  const wxChar *pszTxt = wx_str();
#endif

  // the last location where '*' matched
  const wxChar *pszLastStarInText = NULL;
  const wxChar *pszLastStarInMask = NULL;

  // we jump back here when backtracking after a failed match
match:
  for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
    switch ( *pszMask ) {
      case wxT('?'):
        // '?' needs a character to match, so fail at the end of the text
        if ( *pszTxt == wxT('\0') )
          return false;

        // pszTxt and pszMask will be incremented in the loop statement

        break;

      case wxT('*'):
        {
          // remember where we started to be able to backtrack later
          pszLastStarInText = pszTxt;
          pszLastStarInMask = pszMask;

          // ignore special chars immediately following this one
          // (should this be an error?)
          while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
            pszMask++;

          // if there is nothing more, match
          if ( *pszMask == wxT('\0') )
            return true;

          // are there any other metacharacters in the mask?
          size_t uiLenMask;
          const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));

          if ( pEndMask != NULL ) {
            // we have to match the string between two metachars
            uiLenMask = pEndMask - pszMask;
          }
          else {
            // we have to match the remainder of the string
            uiLenMask = wxStrlen(pszMask);
          }

          // look for the first occurrence of the literal part of the mask
          // following the '*' in the remaining text
          wxString strToMatch(pszMask, uiLenMask);
          const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
          if ( pMatch == NULL )
            return false;

          // -1 to compensate "++" in the loop
          pszTxt = pMatch + uiLenMask - 1;
          pszMask += uiLenMask - 1;
        }
        break;

      default:
        // any other character in the mask must match literally
        if ( *pszMask != *pszTxt )
          return false;
        break;
    }
  }

  // match only if nothing left
  if ( *pszTxt == wxT('\0') )
    return true;

  // if we failed to match, backtrack if we can
  if ( pszLastStarInText ) {
    pszTxt = pszLastStarInText + 1;
    pszMask = pszLastStarInMask;

    pszLastStarInText = NULL;

    // don't bother resetting pszLastStarInMask, it's unnecessary

    goto match;
  }

  return false;
#endif // wxUSE_REGEX/!wxUSE_REGEX
}
1813
1814 // Count the number of chars
1815 int wxString::Freq(wxUniChar ch) const
1816 {
1817     int count = 0;
1818     for ( const_iterator i = begin(); i != end(); ++i )
1819     {
1820         if ( *i == ch )
1821             count ++;
1822     }
1823     return count;
1824 }
1825
1826 // convert to upper case, return the copy of the string
1827 wxString wxString::Upper() const
1828 { wxString s(*this); return s.MakeUpper(); }
1829
1830 // convert to lower case, return the copy of the string
1831 wxString wxString::Lower() const { wxString s(*this); return s.MakeLower(); }
1832
1833 // ----------------------------------------------------------------------------
1834 // wxUTF8StringBuffer
1835 // ----------------------------------------------------------------------------
1836
1837 #if wxUSE_UNICODE_WCHAR
1838 wxUTF8StringBuffer::~wxUTF8StringBuffer()
1839 {
1840     wxMBConvStrictUTF8 conv;
1841     size_t wlen = conv.ToWChar(NULL, 0, m_buf);
1842     wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
1843
1844     wxStringInternalBuffer wbuf(m_str, wlen);
1845     conv.ToWChar(wbuf, wlen, m_buf);
1846 }
1847
1848 wxUTF8StringBufferLength::~wxUTF8StringBufferLength()
1849 {
1850     wxCHECK_RET(m_lenSet, "length not set");
1851
1852     wxMBConvStrictUTF8 conv;
1853     size_t wlen = conv.ToWChar(NULL, 0, m_buf, m_len);
1854     wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
1855
1856     wxStringInternalBufferLength wbuf(m_str, wlen);
1857     conv.ToWChar(wbuf, wlen, m_buf, m_len);
1858     wbuf.SetLength(wlen);
1859 }
1860 #endif // wxUSE_UNICODE_WCHAR