src/common/string.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/string.cpp
   3 // Purpose:     wxString class
   4 // Author:      Vadim Zeitlin, Ryan Norton
   5 // Modified by:
   6 // Created:     29/01/98
   7 // RCS-ID:      $Id$
   8 // Copyright:   (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
   9 //              (c) 2004 Ryan Norton <wxprojects@comcast.net>
  10 // Licence:     wxWindows licence
  11 /////////////////////////////////////////////////////////////////////////////
  12
  13 // ===========================================================================
  14 // headers, declarations, constants
  15 // ===========================================================================
  16
  17 // For compilers that support precompilation, includes "wx.h".
  18 #include "wx/wxprec.h"
  19
  20 #ifdef __BORLANDC__
  21     #pragma hdrstop
  22 #endif
  23
  24 #ifndef WX_PRECOMP
  25     #include "wx/string.h"
  26     #include "wx/wxcrtvararg.h"
  27 #endif
  28
  29 #include <ctype.h>
  30
  31 #ifndef __WXWINCE__
  32     #include <errno.h>
  33 #endif
  34
  35 #include <string.h>
  36 #include <stdlib.h>
  37
  38 #ifdef __SALFORDC__
  39     #include <clib.h>
  40 #endif
  41
  42 #include "wx/hashmap.h"
  43
  // Character-level primitives used by the wxString code below. The UTF-8
  // build maps them straight to the C library byte routines; every other build
  // uses the wxTmem*/wxStrlen counterparts.
  #if !wxUSE_UNICODE_UTF8
      #define wxStringMemcpy   wxTmemcpy
      #define wxStringMemcmp   wxTmemcmp
      #define wxStringMemchr   wxTmemchr
      #define wxStringStrlen   wxStrlen
  #else // wxUSE_UNICODE_UTF8
      #define wxStringMemcpy   memcpy
      #define wxStringMemcmp   memcmp
      #define wxStringMemchr   memchr
      #define wxStringStrlen   strlen
  #endif
  56
  57
  58 // ---------------------------------------------------------------------------
  59 // static class variables definition
  60 // ---------------------------------------------------------------------------
  61
  62 //According to STL _must_ be a -1 size_t
  63 const size_t wxString::npos = (size_t) -1;
  64
  65 // ----------------------------------------------------------------------------
  66 // global functions
  67 // ----------------------------------------------------------------------------
  68
  69 #if wxUSE_STD_IOSTREAM
  70
  71 #include <iostream>
  72
  73 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
  74 {
  75 // FIXME-UTF8: always, not only if wxUSE_UNICODE
  76 #if wxUSE_UNICODE && !defined(__BORLANDC__)
  77     return os << (const wchar_t*)str.AsWCharBuf();
  78 #else
  79     return os << (const char*)str.AsCharBuf();
  80 #endif
  81 }
  82
  83 wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
  84 {
  85     return os << str.c_str();
  86 }
  87
  88 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str)
  89 {
  90     return os << str.data();
  91 }
  92
  93 #ifndef __BORLANDC__
  94 wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)
  95 {
  96     return os << str.data();
  97 }
  98 #endif
  99
 100 #endif // wxUSE_STD_IOSTREAM
 101
 102 // ===========================================================================
 103 // wxString class core
 104 // ===========================================================================
 105
 106 #if wxUSE_UNICODE_UTF8
 107
 108 void wxString::PosLenToImpl(size_t pos, size_t len,
 109                             size_t *implPos, size_t *implLen) const
 110 {
 111     if ( pos == npos )
 112         *implPos = npos;
 113     else
 114     {
 115         const_iterator i = begin() + pos;
 116         *implPos = wxStringImpl::const_iterator(i.impl()) - m_impl.begin();
 117         if ( len == npos )
 118             *implLen = npos;
 119         else
 120         {
 121             // too large length is interpreted as "to the end of the string"
 122             // FIXME-UTF8: verify this is the case in std::string, assert
 123             // otherwise
 124             if ( pos + len > length() )
 125                 len = length() - pos;
 126
 127             *implLen = (i + len).impl() - i.impl();
 128         }
 129     }
 130 }
 131
 132 #endif // wxUSE_UNICODE_UTF8
 133
 134 // ----------------------------------------------------------------------------
 135 // wxCStrData converted strings caching
 136 // ----------------------------------------------------------------------------
 137
 138 // FIXME-UTF8: temporarily disabled because it doesn't work with global
 139 //             string objects; re-enable after fixing this bug and benchmarking
 140 //             performance to see if using a hash is a good idea at all
 141 #if 0
 142
 143 // For backward compatibility reasons, it must be possible to assign the value
 144 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
 145 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
 146 // because the memory would be freed immediately, but it has to be valid as long
 147 // as the string is not modified, so that code like this still works:
 148 //
 149 // const wxChar *s = str.c_str();
 150 // while ( s ) { ... }
 151
 152 // FIXME-UTF8: not thread safe!
 153 // FIXME-UTF8: we currently clear the cached conversion only when the string is
 154 //             destroyed, but we should do it when the string is modified, to
 155 //             keep memory usage down
 156 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
 157 //             invalidated the cache on every change, we could keep the previous
 158 //             conversion
 159 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
 160 //             to use mb_str() or wc_str() instead of (const [w]char*)c_str()
 161
 162 template<typename T>
 163 static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
 164 {
 165     typename T::iterator i = hash.find(wxConstCast(s, wxString));
 166     if ( i != hash.end() )
 167     {
 168         free(i->second);
 169         hash.erase(i);
 170     }
 171 }
 172
 173 #if wxUSE_UNICODE
 174 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
 175 //     so we have to use wxString* here and const-cast when used
 176 WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
 177                     wxStringCharConversionCache);
 178 static wxStringCharConversionCache gs_stringsCharCache;
 179
 180 const char* wxCStrData::AsChar() const
 181 {
 182     // remove previously cache value, if any (see FIXMEs above):
 183     DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
 184
 185     // convert the string and keep it:
 186     const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
 187         m_str->mb_str().release();
 188
 189     return s + m_offset;
 190 }
 191 #endif // wxUSE_UNICODE
 192
 193 #if !wxUSE_UNICODE_WCHAR
 194 WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
 195                     wxStringWCharConversionCache);
 196 static wxStringWCharConversionCache gs_stringsWCharCache;
 197
 198 const wchar_t* wxCStrData::AsWChar() const
 199 {
 200     // remove previously cache value, if any (see FIXMEs above):
 201     DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
 202
 203     // convert the string and keep it:
 204     const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
 205         m_str->wc_str().release();
 206
 207     return s + m_offset;
 208 }
 209 #endif // !wxUSE_UNICODE_WCHAR
 210
 211 wxString::~wxString()
 212 {
 213 #if wxUSE_UNICODE
 214     // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
 215     DeleteStringFromConversionCache(gs_stringsCharCache, this);
 216 #endif
 217 #if !wxUSE_UNICODE_WCHAR
 218     DeleteStringFromConversionCache(gs_stringsWCharCache, this);
 219 #endif
 220 }
 221 #endif
 222
 223 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 224 const char* wxCStrData::AsChar() const
 225 {
 226 #if wxUSE_UNICODE_UTF8
 227     if ( wxLocaleIsUtf8 )
 228         return AsInternal();
 229 #endif
 230     // under non-UTF8 locales, we have to convert the internal UTF-8
 231     // representation using wxConvLibc and cache the result
 232
 233     wxString *str = wxConstCast(m_str, wxString);
 234
 235     // convert the string:
 236     wxCharBuffer buf(str->mb_str());
 237
 238     // FIXME-UTF8: do the conversion in-place in the existing buffer
 239     if ( str->m_convertedToChar &&
 240          strlen(buf) == strlen(str->m_convertedToChar) )
 241     {
 242         // keep the same buffer for as long as possible, so that several calls
 243         // to c_str() in a row still work:
 244         strcpy(str->m_convertedToChar, buf);
 245     }
 246     else
 247     {
 248         str->m_convertedToChar = buf.release();
 249     }
 250
 251     // and keep it:
 252     return str->m_convertedToChar + m_offset;
 253 }
 254 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 255
 256 #if !wxUSE_UNICODE_WCHAR
 257 const wchar_t* wxCStrData::AsWChar() const
 258 {
 259     wxString *str = wxConstCast(m_str, wxString);
 260
 261     // convert the string:
 262     wxWCharBuffer buf(str->wc_str());
 263
 264     // FIXME-UTF8: do the conversion in-place in the existing buffer
 265     if ( str->m_convertedToWChar &&
 266          wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
 267     {
 268         // keep the same buffer for as long as possible, so that several calls
 269         // to c_str() in a row still work:
 270         memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
 271     }
 272     else
 273     {
 274         str->m_convertedToWChar = buf.release();
 275     }
 276
 277     // and keep it:
 278     return str->m_convertedToWChar + m_offset;
 279 }
 280 #endif // !wxUSE_UNICODE_WCHAR
 281
 282 // ===========================================================================
 283 // wxString class core
 284 // ===========================================================================
 285
 286 // ---------------------------------------------------------------------------
 287 // construction and conversion
 288 // ---------------------------------------------------------------------------
 289
 290 #if wxUSE_UNICODE_WCHAR
 291 /* static */
 292 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 293                                                const wxMBConv& conv)
 294 {
 295     // anything to do?
 296     if ( !psz || nLength == 0 )
 297         return SubstrBufFromMB(L"", 0);
 298
 299     if ( nLength == npos )
 300         nLength = wxNO_LEN;
 301
 302     size_t wcLen;
 303     wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 304     if ( !wcLen )
 305         return SubstrBufFromMB(_T(""), 0);
 306     else
 307         return SubstrBufFromMB(wcBuf, wcLen);
 308 }
 309 #endif // wxUSE_UNICODE_WCHAR
 310
 311 #if wxUSE_UNICODE_UTF8
 312 /* static */
 313 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 314                                                const wxMBConv& conv)
 315 {
 316     // anything to do?
 317     if ( !psz || nLength == 0 )
 318         return SubstrBufFromMB("", 0);
 319
 320     // if psz is already in UTF-8, we don't have to do the roundtrip to
 321     // wchar_t* and back:
 322     if ( conv.IsUTF8() )
 323     {
 324         // we need to validate the input because UTF8 iterators assume valid
 325         // UTF-8 sequence and psz may be invalid:
 326         if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
 327         {
 328             return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz), nLength);
 329         }
 330         // else: do the roundtrip through wchar_t*
 331     }
 332
 333     if ( nLength == npos )
 334         nLength = wxNO_LEN;
 335
 336     // first convert to wide string:
 337     size_t wcLen;
 338     wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 339     if ( !wcLen )
 340         return SubstrBufFromMB("", 0);
 341
 342     // and then to UTF-8:
 343     SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvUTF8()));
 344     // widechar -> UTF-8 conversion isn't supposed to ever fail:
 345     wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
 346
 347     return buf;
 348 }
 349 #endif // wxUSE_UNICODE_UTF8
 350
 351 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 352 /* static */
 353 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
 354                                                const wxMBConv& conv)
 355 {
 356     // anything to do?
 357     if ( !pwz || nLength == 0 )
 358         return SubstrBufFromWC("", 0);
 359
 360     if ( nLength == npos )
 361         nLength = wxNO_LEN;
 362
 363     size_t mbLen;
 364     wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
 365     if ( !mbLen )
 366         return SubstrBufFromWC("", 0);
 367     else
 368         return SubstrBufFromWC(mbBuf, mbLen);
 369 }
 370 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 371
 372
 373 #if wxUSE_UNICODE_WCHAR
 374
 375 //Convert wxString in Unicode mode to a multi-byte string
 376 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
 377 {
 378     return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL);
 379 }
 380
 381 #elif wxUSE_UNICODE_UTF8
 382
 383 const wxWCharBuffer wxString::wc_str() const
 384 {
 385     return wxMBConvUTF8().cMB2WC(m_impl.c_str(),
 386                                  m_impl.length() + 1 /* size, not length */,
 387                                  NULL);
 388 }
 389
 390 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
 391 {
 392     if ( conv.IsUTF8() )
 393         return wxCharBuffer::CreateNonOwned(m_impl.c_str());
 394
 395     // FIXME-UTF8: use wc_str() here once we have buffers with length
 396
 397     size_t wcLen;
 398     wxWCharBuffer wcBuf(
 399             wxMBConvUTF8().cMB2WC(m_impl.c_str(),
 400                                   m_impl.length() + 1 /* size, not length */,
 401                                   &wcLen));
 402     if ( !wcLen )
 403         return wxCharBuffer("");
 404
 405     return conv.cWC2MB(wcBuf, wcLen, NULL);
 406 }
 407
 408 #else // ANSI
 409
 410 //Converts this string to a wide character string if unicode
 411 //mode is not enabled and wxUSE_WCHAR_T is enabled
 412 const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
 413 {
 414     return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL);
 415 }
 416
 417 #endif // Unicode/ANSI
 418
 419 // shrink to minimal size (releasing extra memory)
 420 bool wxString::Shrink()
 421 {
 422   wxString tmp(begin(), end());
 423   swap(tmp);
 424   return tmp.length() == length();
 425 }
 426
 427 // deprecated compatibility code:
 428 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
 429 wxStringCharType *wxString::GetWriteBuf(size_t nLen)
 430 {
 431     return DoGetWriteBuf(nLen);
 432 }
 433
 434 void wxString::UngetWriteBuf()
 435 {
 436     DoUngetWriteBuf();
 437 }
 438
 439 void wxString::UngetWriteBuf(size_t nLen)
 440 {
 441     DoUngetWriteBuf(nLen);
 442 }
 443 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
 444
 445
 446 // ---------------------------------------------------------------------------
 447 // data access
 448 // ---------------------------------------------------------------------------
 449
 450 // all functions are inline in string.h
 451
 452 // ---------------------------------------------------------------------------
 453 // concatenation operators
 454 // ---------------------------------------------------------------------------
 455
 456 /*
 457  * concatenation functions come in 5 flavours:
 458  *  string + string
 459  *  char   + string      and      string + char
 460  *  C str  + string      and      string + C str
 461  */
 462
 463 wxString operator+(const wxString& str1, const wxString& str2)
 464 {
 465 #if !wxUSE_STL_BASED_WXSTRING
 466     wxASSERT( str1.IsValid() );
 467     wxASSERT( str2.IsValid() );
 468 #endif
 469
 470     wxString s = str1;
 471     s += str2;
 472
 473     return s;
 474 }
 475
 476 wxString operator+(const wxString& str, wxUniChar ch)
 477 {
 478 #if !wxUSE_STL_BASED_WXSTRING
 479     wxASSERT( str.IsValid() );
 480 #endif
 481
 482     wxString s = str;
 483     s += ch;
 484
 485     return s;
 486 }
 487
 488 wxString operator+(wxUniChar ch, const wxString& str)
 489 {
 490 #if !wxUSE_STL_BASED_WXSTRING
 491     wxASSERT( str.IsValid() );
 492 #endif
 493
 494     wxString s = ch;
 495     s += str;
 496
 497     return s;
 498 }
 499
 500 wxString operator+(const wxString& str, const char *psz)
 501 {
 502 #if !wxUSE_STL_BASED_WXSTRING
 503     wxASSERT( str.IsValid() );
 504 #endif
 505
 506     wxString s;
 507     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 508         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 509     }
 510     s += str;
 511     s += psz;
 512
 513     return s;
 514 }
 515
 516 wxString operator+(const wxString& str, const wchar_t *pwz)
 517 {
 518 #if !wxUSE_STL_BASED_WXSTRING
 519     wxASSERT( str.IsValid() );
 520 #endif
 521
 522     wxString s;
 523     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 524         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 525     }
 526     s += str;
 527     s += pwz;
 528
 529     return s;
 530 }
 531
 532 wxString operator+(const char *psz, const wxString& str)
 533 {
 534 #if !wxUSE_STL_BASED_WXSTRING
 535     wxASSERT( str.IsValid() );
 536 #endif
 537
 538     wxString s;
 539     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 540         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 541     }
 542     s = psz;
 543     s += str;
 544
 545     return s;
 546 }
 547
 548 wxString operator+(const wchar_t *pwz, const wxString& str)
 549 {
 550 #if !wxUSE_STL_BASED_WXSTRING
 551     wxASSERT( str.IsValid() );
 552 #endif
 553
 554     wxString s;
 555     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 556         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 557     }
 558     s = pwz;
 559     s += str;
 560
 561     return s;
 562 }
 563
 564 // ---------------------------------------------------------------------------
 565 // string comparison
 566 // ---------------------------------------------------------------------------
 567
 568 bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
 569 {
 570     return (length() == 1) && (compareWithCase ? GetChar(0u) == c
 571                                : wxToupper(GetChar(0u)) == wxToupper(c));
 572 }
 573
 574 #ifdef HAVE_STD_STRING_COMPARE
 575
 576 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
 577 //     UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
 578 //     sort strings in characters code point order by sorting the byte sequence
 579 //     in byte values order (i.e. what strcmp() and memcmp() do).
 580
 581 int wxString::compare(const wxString& str) const
 582 {
 583     return m_impl.compare(str.m_impl);
 584 }
 585
 586 int wxString::compare(size_t nStart, size_t nLen,
 587                       const wxString& str) const
 588 {
 589     size_t pos, len;
 590     PosLenToImpl(nStart, nLen, &pos, &len);
 591     return m_impl.compare(pos, len, str.m_impl);
 592 }
 593
 594 int wxString::compare(size_t nStart, size_t nLen,
 595                       const wxString& str,
 596                       size_t nStart2, size_t nLen2) const
 597 {
 598     size_t pos, len;
 599     PosLenToImpl(nStart, nLen, &pos, &len);
 600
 601     size_t pos2, len2;
 602     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 603
 604     return m_impl.compare(pos, len, str.m_impl, pos2, len2);
 605 }
 606
 607 int wxString::compare(const char* sz) const
 608 {
 609     return m_impl.compare(ImplStr(sz));
 610 }
 611
 612 int wxString::compare(const wchar_t* sz) const
 613 {
 614     return m_impl.compare(ImplStr(sz));
 615 }
 616
 617 int wxString::compare(size_t nStart, size_t nLen,
 618                       const char* sz, size_t nCount) const
 619 {
 620     size_t pos, len;
 621     PosLenToImpl(nStart, nLen, &pos, &len);
 622
 623     SubstrBufFromMB str(ImplStr(sz, nCount));
 624
 625     return m_impl.compare(pos, len, str.data, str.len);
 626 }
 627
 628 int wxString::compare(size_t nStart, size_t nLen,
 629                       const wchar_t* sz, size_t nCount) const
 630 {
 631     size_t pos, len;
 632     PosLenToImpl(nStart, nLen, &pos, &len);
 633
 634     SubstrBufFromWC str(ImplStr(sz, nCount));
 635
 636     return m_impl.compare(pos, len, str.data, str.len);
 637 }
 638
 639 #else // !HAVE_STD_STRING_COMPARE
 640
 641 static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
 642                           const wxStringCharType* s2, size_t l2)
 643 {
 644     if( l1 == l2 )
 645         return wxStringMemcmp(s1, s2, l1);
 646     else if( l1 < l2 )
 647     {
 648         int ret = wxStringMemcmp(s1, s2, l1);
 649         return ret == 0 ? -1 : ret;
 650     }
 651     else
 652     {
 653         int ret = wxStringMemcmp(s1, s2, l2);
 654         return ret == 0 ? +1 : ret;
 655     }
 656 }
 657
 658 int wxString::compare(const wxString& str) const
 659 {
 660     return ::wxDoCmp(m_impl.data(), m_impl.length(),
 661                      str.m_impl.data(), str.m_impl.length());
 662 }
 663
 664 int wxString::compare(size_t nStart, size_t nLen,
 665                       const wxString& str) const
 666 {
 667     wxASSERT(nStart <= length());
 668     size_type strLen = length() - nStart;
 669     nLen = strLen < nLen ? strLen : nLen;
 670
 671     size_t pos, len;
 672     PosLenToImpl(nStart, nLen, &pos, &len);
 673
 674     return ::wxDoCmp(m_impl.data() + pos,  len,
 675                      str.m_impl.data(), str.m_impl.length());
 676 }
 677
 678 int wxString::compare(size_t nStart, size_t nLen,
 679                       const wxString& str,
 680                       size_t nStart2, size_t nLen2) const
 681 {
 682     wxASSERT(nStart <= length());
 683     wxASSERT(nStart2 <= str.length());
 684     size_type strLen  =     length() - nStart,
 685               strLen2 = str.length() - nStart2;
 686     nLen  = strLen  < nLen  ? strLen  : nLen;
 687     nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
 688
 689     size_t pos, len;
 690     PosLenToImpl(nStart, nLen, &pos, &len);
 691     size_t pos2, len2;
 692     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 693
 694     return ::wxDoCmp(m_impl.data() + pos, len,
 695                      str.m_impl.data() + pos2, len2);
 696 }
 697
 698 int wxString::compare(const char* sz) const
 699 {
 700     SubstrBufFromMB str(ImplStr(sz, npos));
 701     if ( str.len == npos )
 702         str.len = wxStringStrlen(str.data);
 703     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 704 }
 705
 706 int wxString::compare(const wchar_t* sz) const
 707 {
 708     SubstrBufFromWC str(ImplStr(sz, npos));
 709     if ( str.len == npos )
 710         str.len = wxStringStrlen(str.data);
 711     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 712 }
 713
 714 int wxString::compare(size_t nStart, size_t nLen,
 715                       const char* sz, size_t nCount) const
 716 {
 717     wxASSERT(nStart <= length());
 718     size_type strLen = length() - nStart;
 719     nLen = strLen < nLen ? strLen : nLen;
 720
 721     size_t pos, len;
 722     PosLenToImpl(nStart, nLen, &pos, &len);
 723
 724     SubstrBufFromMB str(ImplStr(sz, nCount));
 725     if ( str.len == npos )
 726         str.len = wxStringStrlen(str.data);
 727
 728     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 729 }
 730
 731 int wxString::compare(size_t nStart, size_t nLen,
 732                       const wchar_t* sz, size_t nCount) const
 733 {
 734     wxASSERT(nStart <= length());
 735     size_type strLen = length() - nStart;
 736     nLen = strLen < nLen ? strLen : nLen;
 737
 738     size_t pos, len;
 739     PosLenToImpl(nStart, nLen, &pos, &len);
 740
 741     SubstrBufFromWC str(ImplStr(sz, nCount));
 742     if ( str.len == npos )
 743         str.len = wxStringStrlen(str.data);
 744
 745     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 746 }
 747
 748 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
 749
 750
 751 // ---------------------------------------------------------------------------
 752 // find_{first,last}_[not]_of functions
 753 // ---------------------------------------------------------------------------
 754
 755 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
 756
 757 // NB: All these functions are implemented  with the argument being wxChar*,
 758 //     i.e. widechar string in any Unicode build, even though native string
 759 //     representation is char* in the UTF-8 build. This is because we couldn't
 760 //     use memchr() to determine if a character is in a set encoded as UTF-8.
 761
 762 size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
 763 {
 764     return find_first_of(sz, nStart, wxStrlen(sz));
 765 }
 766
 767 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
 768 {
 769     return find_first_not_of(sz, nStart, wxStrlen(sz));
 770 }
 771
 772 size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
 773 {
 774     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 775
 776     size_t idx = nStart;
 777     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 778     {
 779         if ( wxTmemchr(sz, *i, n) )
 780             return idx;
 781     }
 782
 783     return npos;
 784 }
 785
 786 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
 787 {
 788     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 789
 790     size_t idx = nStart;
 791     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 792     {
 793         if ( !wxTmemchr(sz, *i, n) )
 794             return idx;
 795     }
 796
 797     return npos;
 798 }
 799
 800
 801 size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
 802 {
 803     return find_last_of(sz, nStart, wxStrlen(sz));
 804 }
 805
 806 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
 807 {
 808     return find_last_not_of(sz, nStart, wxStrlen(sz));
 809 }
 810
 811 size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
 812 {
 813     size_t len = length();
 814
 815     if ( nStart == npos )
 816     {
 817         nStart = len - 1;
 818     }
 819     else
 820     {
 821         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 822     }
 823
 824     size_t idx = nStart;
 825     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 826           i != rend(); --idx, ++i )
 827     {
 828         if ( wxTmemchr(sz, *i, n) )
 829             return idx;
 830     }
 831
 832     return npos;
 833 }
 834
 835 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
 836 {
 837     size_t len = length();
 838
 839     if ( nStart == npos )
 840     {
 841         nStart = len - 1;
 842     }
 843     else
 844     {
 845         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 846     }
 847
 848     size_t idx = nStart;
 849     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 850           i != rend(); --idx, ++i )
 851     {
 852         if ( !wxTmemchr(sz, *i, n) )
 853             return idx;
 854     }
 855
 856     return npos;
 857 }
 858
 859 size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
 860 {
 861     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 862
 863     size_t idx = nStart;
 864     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 865     {
 866         if ( *i != ch )
 867             return idx;
 868     }
 869
 870     return npos;
 871 }
 872
 873 size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
 874 {
 875     size_t len = length();
 876
 877     if ( nStart == npos )
 878     {
 879         nStart = len - 1;
 880     }
 881     else
 882     {
 883         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 884     }
 885
 886     size_t idx = nStart;
 887     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 888           i != rend(); --idx, ++i )
 889     {
 890         if ( *i != ch )
 891             return idx;
 892     }
 893
 894     return npos;
 895 }
 896
 897 // the functions above were implemented for wchar_t* arguments in Unicode
 898 // build and char* in ANSI build; below are implementations for the other
 899 // version:
 900 #if wxUSE_UNICODE
 901     #define wxOtherCharType char
 902     #define STRCONV         (const wxChar*)wxConvLibc.cMB2WC
 903 #else
 904     #define wxOtherCharType wchar_t
 905     #define STRCONV         (const wxChar*)wxConvLibc.cWC2MB
 906 #endif
 907
 908 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
 909     { return find_first_of(STRCONV(sz), nStart); }
 910
 911 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
 912                                size_t n) const
 913     { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
 914 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
 915     { return find_last_of(STRCONV(sz), nStart); }
 916 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
 917                               size_t n) const
 918     { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
 919 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
 920     { return find_first_not_of(STRCONV(sz), nStart); }
 921 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
 922                                    size_t n) const
 923     { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
 924 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
 925     { return find_last_not_of(STRCONV(sz), nStart); }
 926 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
 927                                   size_t n) const
 928     { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
 929
 930 #undef wxOtherCharType
 931 #undef STRCONV
 932
 933 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
 934
 935 // ===========================================================================
 936 // other common string functions
 937 // ===========================================================================
 938
 939 int wxString::CmpNoCase(const wxString& s) const
 940 {
 941     // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
 942
 943     size_t idx = 0;
 944     const_iterator i1 = begin();
 945     const_iterator end1 = end();
 946     const_iterator i2 = s.begin();
 947     const_iterator end2 = s.end();
 948
 949     for ( ; i1 != end1 && i2 != end2; ++idx, ++i1, ++i2 )
 950     {
 951         wxUniChar lower1 = (wxChar)wxTolower(*i1);
 952         wxUniChar lower2 = (wxChar)wxTolower(*i2);
 953         if ( lower1 != lower2 )
 954             return lower1 < lower2 ? -1 : 1;
 955     }
 956
 957     size_t len1 = length();
 958     size_t len2 = s.length();
 959
 960     if ( len1 < len2 )
 961         return -1;
 962     else if ( len1 > len2 )
 963         return 1;
 964     return 0;
 965 }
 966
 967
 968 #if wxUSE_UNICODE
 969
 970 #ifdef __MWERKS__
 971 #ifndef __SCHAR_MAX__
 972 #define __SCHAR_MAX__ 127
 973 #endif
 974 #endif
 975
 976 wxString wxString::FromAscii(const char *ascii, size_t len)
 977 {
 978     if (!ascii || len == 0)
 979        return wxEmptyString;
 980
 981     wxString res;
 982
 983     wxImplStringBuffer buf(res, len);
 984     wxStringCharType *dest = buf;
 985
 986     for ( ;; )
 987     {
 988         unsigned char c = (unsigned char)*ascii++;
 989         wxASSERT_MSG( c < 0x80,
 990                       _T("Non-ASCII value passed to FromAscii().") );
 991
 992         *dest++ = (wchar_t)c;
 993
 994         if ( c == '\0' )
 995             break;
 996     }
 997
 998     return res;
 999 }
1000
1001 wxString wxString::FromAscii(const char *ascii)
1002 {
1003     return FromAscii(ascii, strlen(ascii));
1004 }
1005
1006 wxString wxString::FromAscii(const char ascii)
1007 {
1008     // What do we do with '\0' ?
1009
1010     unsigned char c = (unsigned char)ascii;
1011
1012     wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") );
1013
1014     // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1015     return wxString(wxUniChar((wchar_t)c));
1016 }
1017
1018 const wxCharBuffer wxString::ToAscii() const
1019 {
1020     // this will allocate enough space for the terminating NUL too
1021     wxCharBuffer buffer(length());
1022     char *dest = buffer.data();
1023
1024     for ( const_iterator i = begin(); i != end(); ++i )
1025     {
1026         wxUniChar c(*i);
1027         // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1028         *dest++ = c.IsAscii() ? (char)c : '_';
1029
1030         // the output string can't have embedded NULs anyhow, so we can safely
1031         // stop at first of them even if we do have any
1032         if ( !c )
1033             break;
1034     }
1035
1036     return buffer;
1037 }
1038
1039 #endif // wxUSE_UNICODE
1040
1041 // extract string of length nCount starting at nFirst
1042 wxString wxString::Mid(size_t nFirst, size_t nCount) const
1043 {
1044     size_t nLen = length();
1045
1046     // default value of nCount is npos and means "till the end"
1047     if ( nCount == npos )
1048     {
1049         nCount = nLen - nFirst;
1050     }
1051
1052     // out-of-bounds requests return sensible things
1053     if ( nFirst + nCount > nLen )
1054     {
1055         nCount = nLen - nFirst;
1056     }
1057
1058     if ( nFirst > nLen )
1059     {
1060         // AllocCopy() will return empty string
1061         return wxEmptyString;
1062     }
1063
1064     wxString dest(*this, nFirst, nCount);
1065     if ( dest.length() != nCount )
1066     {
1067         wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1068     }
1069
1070     return dest;
1071 }
1072
1073 // check that the string starts with prefix and return the rest of the string
1074 // in the provided pointer if it is not NULL, otherwise return false
1075 bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
1076 {
1077     if ( compare(0, prefix.length(), prefix) != 0 )
1078         return false;
1079
1080     if ( rest )
1081     {
1082         // put the rest of the string into provided pointer
1083         rest->assign(*this, prefix.length(), npos);
1084     }
1085
1086     return true;
1087 }
1088
1089
1090 // check that the string ends with suffix and return the rest of it in the
1091 // provided pointer if it is not NULL, otherwise return false
1092 bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
1093 {
1094     int start = length() - suffix.length();
1095
1096     if ( start < 0 || compare(start, npos, suffix) != 0 )
1097         return false;
1098
1099     if ( rest )
1100     {
1101         // put the rest of the string into provided pointer
1102         rest->assign(*this, 0, start);
1103     }
1104
1105     return true;
1106 }
1107
1108
1109 // extract nCount last (rightmost) characters
1110 wxString wxString::Right(size_t nCount) const
1111 {
1112   if ( nCount > length() )
1113     nCount = length();
1114
1115   wxString dest(*this, length() - nCount, nCount);
1116   if ( dest.length() != nCount ) {
1117     wxFAIL_MSG( _T("out of memory in wxString::Right") );
1118   }
1119   return dest;
1120 }
1121
1122 // get all characters after the last occurence of ch
1123 // (returns the whole string if ch not found)
1124 wxString wxString::AfterLast(wxUniChar ch) const
1125 {
1126   wxString str;
1127   int iPos = Find(ch, true);
1128   if ( iPos == wxNOT_FOUND )
1129     str = *this;
1130   else
1131     str = wx_str() + iPos + 1;
1132
1133   return str;
1134 }
1135
1136 // extract nCount first (leftmost) characters
1137 wxString wxString::Left(size_t nCount) const
1138 {
1139   if ( nCount > length() )
1140     nCount = length();
1141
1142   wxString dest(*this, 0, nCount);
1143   if ( dest.length() != nCount ) {
1144     wxFAIL_MSG( _T("out of memory in wxString::Left") );
1145   }
1146   return dest;
1147 }
1148
1149 // get all characters before the first occurence of ch
1150 // (returns the whole string if ch not found)
1151 wxString wxString::BeforeFirst(wxUniChar ch) const
1152 {
1153   int iPos = Find(ch);
1154   if ( iPos == wxNOT_FOUND ) iPos = length();
1155   return wxString(*this, 0, iPos);
1156 }
1157
1158 /// get all characters before the last occurence of ch
1159 /// (returns empty string if ch not found)
1160 wxString wxString::BeforeLast(wxUniChar ch) const
1161 {
1162   wxString str;
1163   int iPos = Find(ch, true);
1164   if ( iPos != wxNOT_FOUND && iPos != 0 )
1165     str = wxString(c_str(), iPos);
1166
1167   return str;
1168 }
1169
1170 /// get all characters after the first occurence of ch
1171 /// (returns empty string if ch not found)
1172 wxString wxString::AfterFirst(wxUniChar ch) const
1173 {
1174   wxString str;
1175   int iPos = Find(ch);
1176   if ( iPos != wxNOT_FOUND )
1177     str = wx_str() + iPos + 1;
1178
1179   return str;
1180 }
1181
1182 // replace first (or all) occurences of some substring with another one
1183 size_t wxString::Replace(const wxString& strOld,
1184                          const wxString& strNew, bool bReplaceAll)
1185 {
1186     // if we tried to replace an empty string we'd enter an infinite loop below
1187     wxCHECK_MSG( !strOld.empty(), 0,
1188                  _T("wxString::Replace(): invalid parameter") );
1189
1190     size_t uiCount = 0;   // count of replacements made
1191
1192     size_t uiOldLen = strOld.length();
1193     size_t uiNewLen = strNew.length();
1194
1195     size_t dwPos = 0;
1196
1197     while ( (*this)[dwPos] != wxT('\0') )
1198     {
1199         //DO NOT USE STRSTR HERE
1200         //this string can contain embedded null characters,
1201         //so strstr will function incorrectly
1202         dwPos = find(strOld, dwPos);
1203         if ( dwPos == npos )
1204             break;                  // exit the loop
1205         else
1206         {
1207             //replace this occurance of the old string with the new one
1208             replace(dwPos, uiOldLen, strNew, uiNewLen);
1209
1210             //move up pos past the string that was replaced
1211             dwPos += uiNewLen;
1212
1213             //increase replace count
1214             ++uiCount;
1215
1216             // stop now?
1217             if ( !bReplaceAll )
1218                 break;                  // exit the loop
1219         }
1220     }
1221
1222     return uiCount;
1223 }
1224
1225 bool wxString::IsAscii() const
1226 {
1227     for ( const_iterator i = begin(); i != end(); ++i )
1228     {
1229         if ( !(*i).IsAscii() )
1230             return false;
1231     }
1232
1233     return true;
1234 }
1235
1236 bool wxString::IsWord() const
1237 {
1238     for ( const_iterator i = begin(); i != end(); ++i )
1239     {
1240         if ( !wxIsalpha(*i) )
1241             return false;
1242     }
1243
1244     return true;
1245 }
1246
1247 bool wxString::IsNumber() const
1248 {
1249     if ( empty() )
1250         return true;
1251
1252     const_iterator i = begin();
1253
1254     if ( *i == _T('-') || *i == _T('+') )
1255         ++i;
1256
1257     for ( ; i != end(); ++i )
1258     {
1259         if ( !wxIsdigit(*i) )
1260             return false;
1261     }
1262
1263     return true;
1264 }
1265
1266 wxString wxString::Strip(stripType w) const
1267 {
1268     wxString s = *this;
1269     if ( w & leading ) s.Trim(false);
1270     if ( w & trailing ) s.Trim(true);
1271     return s;
1272 }
1273
1274 // ---------------------------------------------------------------------------
1275 // case conversion
1276 // ---------------------------------------------------------------------------
1277
1278 wxString& wxString::MakeUpper()
1279 {
1280   for ( iterator it = begin(), en = end(); it != en; ++it )
1281     *it = (wxChar)wxToupper(*it);
1282
1283   return *this;
1284 }
1285
1286 wxString& wxString::MakeLower()
1287 {
1288   for ( iterator it = begin(), en = end(); it != en; ++it )
1289     *it = (wxChar)wxTolower(*it);
1290
1291   return *this;
1292 }
1293
1294 // ---------------------------------------------------------------------------
1295 // trimming and padding
1296 // ---------------------------------------------------------------------------
1297
1298 // some compilers (VC++ 6.0 not to name them) return true for a call to
1299 // isspace('ê') in the C locale which seems to be broken to me, but we have to
1300 // live with this by checking that the character is a 7 bit one - even if this
1301 // may fail to detect some spaces (I don't know if Unicode doesn't have
1302 // space-like symbols somewhere except in the first 128 chars), it is arguably
1303 // still better than trimming away accented letters
1304 inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1305
1306 // trims spaces (in the sense of isspace) from left or right side
1307 wxString& wxString::Trim(bool bFromRight)
1308 {
1309     // first check if we're going to modify the string at all
1310     if ( !empty() &&
1311          (
1312           (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1313           (!bFromRight && wxSafeIsspace(GetChar(0u)))
1314          )
1315        )
1316     {
1317         if ( bFromRight )
1318         {
1319             // find last non-space character
1320             reverse_iterator psz = rbegin();
1321             while ( (psz != rend()) && wxSafeIsspace(*psz) )
1322                 psz++;
1323
1324             // truncate at trailing space start
1325             erase(psz.base(), end());
1326         }
1327         else
1328         {
1329             // find first non-space character
1330             iterator psz = begin();
1331             while ( (psz != end()) && wxSafeIsspace(*psz) )
1332                 psz++;
1333
1334             // fix up data and length
1335             erase(begin(), psz);
1336         }
1337     }
1338
1339     return *this;
1340 }
1341
1342 // adds nCount characters chPad to the string from either side
1343 wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1344 {
1345     wxString s(chPad, nCount);
1346
1347     if ( bFromRight )
1348         *this += s;
1349     else
1350     {
1351         s += *this;
1352         swap(s);
1353     }
1354
1355     return *this;
1356 }
1357
1358 // truncate the string
1359 wxString& wxString::Truncate(size_t uiLen)
1360 {
1361     if ( uiLen < length() )
1362     {
1363         erase(begin() + uiLen, end());
1364     }
1365     //else: nothing to do, string is already short enough
1366
1367     return *this;
1368 }
1369
1370 // ---------------------------------------------------------------------------
1371 // finding (return wxNOT_FOUND if not found and index otherwise)
1372 // ---------------------------------------------------------------------------
1373
1374 // find a character
1375 int wxString::Find(wxUniChar ch, bool bFromEnd) const
1376 {
1377     size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1378
1379     return (idx == npos) ? wxNOT_FOUND : (int)idx;
1380 }
1381
1382 // ----------------------------------------------------------------------------
1383 // conversion to numbers
1384 // ----------------------------------------------------------------------------
1385
1386 // The implementation of all the functions below is exactly the same so factor
1387 // it out. Note that number extraction works correctly on UTF-8 strings, so
1388 // we can use wxStringCharType and wx_str() for maximum efficiency.
1389
1390 template <typename T>
1391 bool wxStringToIntType(const wxStringCharType *start,
1392                        T *val,
1393                        int base,
1394                        T (*func)(const wxStringCharType*, wxStringCharType**, int))
1395 {
1396     wxCHECK_MSG( val, false, _T("NULL output pointer") );
1397     wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );
1398
1399 #ifndef __WXWINCE__
1400     errno = 0;
1401 #endif
1402
1403     wxStringCharType *end;
1404     *val = (*func)(start, &end, base);
1405
1406     // return true only if scan was stopped by the terminating NUL and if the
1407     // string was not empty to start with and no under/overflow occurred
1408     return !*end && (end != start)
1409 #ifndef __WXWINCE__
1410         && (errno != ERANGE)
1411 #endif
1412     ;
1413 }
1414
1415 bool wxString::ToLong(long *val, int base) const
1416 {
1417     return wxStringToIntType(wx_str(), val, base, wxStrtol);
1418 }
1419
1420 bool wxString::ToULong(unsigned long *val, int base) const
1421 {
1422     return wxStringToIntType(wx_str(), val, base, wxStrtoul);
1423 }
1424
1425 bool wxString::ToLongLong(wxLongLong_t *val, int base) const
1426 {
1427     return wxStringToIntType(wx_str(), val, base, wxStrtoll);
1428 }
1429
1430 bool wxString::ToULongLong(wxULongLong_t *val, int base) const
1431 {
1432     return wxStringToIntType(wx_str(), val, base, wxStrtoull);
1433 }
1434
1435 bool wxString::ToDouble(double *val) const
1436 {
1437     wxCHECK_MSG( val, false, _T("NULL pointer in wxString::ToDouble") );
1438
1439 #ifndef __WXWINCE__
1440     errno = 0;
1441 #endif
1442
1443     const wxChar *start = c_str();
1444     wxChar *end;
1445     *val = wxStrtod(start, &end);
1446
1447     // return true only if scan was stopped by the terminating NUL and if the
1448     // string was not empty to start with and no under/overflow occurred
1449     return !*end && (end != start)
1450 #ifndef __WXWINCE__
1451         && (errno != ERANGE)
1452 #endif
1453     ;
1454 }
1455
1456 // ---------------------------------------------------------------------------
1457 // formatted output
1458 // ---------------------------------------------------------------------------
1459
1460 #if !wxUSE_UTF8_LOCALE_ONLY
1461 /* static */
1462 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1463 wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
1464 #else
1465 wxString wxString::DoFormatWchar(const wxChar *format, ...)
1466 #endif
1467 {
1468     va_list argptr;
1469     va_start(argptr, format);
1470
1471     wxString s;
1472     s.PrintfV(format, argptr);
1473
1474     va_end(argptr);
1475
1476     return s;
1477 }
1478 #endif // !wxUSE_UTF8_LOCALE_ONLY
1479
#if wxUSE_UNICODE_UTF8
// Return a new string built from the printf()-like char format and the
// variable arguments following it; the formatting is done by PrintfV().
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
1495
1496 /* static */
1497 wxString wxString::FormatV(const wxString& format, va_list argptr)
1498 {
1499     wxString s;
1500     s.PrintfV(format, argptr);
1501     return s;
1502 }
1503
1504 #if !wxUSE_UTF8_LOCALE_ONLY
1505 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1506 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
1507 #else
1508 int wxString::DoPrintfWchar(const wxChar *format, ...)
1509 #endif
1510 {
1511     va_list argptr;
1512     va_start(argptr, format);
1513
1514 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1515     // get a pointer to the wxString instance; we have to use dynamic_cast<>
1516     // because it's the only cast that works safely for downcasting when
1517     // multiple inheritance is used:
1518     wxString *str = static_cast<wxString*>(this);
1519 #else
1520     wxString *str = this;
1521 #endif
1522
1523     int iLen = str->PrintfV(format, argptr);
1524
1525     va_end(argptr);
1526
1527     return iLen;
1528 }
1529 #endif // !wxUSE_UTF8_LOCALE_ONLY
1530
#if wxUSE_UNICODE_UTF8
// Replace the contents of the string with the result of formatting the
// printf()-like char format and the variable arguments following it.
// Returns whatever PrintfV() returns.
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);
    const int written = PrintfV(format, args);
    va_end(args);

    return written;
}
#endif // wxUSE_UNICODE_UTF8
1544
1545 #if wxUSE_UNICODE_UTF8
1546 template<typename BufferType>
1547 #else
1548 // we only need one version in non-UTF8 builds and at least two Windows
1549 // compilers have problems with this function template, so use just one
1550 // normal function here
1551 #endif
1552 static int DoStringPrintfV(wxString& str,
1553                            const wxString& format, va_list argptr)
1554 {
1555     int size = 1024;
1556
1557     for ( ;; )
1558     {
1559 #if wxUSE_UNICODE_UTF8
1560         BufferType tmp(str, size + 1);
1561         typename BufferType::CharType *buf = tmp;
1562 #else
1563         wxStringBuffer tmp(str, size + 1);
1564         wxChar *buf = tmp;
1565 #endif
1566
1567         if ( !buf )
1568         {
1569             // out of memory
1570
1571             // in UTF-8 build, leaving uninitialized junk in the buffer
1572             // could result in invalid non-empty UTF-8 string, so just
1573             // reset the string to empty on failure:
1574             buf[0] = '\0';
1575             return -1;
1576         }
1577
1578         // wxVsnprintf() may modify the original arg pointer, so pass it
1579         // only a copy
1580         va_list argptrcopy;
1581         wxVaCopy(argptrcopy, argptr);
1582         int len = wxVsnprintf(buf, size, format, argptrcopy);
1583         va_end(argptrcopy);
1584
1585         // some implementations of vsnprintf() don't NUL terminate
1586         // the string if there is not enough space for it so
1587         // always do it manually
1588         buf[size] = _T('\0');
1589
1590         // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1591         // total number of characters which would have been written if the
1592         // buffer were large enough (newer standards such as Unix98)
1593         if ( len < 0 )
1594         {
1595             // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1596             //     wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1597             //     is true if *both* of them use our own implementation,
1598             //     otherwise we can't be sure
1599 #if wxUSE_WXVSNPRINTF
1600             // we know that our own implementation of wxVsnprintf() returns -1
1601             // only for a format error - thus there's something wrong with
1602             // the user's format string
1603             buf[0] = '\0';
1604             return -1;
1605 #else // possibly using system version
1606             // assume it only returns error if there is not enough space, but
1607             // as we don't know how much we need, double the current size of
1608             // the buffer
1609             size *= 2;
1610 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1611         }
1612         else if ( len >= size )
1613         {
1614 #if wxUSE_WXVSNPRINTF
1615             // we know that our own implementation of wxVsnprintf() returns
1616             // size+1 when there's not enough space but that's not the size
1617             // of the required buffer!
1618             size *= 2;      // so we just double the current size of the buffer
1619 #else
1620             // some vsnprintf() implementations NUL-terminate the buffer and
1621             // some don't in len == size case, to be safe always add 1
1622             size = len + 1;
1623 #endif
1624         }
1625         else // ok, there was enough space
1626         {
1627             break;
1628         }
1629     }
1630
1631     // we could have overshot
1632     str.Shrink();
1633
1634     return str.length();
1635 }
1636
1637 int wxString::PrintfV(const wxString& format, va_list argptr)
1638 {
1639 #if wxUSE_UNICODE_UTF8
1640     #if wxUSE_STL_BASED_WXSTRING
1641         typedef wxStringTypeBuffer<char> Utf8Buffer;
1642     #else
1643         typedef wxImplStringBuffer Utf8Buffer;
1644     #endif
1645 #endif
1646
1647 #if wxUSE_UTF8_LOCALE_ONLY
1648     return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
1649 #else
1650     #if wxUSE_UNICODE_UTF8
1651     if ( wxLocaleIsUtf8 )
1652         return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
1653     else
1654         // wxChar* version
1655         return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
1656     #else
1657         return DoStringPrintfV(*this, format, argptr);
1658     #endif // UTF8/WCHAR
1659 #endif
1660 }
1661
1662 // ----------------------------------------------------------------------------
1663 // misc other operations
1664 // ----------------------------------------------------------------------------
1665
// returns true if the string matches the pattern which may contain '*' and
// '?' metacharacters (as usual, '?' matches any character and '*' any number
// of them)
//
// The matcher works on NUL-terminated wxChar buffers: it walks mask and text
// in parallel and remembers the position of the last '*' so that it can retry
// from one character further in the text if a later part fails to match.
bool wxString::Matches(const wxString& mask) const
{
    // I disable this code as it doesn't seem to be faster (in fact, it seems
    // to be much slower) than the old, hand-written code below and using it
    // here requires always linking with libregex even if the user code doesn't
    // use it
    //
    // NOTE: the disabled branch refers to pszMask, which is only declared in
    // the #else branch, so it would not compile as is if simply re-enabled
#if 0 // wxUSE_REGEX
    // first translate the shell-like mask into a regex
    wxString pattern;
    pattern.reserve(wxStrlen(pszMask));

    pattern += _T('^');
    while ( *pszMask )
    {
        switch ( *pszMask )
        {
            case _T('?'):
                pattern += _T('.');
                break;

            case _T('*'):
                pattern += _T(".*");
                break;

            case _T('^'):
            case _T('.'):
            case _T('$'):
            case _T('('):
            case _T(')'):
            case _T('|'):
            case _T('+'):
            case _T('\\'):
                // these characters are special in a RE, quote them
                // (however note that we don't quote '[' and ']' to allow
                // using them for Unix shell like matching)
                pattern += _T('\\');
                // fall through

            default:
                pattern += *pszMask;
        }

        pszMask++;
    }
    pattern += _T('$');

    // and now use it
    return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
#else // !wxUSE_REGEX
  // TODO: this is, of course, awfully inefficient...

  // FIXME-UTF8: implement using iterators, remove #if
#if wxUSE_UNICODE_UTF8
  // in UTF-8 builds work on wide character copies of both strings; the
  // buffers must stay alive for as long as the pointers below are used
  wxWCharBuffer maskBuf = mask.wc_str();
  wxWCharBuffer txtBuf = wc_str();
  const wxChar *pszMask = maskBuf.data();
  const wxChar *pszTxt = txtBuf.data();
#else
  const wxChar *pszMask = mask.wx_str();
  // the char currently being checked
  const wxChar *pszTxt = wx_str();
#endif

  // the last location where '*' matched
  const wxChar *pszLastStarInText = NULL;
  const wxChar *pszLastStarInMask = NULL;

  // (re)start matching from the current pszMask/pszTxt positions; this label
  // is jumped to again when backtracking below
match:
  for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
    switch ( *pszMask ) {
      case wxT('?'):
        // '?' needs one character of text to consume
        if ( *pszTxt == wxT('\0') )
          return false;

        // pszTxt and pszMask will be incremented in the loop statement

        break;

      case wxT('*'):
        {
          // remember where we started to be able to backtrack later
          pszLastStarInText = pszTxt;
          pszLastStarInMask = pszMask;

          // ignore special chars immediately following this one
          // (should this be an error?)
          while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
            pszMask++;

          // if there is nothing more, match
          if ( *pszMask == wxT('\0') )
            return true;

          // are there any other metacharacters in the mask?
          size_t uiLenMask;
          const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));

          if ( pEndMask != NULL ) {
            // we have to match the string between two metachars
            uiLenMask = pEndMask - pszMask;
          }
          else {
            // we have to match the remainder of the string
            uiLenMask = wxStrlen(pszMask);
          }

          // look for the literal part of the mask following the '*' in the
          // remaining text
          wxString strToMatch(pszMask, uiLenMask);
          const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
          if ( pMatch == NULL )
            return false;

          // -1 to compensate "++" in the loop
          pszTxt = pMatch + uiLenMask - 1;
          pszMask += uiLenMask - 1;
        }
        break;

      default:
        // ordinary character: must be the same in mask and text
        if ( *pszMask != *pszTxt )
          return false;
        break;
    }
  }

  // match only if nothing left
  if ( *pszTxt == wxT('\0') )
    return true;

  // if we failed to match, backtrack if we can
  if ( pszLastStarInText ) {
    // retry from the last '*' in the mask, starting one character further in
    // the text than where that '*' was first tried
    pszTxt = pszLastStarInText + 1;
    pszMask = pszLastStarInMask;

    pszLastStarInText = NULL;

    // don't bother resetting pszLastStarInMask, it's unnecessary

    goto match;
  }

  return false;
#endif // wxUSE_REGEX/!wxUSE_REGEX
}
1812
1813 // Count the number of chars
1814 int wxString::Freq(wxUniChar ch) const
1815 {
1816     int count = 0;
1817     for ( const_iterator i = begin(); i != end(); ++i )
1818     {
1819         if ( *i == ch )
1820             count ++;
1821     }
1822     return count;
1823 }
1824
1825 // convert to upper case, return the copy of the string
1826 wxString wxString::Upper() const
1827 { wxString s(*this); return s.MakeUpper(); }
1828
1829 // convert to lower case, return the copy of the string
1830 wxString wxString::Lower() const { wxString s(*this); return s.MakeLower(); }