src/common/string.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/string.cpp
   3 // Purpose:     wxString class
   4 // Author:      Vadim Zeitlin, Ryan Norton
   5 // Modified by:
   6 // Created:     29/01/98
   7 // RCS-ID:      $Id$
   8 // Copyright:   (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
   9 //              (c) 2004 Ryan Norton <wxprojects@comcast.net>
  10 // Licence:     wxWindows licence
  11 /////////////////////////////////////////////////////////////////////////////
  12
  13 // ===========================================================================
  14 // headers, declarations, constants
  15 // ===========================================================================
  16
  17 // For compilers that support precompilation, includes "wx.h".
  18 #include "wx/wxprec.h"
  19
  20 #ifdef __BORLANDC__
  21     #pragma hdrstop
  22 #endif
  23
  24 #ifndef WX_PRECOMP
  25     #include "wx/string.h"
  26     #include "wx/wxcrtvararg.h"
  27 #endif
  28
  29 #include <ctype.h>
  30
  31 #ifndef __WXWINCE__
  32     #include <errno.h>
  33 #endif
  34
  35 #include <string.h>
  36 #include <stdlib.h>
  37
  38 #ifdef __SALFORDC__
  39     #include <clib.h>
  40 #endif
  41
  42 #include "wx/hashmap.h"
  43
  44 // string handling functions used by wxString:
  45 #if wxUSE_UNICODE_UTF8
  46     #define wxStringMemcpy   memcpy
  47     #define wxStringMemcmp   memcmp
  48     #define wxStringMemchr   memchr
  49     #define wxStringStrlen   strlen
  50 #else
  51     #define wxStringMemcpy   wxTmemcpy
  52     #define wxStringMemcmp   wxTmemcmp
  53     #define wxStringMemchr   wxTmemchr
  54     #define wxStringStrlen   wxStrlen
  55 #endif
  56
  57
  58 // ---------------------------------------------------------------------------
  59 // static class variables definition
  60 // ---------------------------------------------------------------------------
  61
  62 //According to STL _must_ be a -1 size_t
  63 const size_t wxString::npos = (size_t) -1;
  64
  65 // ----------------------------------------------------------------------------
  66 // global functions
  67 // ----------------------------------------------------------------------------
  68
  69 #if wxUSE_STD_IOSTREAM
  70
  71 #include <iostream>
  72
  73 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
  74 {
  75 // FIXME-UTF8: always, not only if wxUSE_UNICODE
  76 #if wxUSE_UNICODE && !defined(__BORLANDC__)
  77     return os << (const wchar_t*)str.AsWCharBuf();
  78 #else
  79     return os << (const char*)str.AsCharBuf();
  80 #endif
  81 }
  82
  83 wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
  84 {
  85     return os << str.c_str();
  86 }
  87
  88 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str)
  89 {
  90     return os << str.data();
  91 }
  92
  93 #ifndef __BORLANDC__
  94 wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)
  95 {
  96     return os << str.data();
  97 }
  98 #endif
  99
 100 #endif // wxUSE_STD_IOSTREAM
 101
 102 // ===========================================================================
 103 // wxString class core
 104 // ===========================================================================
 105
 106 #if wxUSE_UNICODE_UTF8
 107
 108 void wxString::PosLenToImpl(size_t pos, size_t len,
 109                             size_t *implPos, size_t *implLen) const
 110 {
 111     if ( pos == npos )
 112         *implPos = npos;
 113     else
 114     {
 115         const_iterator i = begin() + pos;
 116         *implPos = wxStringImpl::const_iterator(i.impl()) - m_impl.begin();
 117         if ( len == npos )
 118             *implLen = npos;
 119         else
 120         {
 121             // too large length is interpreted as "to the end of the string"
 122             // FIXME-UTF8: verify this is the case in std::string, assert
 123             // otherwise
 124             if ( pos + len > length() )
 125                 len = length() - pos;
 126
 127             *implLen = (i + len).impl() - i.impl();
 128         }
 129     }
 130 }
 131
 132 #endif // wxUSE_UNICODE_UTF8
 133
 134 // ----------------------------------------------------------------------------
 135 // wxCStrData converted strings caching
 136 // ----------------------------------------------------------------------------
 137
 138 // FIXME-UTF8: temporarily disabled because it doesn't work with global
 139 //             string objects; re-enable after fixing this bug and benchmarking
 140 //             performance to see if using a hash is a good idea at all
 141 #if 0
 142
 143 // For backward compatibility reasons, it must be possible to assign the value
 144 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
 145 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
 146 // because the memory would be freed immediately, but it has to be valid as long
 147 // as the string is not modified, so that code like this still works:
 148 //
 149 // const wxChar *s = str.c_str();
 150 // while ( s ) { ... }
 151
 152 // FIXME-UTF8: not thread safe!
 153 // FIXME-UTF8: we currently clear the cached conversion only when the string is
 154 //             destroyed, but we should do it when the string is modified, to
 155 //             keep memory usage down
 156 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
 157 //             invalidated the cache on every change, we could keep the previous
 158 //             conversion
 159 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
 160 //             to use mb_str() or wc_str() instead of (const [w]char*)c_str()
 161
 162 template<typename T>
 163 static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
 164 {
 165     typename T::iterator i = hash.find(wxConstCast(s, wxString));
 166     if ( i != hash.end() )
 167     {
 168         free(i->second);
 169         hash.erase(i);
 170     }
 171 }
 172
 173 #if wxUSE_UNICODE
 174 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
 175 //     so we have to use wxString* here and const-cast when used
 176 WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
 177                     wxStringCharConversionCache);
 178 static wxStringCharConversionCache gs_stringsCharCache;
 179
 180 const char* wxCStrData::AsChar() const
 181 {
 182     // remove previously cache value, if any (see FIXMEs above):
 183     DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
 184
 185     // convert the string and keep it:
 186     const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
 187         m_str->mb_str().release();
 188
 189     return s + m_offset;
 190 }
 191 #endif // wxUSE_UNICODE
 192
 193 #if !wxUSE_UNICODE_WCHAR
 194 WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
 195                     wxStringWCharConversionCache);
 196 static wxStringWCharConversionCache gs_stringsWCharCache;
 197
 198 const wchar_t* wxCStrData::AsWChar() const
 199 {
 200     // remove previously cache value, if any (see FIXMEs above):
 201     DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
 202
 203     // convert the string and keep it:
 204     const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
 205         m_str->wc_str().release();
 206
 207     return s + m_offset;
 208 }
 209 #endif // !wxUSE_UNICODE_WCHAR
 210
 211 wxString::~wxString()
 212 {
 213 #if wxUSE_UNICODE
 214     // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
 215     DeleteStringFromConversionCache(gs_stringsCharCache, this);
 216 #endif
 217 #if !wxUSE_UNICODE_WCHAR
 218     DeleteStringFromConversionCache(gs_stringsWCharCache, this);
 219 #endif
 220 }
 221 #endif
 222
 223 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 224 const char* wxCStrData::AsChar() const
 225 {
 226 #if wxUSE_UNICODE_UTF8
 227     if ( wxLocaleIsUtf8 )
 228         return AsInternal();
 229 #endif
 230     // under non-UTF8 locales, we have to convert the internal UTF-8
 231     // representation using wxConvLibc and cache the result
 232
 233     wxString *str = wxConstCast(m_str, wxString);
 234
 235     // convert the string:
 236     wxCharBuffer buf(str->mb_str());
 237
 238     // FIXME-UTF8: do the conversion in-place in the existing buffer
 239     if ( str->m_convertedToChar &&
 240          strlen(buf) == strlen(str->m_convertedToChar) )
 241     {
 242         // keep the same buffer for as long as possible, so that several calls
 243         // to c_str() in a row still work:
 244         strcpy(str->m_convertedToChar, buf);
 245     }
 246     else
 247     {
 248         str->m_convertedToChar = buf.release();
 249     }
 250
 251     // and keep it:
 252     return str->m_convertedToChar + m_offset;
 253 }
 254 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 255
 256 #if !wxUSE_UNICODE_WCHAR
 257 const wchar_t* wxCStrData::AsWChar() const
 258 {
 259     wxString *str = wxConstCast(m_str, wxString);
 260
 261     // convert the string:
 262     wxWCharBuffer buf(str->wc_str());
 263
 264     // FIXME-UTF8: do the conversion in-place in the existing buffer
 265     if ( str->m_convertedToWChar &&
 266          wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
 267     {
 268         // keep the same buffer for as long as possible, so that several calls
 269         // to c_str() in a row still work:
 270         memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
 271     }
 272     else
 273     {
 274         str->m_convertedToWChar = buf.release();
 275     }
 276
 277     // and keep it:
 278     return str->m_convertedToWChar + m_offset;
 279 }
 280 #endif // !wxUSE_UNICODE_WCHAR
 281
 282 // ===========================================================================
 283 // wxString class core
 284 // ===========================================================================
 285
 286 // ---------------------------------------------------------------------------
 287 // construction and conversion
 288 // ---------------------------------------------------------------------------
 289
 290 #if wxUSE_UNICODE_WCHAR
 291 /* static */
 292 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 293                                                const wxMBConv& conv)
 294 {
 295     // anything to do?
 296     if ( !psz || nLength == 0 )
 297         return SubstrBufFromMB(L"", 0);
 298
 299     if ( nLength == npos )
 300         nLength = wxNO_LEN;
 301
 302     size_t wcLen;
 303     wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 304     if ( !wcLen )
 305         return SubstrBufFromMB(_T(""), 0);
 306     else
 307         return SubstrBufFromMB(wcBuf, wcLen);
 308 }
 309 #endif // wxUSE_UNICODE_WCHAR
 310
 311 #if wxUSE_UNICODE_UTF8
 312 /* static */
 313 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 314                                                const wxMBConv& conv)
 315 {
 316     // anything to do?
 317     if ( !psz || nLength == 0 )
 318         return SubstrBufFromMB("", 0);
 319
 320     // if psz is already in UTF-8, we don't have to do the roundtrip to
 321     // wchar_t* and back:
 322     if ( conv.IsUTF8() )
 323     {
 324         // we need to validate the input because UTF8 iterators assume valid
 325         // UTF-8 sequence and psz may be invalid:
 326         if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
 327         {
 328             return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz), nLength);
 329         }
 330         // else: do the roundtrip through wchar_t*
 331     }
 332
 333     if ( nLength == npos )
 334         nLength = wxNO_LEN;
 335
 336     // first convert to wide string:
 337     size_t wcLen;
 338     wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 339     if ( !wcLen )
 340         return SubstrBufFromMB("", 0);
 341
 342     // and then to UTF-8:
 343     SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvUTF8()));
 344     // widechar -> UTF-8 conversion isn't supposed to ever fail:
 345     wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
 346
 347     return buf;
 348 }
 349 #endif // wxUSE_UNICODE_UTF8
 350
 351 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 352 /* static */
 353 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
 354                                                const wxMBConv& conv)
 355 {
 356     // anything to do?
 357     if ( !pwz || nLength == 0 )
 358         return SubstrBufFromWC("", 0);
 359
 360     if ( nLength == npos )
 361         nLength = wxNO_LEN;
 362
 363     size_t mbLen;
 364     wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
 365     if ( !mbLen )
 366         return SubstrBufFromWC("", 0);
 367     else
 368         return SubstrBufFromWC(mbBuf, mbLen);
 369 }
 370 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 371
 372
 373 #if wxUSE_UNICODE_WCHAR
 374
 375 //Convert wxString in Unicode mode to a multi-byte string
 376 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
 377 {
 378     return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL);
 379 }
 380
 381 #elif wxUSE_UNICODE_UTF8
 382
 383 const wxWCharBuffer wxString::wc_str() const
 384 {
 385     return wxMBConvUTF8().cMB2WC(m_impl.c_str(),
 386                                  m_impl.length() + 1 /* size, not length */,
 387                                  NULL);
 388 }
 389
 390 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
 391 {
 392     if ( conv.IsUTF8() )
 393         return wxCharBuffer::CreateNonOwned(m_impl.c_str());
 394
 395     // FIXME-UTF8: use wc_str() here once we have buffers with length
 396
 397     size_t wcLen;
 398     wxWCharBuffer wcBuf(
 399             wxMBConvUTF8().cMB2WC(m_impl.c_str(),
 400                                   m_impl.length() + 1 /* size, not length */,
 401                                   &wcLen));
 402     if ( !wcLen )
 403         return wxCharBuffer("");
 404
 405     return conv.cWC2MB(wcBuf, wcLen, NULL);
 406 }
 407
 408 #else // ANSI
 409
 410 //Converts this string to a wide character string if unicode
 411 //mode is not enabled and wxUSE_WCHAR_T is enabled
 412 const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
 413 {
 414     return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL);
 415 }
 416
 417 #endif // Unicode/ANSI
 418
 419 // shrink to minimal size (releasing extra memory)
 420 bool wxString::Shrink()
 421 {
 422   wxString tmp(begin(), end());
 423   swap(tmp);
 424   return tmp.length() == length();
 425 }
 426
 427 // deprecated compatibility code:
 428 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
 429 wxStringCharType *wxString::GetWriteBuf(size_t nLen)
 430 {
 431     return DoGetWriteBuf(nLen);
 432 }
 433
 434 void wxString::UngetWriteBuf()
 435 {
 436     DoUngetWriteBuf();
 437 }
 438
 439 void wxString::UngetWriteBuf(size_t nLen)
 440 {
 441     DoUngetWriteBuf(nLen);
 442 }
 443 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
 444
 445
 446 // ---------------------------------------------------------------------------
 447 // data access
 448 // ---------------------------------------------------------------------------
 449
 450 // all functions are inline in string.h
 451
 452 // ---------------------------------------------------------------------------
 453 // concatenation operators
 454 // ---------------------------------------------------------------------------
 455
 456 /*
 457  * concatenation functions come in 5 flavours:
 458  *  string + string
 459  *  char   + string      and      string + char
 460  *  C str  + string      and      string + C str
 461  */
 462
 463 wxString operator+(const wxString& str1, const wxString& str2)
 464 {
 465 #if !wxUSE_STL_BASED_WXSTRING
 466     wxASSERT( str1.IsValid() );
 467     wxASSERT( str2.IsValid() );
 468 #endif
 469
 470     wxString s = str1;
 471     s += str2;
 472
 473     return s;
 474 }
 475
 476 wxString operator+(const wxString& str, wxUniChar ch)
 477 {
 478 #if !wxUSE_STL_BASED_WXSTRING
 479     wxASSERT( str.IsValid() );
 480 #endif
 481
 482     wxString s = str;
 483     s += ch;
 484
 485     return s;
 486 }
 487
 488 wxString operator+(wxUniChar ch, const wxString& str)
 489 {
 490 #if !wxUSE_STL_BASED_WXSTRING
 491     wxASSERT( str.IsValid() );
 492 #endif
 493
 494     wxString s = ch;
 495     s += str;
 496
 497     return s;
 498 }
 499
 500 wxString operator+(const wxString& str, const char *psz)
 501 {
 502 #if !wxUSE_STL_BASED_WXSTRING
 503     wxASSERT( str.IsValid() );
 504 #endif
 505
 506     wxString s;
 507     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 508         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 509     }
 510     s += str;
 511     s += psz;
 512
 513     return s;
 514 }
 515
 516 wxString operator+(const wxString& str, const wchar_t *pwz)
 517 {
 518 #if !wxUSE_STL_BASED_WXSTRING
 519     wxASSERT( str.IsValid() );
 520 #endif
 521
 522     wxString s;
 523     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 524         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 525     }
 526     s += str;
 527     s += pwz;
 528
 529     return s;
 530 }
 531
 532 wxString operator+(const char *psz, const wxString& str)
 533 {
 534 #if !wxUSE_STL_BASED_WXSTRING
 535     wxASSERT( str.IsValid() );
 536 #endif
 537
 538     wxString s;
 539     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 540         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 541     }
 542     s = psz;
 543     s += str;
 544
 545     return s;
 546 }
 547
 548 wxString operator+(const wchar_t *pwz, const wxString& str)
 549 {
 550 #if !wxUSE_STL_BASED_WXSTRING
 551     wxASSERT( str.IsValid() );
 552 #endif
 553
 554     wxString s;
 555     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 556         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 557     }
 558     s = pwz;
 559     s += str;
 560
 561     return s;
 562 }
 563
 564 // ---------------------------------------------------------------------------
 565 // string comparison
 566 // ---------------------------------------------------------------------------
 567
 568 bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
 569 {
 570     return (length() == 1) && (compareWithCase ? GetChar(0u) == c
 571                                : wxToupper(GetChar(0u)) == wxToupper(c));
 572 }
 573
 574 #ifdef HAVE_STD_STRING_COMPARE
 575
 576 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
 577 //     UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
 578 //     sort strings in characters code point order by sorting the byte sequence
 579 //     in byte values order (i.e. what strcmp() and memcmp() do).
 580
 581 int wxString::compare(const wxString& str) const
 582 {
 583     return m_impl.compare(str.m_impl);
 584 }
 585
 586 int wxString::compare(size_t nStart, size_t nLen,
 587                       const wxString& str) const
 588 {
 589     size_t pos, len;
 590     PosLenToImpl(nStart, nLen, &pos, &len);
 591     return m_impl.compare(pos, len, str.m_impl);
 592 }
 593
 594 int wxString::compare(size_t nStart, size_t nLen,
 595                       const wxString& str,
 596                       size_t nStart2, size_t nLen2) const
 597 {
 598     size_t pos, len;
 599     PosLenToImpl(nStart, nLen, &pos, &len);
 600
 601     size_t pos2, len2;
 602     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 603
 604     return m_impl.compare(pos, len, str.m_impl, pos2, len2);
 605 }
 606
 607 int wxString::compare(const char* sz) const
 608 {
 609     return m_impl.compare(ImplStr(sz));
 610 }
 611
 612 int wxString::compare(const wchar_t* sz) const
 613 {
 614     return m_impl.compare(ImplStr(sz));
 615 }
 616
 617 int wxString::compare(size_t nStart, size_t nLen,
 618                       const char* sz, size_t nCount) const
 619 {
 620     size_t pos, len;
 621     PosLenToImpl(nStart, nLen, &pos, &len);
 622
 623     SubstrBufFromMB str(ImplStr(sz, nCount));
 624
 625     return m_impl.compare(pos, len, str.data, str.len);
 626 }
 627
 628 int wxString::compare(size_t nStart, size_t nLen,
 629                       const wchar_t* sz, size_t nCount) const
 630 {
 631     size_t pos, len;
 632     PosLenToImpl(nStart, nLen, &pos, &len);
 633
 634     SubstrBufFromWC str(ImplStr(sz, nCount));
 635
 636     return m_impl.compare(pos, len, str.data, str.len);
 637 }
 638
 639 #else // !HAVE_STD_STRING_COMPARE
 640
 641 static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
 642                           const wxStringCharType* s2, size_t l2)
 643 {
 644     if( l1 == l2 )
 645         return wxStringMemcmp(s1, s2, l1);
 646     else if( l1 < l2 )
 647     {
 648         int ret = wxStringMemcmp(s1, s2, l1);
 649         return ret == 0 ? -1 : ret;
 650     }
 651     else
 652     {
 653         int ret = wxStringMemcmp(s1, s2, l2);
 654         return ret == 0 ? +1 : ret;
 655     }
 656 }
 657
 658 int wxString::compare(const wxString& str) const
 659 {
 660     return ::wxDoCmp(m_impl.data(), m_impl.length(),
 661                      str.m_impl.data(), str.m_impl.length());
 662 }
 663
 664 int wxString::compare(size_t nStart, size_t nLen,
 665                       const wxString& str) const
 666 {
 667     wxASSERT(nStart <= length());
 668     size_type strLen = length() - nStart;
 669     nLen = strLen < nLen ? strLen : nLen;
 670
 671     size_t pos, len;
 672     PosLenToImpl(nStart, nLen, &pos, &len);
 673
 674     return ::wxDoCmp(m_impl.data() + pos,  len,
 675                      str.m_impl.data(), str.m_impl.length());
 676 }
 677
 678 int wxString::compare(size_t nStart, size_t nLen,
 679                       const wxString& str,
 680                       size_t nStart2, size_t nLen2) const
 681 {
 682     wxASSERT(nStart <= length());
 683     wxASSERT(nStart2 <= str.length());
 684     size_type strLen  =     length() - nStart,
 685               strLen2 = str.length() - nStart2;
 686     nLen  = strLen  < nLen  ? strLen  : nLen;
 687     nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
 688
 689     size_t pos, len;
 690     PosLenToImpl(nStart, nLen, &pos, &len);
 691     size_t pos2, len2;
 692     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 693
 694     return ::wxDoCmp(m_impl.data() + pos, len,
 695                      str.m_impl.data() + pos2, len2);
 696 }
 697
 698 int wxString::compare(const char* sz) const
 699 {
 700     SubstrBufFromMB str(ImplStr(sz, npos));
 701     if ( str.len == npos )
 702         str.len = wxStringStrlen(str.data);
 703     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 704 }
 705
 706 int wxString::compare(const wchar_t* sz) const
 707 {
 708     SubstrBufFromWC str(ImplStr(sz, npos));
 709     if ( str.len == npos )
 710         str.len = wxStringStrlen(str.data);
 711     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 712 }
 713
 714 int wxString::compare(size_t nStart, size_t nLen,
 715                       const char* sz, size_t nCount) const
 716 {
 717     wxASSERT(nStart <= length());
 718     size_type strLen = length() - nStart;
 719     nLen = strLen < nLen ? strLen : nLen;
 720
 721     size_t pos, len;
 722     PosLenToImpl(nStart, nLen, &pos, &len);
 723
 724     SubstrBufFromMB str(ImplStr(sz, nCount));
 725     if ( str.len == npos )
 726         str.len = wxStringStrlen(str.data);
 727
 728     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 729 }
 730
 731 int wxString::compare(size_t nStart, size_t nLen,
 732                       const wchar_t* sz, size_t nCount) const
 733 {
 734     wxASSERT(nStart <= length());
 735     size_type strLen = length() - nStart;
 736     nLen = strLen < nLen ? strLen : nLen;
 737
 738     size_t pos, len;
 739     PosLenToImpl(nStart, nLen, &pos, &len);
 740
 741     SubstrBufFromWC str(ImplStr(sz, nCount));
 742     if ( str.len == npos )
 743         str.len = wxStringStrlen(str.data);
 744
 745     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 746 }
 747
 748 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
 749
 750
 751 // ---------------------------------------------------------------------------
 752 // find_{first,last}_[not]_of functions
 753 // ---------------------------------------------------------------------------
 754
 755 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
 756
 757 // NB: All these functions are implemented  with the argument being wxChar*,
 758 //     i.e. widechar string in any Unicode build, even though native string
 759 //     representation is char* in the UTF-8 build. This is because we couldn't
 760 //     use memchr() to determine if a character is in a set encoded as UTF-8.
 761
 762 size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
 763 {
 764     return find_first_of(sz, nStart, wxStrlen(sz));
 765 }
 766
 767 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
 768 {
 769     return find_first_not_of(sz, nStart, wxStrlen(sz));
 770 }
 771
 772 size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
 773 {
 774     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 775
 776     size_t idx = nStart;
 777     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 778     {
 779         if ( wxTmemchr(sz, *i, n) )
 780             return idx;
 781     }
 782
 783     return npos;
 784 }
 785
 786 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
 787 {
 788     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 789
 790     size_t idx = nStart;
 791     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 792     {
 793         if ( !wxTmemchr(sz, *i, n) )
 794             return idx;
 795     }
 796
 797     return npos;
 798 }
 799
 800
 801 size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
 802 {
 803     return find_last_of(sz, nStart, wxStrlen(sz));
 804 }
 805
 806 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
 807 {
 808     return find_last_not_of(sz, nStart, wxStrlen(sz));
 809 }
 810
 811 size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
 812 {
 813     size_t len = length();
 814
 815     if ( nStart == npos )
 816     {
 817         nStart = len - 1;
 818     }
 819     else
 820     {
 821         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 822     }
 823
 824     size_t idx = nStart;
 825     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 826           i != rend(); --idx, ++i )
 827     {
 828         if ( wxTmemchr(sz, *i, n) )
 829             return idx;
 830     }
 831
 832     return npos;
 833 }
 834
 835 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
 836 {
 837     size_t len = length();
 838
 839     if ( nStart == npos )
 840     {
 841         nStart = len - 1;
 842     }
 843     else
 844     {
 845         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 846     }
 847
 848     size_t idx = nStart;
 849     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 850           i != rend(); --idx, ++i )
 851     {
 852         if ( !wxTmemchr(sz, *i, n) )
 853             return idx;
 854     }
 855
 856     return npos;
 857 }
 858
 859 size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
 860 {
 861     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 862
 863     size_t idx = nStart;
 864     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 865     {
 866         if ( *i != ch )
 867             return idx;
 868     }
 869
 870     return npos;
 871 }
 872
 873 size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
 874 {
 875     size_t len = length();
 876
 877     if ( nStart == npos )
 878     {
 879         nStart = len - 1;
 880     }
 881     else
 882     {
 883         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 884     }
 885
 886     size_t idx = nStart;
 887     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 888           i != rend(); --idx, ++i )
 889     {
 890         if ( *i != ch )
 891             return idx;
 892     }
 893
 894     return npos;
 895 }
 896
 897 // the functions above were implemented for wchar_t* arguments in Unicode
 898 // build and char* in ANSI build; below are implementations for the other
 899 // version:
 900 #if wxUSE_UNICODE
 901     #define wxOtherCharType char
 902     #define STRCONV         (const wxChar*)wxConvLibc.cMB2WC
 903 #else
 904     #define wxOtherCharType wchar_t
 905     #define STRCONV         (const wxChar*)wxConvLibc.cWC2MB
 906 #endif
 907
 908 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
 909     { return find_first_of(STRCONV(sz), nStart); }
 910
 911 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
 912                                size_t n) const
 913     { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
 914 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
 915     { return find_last_of(STRCONV(sz), nStart); }
 916 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
 917                               size_t n) const
 918     { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
 919 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
 920     { return find_first_not_of(STRCONV(sz), nStart); }
 921 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
 922                                    size_t n) const
 923     { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
 924 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
 925     { return find_last_not_of(STRCONV(sz), nStart); }
 926 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
 927                                   size_t n) const
 928     { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
 929
 930 #undef wxOtherCharType
 931 #undef STRCONV
 932
 933 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
 934
 935 // ===========================================================================
 936 // other common string functions
 937 // ===========================================================================
 938
 939 int wxString::CmpNoCase(const wxString& s) const
 940 {
 941     // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
 942
 943     size_t idx = 0;
 944     const_iterator i1 = begin();
 945     const_iterator end1 = end();
 946     const_iterator i2 = s.begin();
 947     const_iterator end2 = s.end();
 948
 949     for ( ; i1 != end1 && i2 != end2; ++idx, ++i1, ++i2 )
 950     {
 951         wxUniChar lower1 = (wxChar)wxTolower(*i1);
 952         wxUniChar lower2 = (wxChar)wxTolower(*i2);
 953         if ( lower1 != lower2 )
 954             return lower1 < lower2 ? -1 : 1;
 955     }
 956
 957     size_t len1 = length();
 958     size_t len2 = s.length();
 959
 960     if ( len1 < len2 )
 961         return -1;
 962     else if ( len1 > len2 )
 963         return 1;
 964     return 0;
 965 }
 966
 967
 968 #if wxUSE_UNICODE
 969
 970 #ifdef __MWERKS__
 971 #ifndef __SCHAR_MAX__
 972 #define __SCHAR_MAX__ 127
 973 #endif
 974 #endif
 975
 976 wxString wxString::FromAscii(const char *ascii)
 977 {
 978     if (!ascii)
 979        return wxEmptyString;
 980
 981     size_t len = strlen(ascii);
 982     wxString res;
 983
 984     if ( len )
 985     {
 986         wxImplStringBuffer buf(res, len);
 987         wxStringCharType *dest = buf;
 988
 989         for ( ;; )
 990         {
 991             unsigned char c = (unsigned char)*ascii++;
 992             wxASSERT_MSG( c < 0x80,
 993                           _T("Non-ASCII value passed to FromAscii().") );
 994
 995             *dest++ = (wchar_t)c;
 996
 997             if ( c == '\0' )
 998                 break;
 999         }
1000     }
1001
1002     return res;
1003 }
1004
1005 wxString wxString::FromAscii(const char ascii)
1006 {
1007     // What do we do with '\0' ?
1008
1009     unsigned char c = (unsigned char)ascii;
1010
1011     wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") );
1012
1013     // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1014     return wxString(wxUniChar((wchar_t)c));
1015 }
1016
1017 const wxCharBuffer wxString::ToAscii() const
1018 {
1019     // this will allocate enough space for the terminating NUL too
1020     wxCharBuffer buffer(length());
1021     char *dest = buffer.data();
1022
1023     for ( const_iterator i = begin(); i != end(); ++i )
1024     {
1025         wxUniChar c(*i);
1026         // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1027         *dest++ = c.IsAscii() ? (char)c : '_';
1028
1029         // the output string can't have embedded NULs anyhow, so we can safely
1030         // stop at first of them even if we do have any
1031         if ( !c )
1032             break;
1033     }
1034
1035     return buffer;
1036 }
1037
1038 #endif // wxUSE_UNICODE
1039
1040 // extract string of length nCount starting at nFirst
1041 wxString wxString::Mid(size_t nFirst, size_t nCount) const
1042 {
1043     size_t nLen = length();
1044
1045     // default value of nCount is npos and means "till the end"
1046     if ( nCount == npos )
1047     {
1048         nCount = nLen - nFirst;
1049     }
1050
1051     // out-of-bounds requests return sensible things
1052     if ( nFirst + nCount > nLen )
1053     {
1054         nCount = nLen - nFirst;
1055     }
1056
1057     if ( nFirst > nLen )
1058     {
1059         // AllocCopy() will return empty string
1060         return wxEmptyString;
1061     }
1062
1063     wxString dest(*this, nFirst, nCount);
1064     if ( dest.length() != nCount )
1065     {
1066         wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1067     }
1068
1069     return dest;
1070 }
1071
1072 // check that the string starts with prefix and return the rest of the string
1073 // in the provided pointer if it is not NULL, otherwise return false
1074 bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
1075 {
1076     if ( compare(0, prefix.length(), prefix) != 0 )
1077         return false;
1078
1079     if ( rest )
1080     {
1081         // put the rest of the string into provided pointer
1082         rest->assign(*this, prefix.length(), npos);
1083     }
1084
1085     return true;
1086 }
1087
1088
1089 // check that the string ends with suffix and return the rest of it in the
1090 // provided pointer if it is not NULL, otherwise return false
1091 bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
1092 {
1093     int start = length() - suffix.length();
1094
1095     if ( start < 0 || compare(start, npos, suffix) != 0 )
1096         return false;
1097
1098     if ( rest )
1099     {
1100         // put the rest of the string into provided pointer
1101         rest->assign(*this, 0, start);
1102     }
1103
1104     return true;
1105 }
1106
1107
1108 // extract nCount last (rightmost) characters
1109 wxString wxString::Right(size_t nCount) const
1110 {
1111   if ( nCount > length() )
1112     nCount = length();
1113
1114   wxString dest(*this, length() - nCount, nCount);
1115   if ( dest.length() != nCount ) {
1116     wxFAIL_MSG( _T("out of memory in wxString::Right") );
1117   }
1118   return dest;
1119 }
1120
1121 // get all characters after the last occurence of ch
1122 // (returns the whole string if ch not found)
1123 wxString wxString::AfterLast(wxUniChar ch) const
1124 {
1125   wxString str;
1126   int iPos = Find(ch, true);
1127   if ( iPos == wxNOT_FOUND )
1128     str = *this;
1129   else
1130     str = wx_str() + iPos + 1;
1131
1132   return str;
1133 }
1134
1135 // extract nCount first (leftmost) characters
1136 wxString wxString::Left(size_t nCount) const
1137 {
1138   if ( nCount > length() )
1139     nCount = length();
1140
1141   wxString dest(*this, 0, nCount);
1142   if ( dest.length() != nCount ) {
1143     wxFAIL_MSG( _T("out of memory in wxString::Left") );
1144   }
1145   return dest;
1146 }
1147
1148 // get all characters before the first occurence of ch
1149 // (returns the whole string if ch not found)
1150 wxString wxString::BeforeFirst(wxUniChar ch) const
1151 {
1152   int iPos = Find(ch);
1153   if ( iPos == wxNOT_FOUND ) iPos = length();
1154   return wxString(*this, 0, iPos);
1155 }
1156
1157 /// get all characters before the last occurence of ch
1158 /// (returns empty string if ch not found)
1159 wxString wxString::BeforeLast(wxUniChar ch) const
1160 {
1161   wxString str;
1162   int iPos = Find(ch, true);
1163   if ( iPos != wxNOT_FOUND && iPos != 0 )
1164     str = wxString(c_str(), iPos);
1165
1166   return str;
1167 }
1168
1169 /// get all characters after the first occurence of ch
1170 /// (returns empty string if ch not found)
1171 wxString wxString::AfterFirst(wxUniChar ch) const
1172 {
1173   wxString str;
1174   int iPos = Find(ch);
1175   if ( iPos != wxNOT_FOUND )
1176     str = wx_str() + iPos + 1;
1177
1178   return str;
1179 }
1180
1181 // replace first (or all) occurences of some substring with another one
1182 size_t wxString::Replace(const wxString& strOld,
1183                          const wxString& strNew, bool bReplaceAll)
1184 {
1185     // if we tried to replace an empty string we'd enter an infinite loop below
1186     wxCHECK_MSG( !strOld.empty(), 0,
1187                  _T("wxString::Replace(): invalid parameter") );
1188
1189     size_t uiCount = 0;   // count of replacements made
1190
1191     size_t uiOldLen = strOld.length();
1192     size_t uiNewLen = strNew.length();
1193
1194     size_t dwPos = 0;
1195
1196     while ( (*this)[dwPos] != wxT('\0') )
1197     {
1198         //DO NOT USE STRSTR HERE
1199         //this string can contain embedded null characters,
1200         //so strstr will function incorrectly
1201         dwPos = find(strOld, dwPos);
1202         if ( dwPos == npos )
1203             break;                  // exit the loop
1204         else
1205         {
1206             //replace this occurance of the old string with the new one
1207             replace(dwPos, uiOldLen, strNew, uiNewLen);
1208
1209             //move up pos past the string that was replaced
1210             dwPos += uiNewLen;
1211
1212             //increase replace count
1213             ++uiCount;
1214
1215             // stop now?
1216             if ( !bReplaceAll )
1217                 break;                  // exit the loop
1218         }
1219     }
1220
1221     return uiCount;
1222 }
1223
1224 bool wxString::IsAscii() const
1225 {
1226     for ( const_iterator i = begin(); i != end(); ++i )
1227     {
1228         if ( !(*i).IsAscii() )
1229             return false;
1230     }
1231
1232     return true;
1233 }
1234
1235 bool wxString::IsWord() const
1236 {
1237     for ( const_iterator i = begin(); i != end(); ++i )
1238     {
1239         if ( !wxIsalpha(*i) )
1240             return false;
1241     }
1242
1243     return true;
1244 }
1245
1246 bool wxString::IsNumber() const
1247 {
1248     if ( empty() )
1249         return true;
1250
1251     const_iterator i = begin();
1252
1253     if ( *i == _T('-') || *i == _T('+') )
1254         ++i;
1255
1256     for ( ; i != end(); ++i )
1257     {
1258         if ( !wxIsdigit(*i) )
1259             return false;
1260     }
1261
1262     return true;
1263 }
1264
1265 wxString wxString::Strip(stripType w) const
1266 {
1267     wxString s = *this;
1268     if ( w & leading ) s.Trim(false);
1269     if ( w & trailing ) s.Trim(true);
1270     return s;
1271 }
1272
1273 // ---------------------------------------------------------------------------
1274 // case conversion
1275 // ---------------------------------------------------------------------------
1276
1277 wxString& wxString::MakeUpper()
1278 {
1279   for ( iterator it = begin(), en = end(); it != en; ++it )
1280     *it = (wxChar)wxToupper(*it);
1281
1282   return *this;
1283 }
1284
1285 wxString& wxString::MakeLower()
1286 {
1287   for ( iterator it = begin(), en = end(); it != en; ++it )
1288     *it = (wxChar)wxTolower(*it);
1289
1290   return *this;
1291 }
1292
1293 // ---------------------------------------------------------------------------
1294 // trimming and padding
1295 // ---------------------------------------------------------------------------
1296
1297 // some compilers (VC++ 6.0 not to name them) return true for a call to
1298 // isspace('ê') in the C locale which seems to be broken to me, but we have to
1299 // live with this by checking that the character is a 7 bit one - even if this
1300 // may fail to detect some spaces (I don't know if Unicode doesn't have
1301 // space-like symbols somewhere except in the first 128 chars), it is arguably
1302 // still better than trimming away accented letters
1303 inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1304
1305 // trims spaces (in the sense of isspace) from left or right side
1306 wxString& wxString::Trim(bool bFromRight)
1307 {
1308     // first check if we're going to modify the string at all
1309     if ( !empty() &&
1310          (
1311           (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1312           (!bFromRight && wxSafeIsspace(GetChar(0u)))
1313          )
1314        )
1315     {
1316         if ( bFromRight )
1317         {
1318             // find last non-space character
1319             reverse_iterator psz = rbegin();
1320             while ( (psz != rend()) && wxSafeIsspace(*psz) )
1321                 psz++;
1322
1323             // truncate at trailing space start
1324             erase(psz.base(), end());
1325         }
1326         else
1327         {
1328             // find first non-space character
1329             iterator psz = begin();
1330             while ( (psz != end()) && wxSafeIsspace(*psz) )
1331                 psz++;
1332
1333             // fix up data and length
1334             erase(begin(), psz);
1335         }
1336     }
1337
1338     return *this;
1339 }
1340
1341 // adds nCount characters chPad to the string from either side
1342 wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1343 {
1344     wxString s(chPad, nCount);
1345
1346     if ( bFromRight )
1347         *this += s;
1348     else
1349     {
1350         s += *this;
1351         swap(s);
1352     }
1353
1354     return *this;
1355 }
1356
1357 // truncate the string
1358 wxString& wxString::Truncate(size_t uiLen)
1359 {
1360     if ( uiLen < length() )
1361     {
1362         erase(begin() + uiLen, end());
1363     }
1364     //else: nothing to do, string is already short enough
1365
1366     return *this;
1367 }
1368
1369 // ---------------------------------------------------------------------------
1370 // finding (return wxNOT_FOUND if not found and index otherwise)
1371 // ---------------------------------------------------------------------------
1372
1373 // find a character
1374 int wxString::Find(wxUniChar ch, bool bFromEnd) const
1375 {
1376     size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1377
1378     return (idx == npos) ? wxNOT_FOUND : (int)idx;
1379 }
1380
1381 // ----------------------------------------------------------------------------
1382 // conversion to numbers
1383 // ----------------------------------------------------------------------------
1384
1385 // The implementation of all the functions below is exactly the same so factor
1386 // it out. Note that number extraction works correctly on UTF-8 strings, so
1387 // we can use wxStringCharType and wx_str() for maximum efficiency.
1388
1389 template <typename T>
1390 bool wxStringToIntType(const wxStringCharType *start,
1391                        T *val,
1392                        int base,
1393                        T (*func)(const wxStringCharType*, wxStringCharType**, int))
1394 {
1395     wxCHECK_MSG( val, false, _T("NULL output pointer") );
1396     wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );
1397
1398 #ifndef __WXWINCE__
1399     errno = 0;
1400 #endif
1401
1402     wxStringCharType *end;
1403     *val = (*func)(start, &end, base);
1404
1405     // return true only if scan was stopped by the terminating NUL and if the
1406     // string was not empty to start with and no under/overflow occurred
1407     return !*end && (end != start)
1408 #ifndef __WXWINCE__
1409         && (errno != ERANGE)
1410 #endif
1411     ;
1412 }
1413
1414 bool wxString::ToLong(long *val, int base) const
1415 {
1416     return wxStringToIntType(wx_str(), val, base, wxStrtol);
1417 }
1418
1419 bool wxString::ToULong(unsigned long *val, int base) const
1420 {
1421     return wxStringToIntType(wx_str(), val, base, wxStrtoul);
1422 }
1423
1424 bool wxString::ToLongLong(wxLongLong_t *val, int base) const
1425 {
1426     return wxStringToIntType(wx_str(), val, base, wxStrtoll);
1427 }
1428
1429 bool wxString::ToULongLong(wxULongLong_t *val, int base) const
1430 {
1431     return wxStringToIntType(wx_str(), val, base, wxStrtoull);
1432 }
1433
1434 bool wxString::ToDouble(double *val) const
1435 {
1436     wxCHECK_MSG( val, false, _T("NULL pointer in wxString::ToDouble") );
1437
1438 #ifndef __WXWINCE__
1439     errno = 0;
1440 #endif
1441
1442     const wxChar *start = c_str();
1443     wxChar *end;
1444     *val = wxStrtod(start, &end);
1445
1446     // return true only if scan was stopped by the terminating NUL and if the
1447     // string was not empty to start with and no under/overflow occurred
1448     return !*end && (end != start)
1449 #ifndef __WXWINCE__
1450         && (errno != ERANGE)
1451 #endif
1452     ;
1453 }
1454
1455 // ---------------------------------------------------------------------------
1456 // formatted output
1457 // ---------------------------------------------------------------------------
1458
1459 #if !wxUSE_UTF8_LOCALE_ONLY
1460 /* static */
1461 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1462 wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
1463 #else
1464 wxString wxString::DoFormatWchar(const wxChar *format, ...)
1465 #endif
1466 {
1467     va_list argptr;
1468     va_start(argptr, format);
1469
1470     wxString s;
1471     s.PrintfV(format, argptr);
1472
1473     va_end(argptr);
1474
1475     return s;
1476 }
1477 #endif // !wxUSE_UTF8_LOCALE_ONLY
1478
#if wxUSE_UNICODE_UTF8
// Return a new string produced by formatting the variable arguments according
// to the char format string; the real work is done by PrintfV().
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
1494
1495 /* static */
1496 wxString wxString::FormatV(const wxString& format, va_list argptr)
1497 {
1498     wxString s;
1499     s.PrintfV(format, argptr);
1500     return s;
1501 }
1502
1503 #if !wxUSE_UTF8_LOCALE_ONLY
1504 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1505 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
1506 #else
1507 int wxString::DoPrintfWchar(const wxChar *format, ...)
1508 #endif
1509 {
1510     va_list argptr;
1511     va_start(argptr, format);
1512
1513 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1514     // get a pointer to the wxString instance; we have to use dynamic_cast<>
1515     // because it's the only cast that works safely for downcasting when
1516     // multiple inheritance is used:
1517     wxString *str = static_cast<wxString*>(this);
1518 #else
1519     wxString *str = this;
1520 #endif
1521
1522     int iLen = str->PrintfV(format, argptr);
1523
1524     va_end(argptr);
1525
1526     return iLen;
1527 }
1528 #endif // !wxUSE_UTF8_LOCALE_ONLY
1529
#if wxUSE_UNICODE_UTF8
// Replace the contents of the string with the result of formatting the
// variable arguments according to the char format string and return the
// value returned by PrintfV().
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);
    const int iLen = PrintfV(format, args);
    va_end(args);

    return iLen;
}
#endif // wxUSE_UNICODE_UTF8
1543
1544 #if wxUSE_UNICODE_UTF8
1545 template<typename BufferType>
1546 #else
1547 // we only need one version in non-UTF8 builds and at least two Windows
1548 // compilers have problems with this function template, so use just one
1549 // normal function here
1550 #endif
1551 static int DoStringPrintfV(wxString& str,
1552                            const wxString& format, va_list argptr)
1553 {
1554     int size = 1024;
1555
1556     for ( ;; )
1557     {
1558 #if wxUSE_UNICODE_UTF8
1559         BufferType tmp(str, size + 1);
1560         typename BufferType::CharType *buf = tmp;
1561 #else
1562         wxStringBuffer tmp(str, size + 1);
1563         wxChar *buf = tmp;
1564 #endif
1565
1566         if ( !buf )
1567         {
1568             // out of memory
1569             return -1;
1570         }
1571
1572         // wxVsnprintf() may modify the original arg pointer, so pass it
1573         // only a copy
1574         va_list argptrcopy;
1575         wxVaCopy(argptrcopy, argptr);
1576         int len = wxVsnprintf(buf, size, format, argptrcopy);
1577         va_end(argptrcopy);
1578
1579         // some implementations of vsnprintf() don't NUL terminate
1580         // the string if there is not enough space for it so
1581         // always do it manually
1582         buf[size] = _T('\0');
1583
1584         // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1585         // total number of characters which would have been written if the
1586         // buffer were large enough (newer standards such as Unix98)
1587         if ( len < 0 )
1588         {
1589             // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1590             //     wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1591             //     is true if *both* of them use our own implementation,
1592             //     otherwise we can't be sure
1593 #if wxUSE_WXVSNPRINTF
1594             // we know that our own implementation of wxVsnprintf() returns -1
1595             // only for a format error - thus there's something wrong with
1596             // the user's format string
1597             return -1;
1598 #else // possibly using system version
1599             // assume it only returns error if there is not enough space, but
1600             // as we don't know how much we need, double the current size of
1601             // the buffer
1602             size *= 2;
1603 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1604         }
1605         else if ( len >= size )
1606         {
1607 #if wxUSE_WXVSNPRINTF
1608             // we know that our own implementation of wxVsnprintf() returns
1609             // size+1 when there's not enough space but that's not the size
1610             // of the required buffer!
1611             size *= 2;      // so we just double the current size of the buffer
1612 #else
1613             // some vsnprintf() implementations NUL-terminate the buffer and
1614             // some don't in len == size case, to be safe always add 1
1615             size = len + 1;
1616 #endif
1617         }
1618         else // ok, there was enough space
1619         {
1620             break;
1621         }
1622     }
1623
1624     // we could have overshot
1625     str.Shrink();
1626
1627     return str.length();
1628 }
1629
1630 int wxString::PrintfV(const wxString& format, va_list argptr)
1631 {
1632 #if wxUSE_UNICODE_UTF8
1633     #if wxUSE_STL_BASED_WXSTRING
1634         typedef wxStringTypeBuffer<char> Utf8Buffer;
1635     #else
1636         typedef wxImplStringBuffer Utf8Buffer;
1637     #endif
1638 #endif
1639
1640 #if wxUSE_UTF8_LOCALE_ONLY
1641     return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
1642 #else
1643     #if wxUSE_UNICODE_UTF8
1644     if ( wxLocaleIsUtf8 )
1645         return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
1646     else
1647         // wxChar* version
1648         return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
1649     #else
1650         return DoStringPrintfV(*this, format, argptr);
1651     #endif // UTF8/WCHAR
1652 #endif
1653 }
1654
1655 // ----------------------------------------------------------------------------
1656 // misc other operations
1657 // ----------------------------------------------------------------------------
1658
1659 // returns true if the string matches the pattern which may contain '*' and
1660 // '?' metacharacters (as usual, '?' matches any character and '*' any number
1661 // of them)
1662 bool wxString::Matches(const wxString& mask) const
1663 {
1664     // I disable this code as it doesn't seem to be faster (in fact, it seems
1665     // to be much slower) than the old, hand-written code below and using it
1666     // here requires always linking with libregex even if the user code doesn't
1667     // use it
1668 #if 0 // wxUSE_REGEX
1669     // first translate the shell-like mask into a regex
1670     wxString pattern;
1671     pattern.reserve(wxStrlen(pszMask));
1672
1673     pattern += _T('^');
1674     while ( *pszMask )
1675     {
1676         switch ( *pszMask )
1677         {
1678             case _T('?'):
1679                 pattern += _T('.');
1680                 break;
1681
1682             case _T('*'):
1683                 pattern += _T(".*");
1684                 break;
1685
1686             case _T('^'):
1687             case _T('.'):
1688             case _T('$'):
1689             case _T('('):
1690             case _T(')'):
1691             case _T('|'):
1692             case _T('+'):
1693             case _T('\\'):
1694                 // these characters are special in a RE, quote them
1695                 // (however note that we don't quote '[' and ']' to allow
1696                 // using them for Unix shell like matching)
1697                 pattern += _T('\\');
1698                 // fall through
1699
1700             default:
1701                 pattern += *pszMask;
1702         }
1703
1704         pszMask++;
1705     }
1706     pattern += _T('$');
1707
1708     // and now use it
1709     return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
1710 #else // !wxUSE_REGEX
1711   // TODO: this is, of course, awfully inefficient...
1712
1713   // FIXME-UTF8: implement using iterators, remove #if
1714 #if wxUSE_UNICODE_UTF8
1715   wxWCharBuffer maskBuf = mask.wc_str();
1716   wxWCharBuffer txtBuf = wc_str();
1717   const wxChar *pszMask = maskBuf.data();
1718   const wxChar *pszTxt = txtBuf.data();
1719 #else
1720   const wxChar *pszMask = mask.wx_str();
1721   // the char currently being checked
1722   const wxChar *pszTxt = wx_str();
1723 #endif
1724
1725   // the last location where '*' matched
1726   const wxChar *pszLastStarInText = NULL;
1727   const wxChar *pszLastStarInMask = NULL;
1728
1729 match:
1730   for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
1731     switch ( *pszMask ) {
1732       case wxT('?'):
1733         if ( *pszTxt == wxT('\0') )
1734           return false;
1735
1736         // pszTxt and pszMask will be incremented in the loop statement
1737
1738         break;
1739
1740       case wxT('*'):
1741         {
1742           // remember where we started to be able to backtrack later
1743           pszLastStarInText = pszTxt;
1744           pszLastStarInMask = pszMask;
1745
1746           // ignore special chars immediately following this one
1747           // (should this be an error?)
1748           while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
1749             pszMask++;
1750
1751           // if there is nothing more, match
1752           if ( *pszMask == wxT('\0') )
1753             return true;
1754
1755           // are there any other metacharacters in the mask?
1756           size_t uiLenMask;
1757           const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));
1758
1759           if ( pEndMask != NULL ) {
1760             // we have to match the string between two metachars
1761             uiLenMask = pEndMask - pszMask;
1762           }
1763           else {
1764             // we have to match the remainder of the string
1765             uiLenMask = wxStrlen(pszMask);
1766           }
1767
1768           wxString strToMatch(pszMask, uiLenMask);
1769           const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
1770           if ( pMatch == NULL )
1771             return false;
1772
1773           // -1 to compensate "++" in the loop
1774           pszTxt = pMatch + uiLenMask - 1;
1775           pszMask += uiLenMask - 1;
1776         }
1777         break;
1778
1779       default:
1780         if ( *pszMask != *pszTxt )
1781           return false;
1782         break;
1783     }
1784   }
1785
1786   // match only if nothing left
1787   if ( *pszTxt == wxT('\0') )
1788     return true;
1789
1790   // if we failed to match, backtrack if we can
1791   if ( pszLastStarInText ) {
1792     pszTxt = pszLastStarInText + 1;
1793     pszMask = pszLastStarInMask;
1794
1795     pszLastStarInText = NULL;
1796
1797     // don't bother resetting pszLastStarInMask, it's unnecessary
1798
1799     goto match;
1800   }
1801
1802   return false;
1803 #endif // wxUSE_REGEX/!wxUSE_REGEX
1804 }
1805
1806 // Count the number of chars
1807 int wxString::Freq(wxUniChar ch) const
1808 {
1809     int count = 0;
1810     for ( const_iterator i = begin(); i != end(); ++i )
1811     {
1812         if ( *i == ch )
1813             count ++;
1814     }
1815     return count;
1816 }
1817
1818 // convert to upper case, return the copy of the string
1819 wxString wxString::Upper() const
1820 { wxString s(*this); return s.MakeUpper(); }
1821
1822 // convert to lower case, return the copy of the string
1823 wxString wxString::Lower() const { wxString s(*this); return s.MakeLower(); }