src/common/string.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/string.cpp
   3 // Purpose:     wxString class
   4 // Author:      Vadim Zeitlin, Ryan Norton
   5 // Modified by:
   6 // Created:     29/01/98
   7 // RCS-ID:      $Id$
   8 // Copyright:   (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
   9 //              (c) 2004 Ryan Norton <wxprojects@comcast.net>
  10 // Licence:     wxWindows licence
  11 /////////////////////////////////////////////////////////////////////////////
  12
  13 // ===========================================================================
  14 // headers, declarations, constants
  15 // ===========================================================================
  16
  17 // For compilers that support precompilation, includes "wx.h".
  18 #include "wx/wxprec.h"
  19
  20 #ifdef __BORLANDC__
  21     #pragma hdrstop
  22 #endif
  23
  24 #ifndef WX_PRECOMP
  25     #include "wx/string.h"
  26     #include "wx/wxcrtvararg.h"
  27 #endif
  28
  29 #include <ctype.h>
  30
  31 #ifndef __WXWINCE__
  32     #include <errno.h>
  33 #endif
  34
  35 #include <string.h>
  36 #include <stdlib.h>
  37
  38 #ifdef __SALFORDC__
  39     #include <clib.h>
  40 #endif
  41
  42 #include "wx/hashmap.h"
  43
  44 // string handling functions used by wxString:
  45 #if wxUSE_UNICODE_UTF8
  46     #define wxStringMemcpy   memcpy
  47     #define wxStringMemcmp   memcmp
  48     #define wxStringMemchr   memchr
  49     #define wxStringStrlen   strlen
  50 #else
  51     #define wxStringMemcpy   wxTmemcpy
  52     #define wxStringMemcmp   wxTmemcmp
  53     #define wxStringMemchr   wxTmemchr
  54     #define wxStringStrlen   wxStrlen
  55 #endif
  56
  57
  58 // ---------------------------------------------------------------------------
  59 // static class variables definition
  60 // ---------------------------------------------------------------------------
  61
  62 //According to STL _must_ be a -1 size_t
  63 const size_t wxString::npos = (size_t) -1;
  64
  65 // ----------------------------------------------------------------------------
  66 // global functions
  67 // ----------------------------------------------------------------------------
  68
  69 #if wxUSE_STD_IOSTREAM
  70
  71 #include <iostream>
  72
  73 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
  74 {
  75 // FIXME-UTF8: always, not only if wxUSE_UNICODE
  76 #if wxUSE_UNICODE && !defined(__BORLANDC__)
  77     return os << (const wchar_t*)str.AsWCharBuf();
  78 #else
  79     return os << (const char*)str.AsCharBuf();
  80 #endif
  81 }
  82
  83 wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
  84 {
  85     return os << str.c_str();
  86 }
  87
  88 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str)
  89 {
  90     return os << str.data();
  91 }
  92
  93 #ifndef __BORLANDC__
  94 wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)
  95 {
  96     return os << str.data();
  97 }
  98 #endif
  99
 100 #endif // wxUSE_STD_IOSTREAM
 101
 102 // ===========================================================================
 103 // wxString class core
 104 // ===========================================================================
 105
 106 #if wxUSE_UNICODE_UTF8
 107
 108 void wxString::PosLenToImpl(size_t pos, size_t len,
 109                             size_t *implPos, size_t *implLen) const
 110 {
 111     if ( pos == npos )
 112         *implPos = npos;
 113     else
 114     {
 115         const_iterator i = begin() + pos;
 116         *implPos = wxStringImpl::const_iterator(i.impl()) - m_impl.begin();
 117         if ( len == npos )
 118             *implLen = npos;
 119         else
 120         {
 121             // too large length is interpreted as "to the end of the string"
 122             // FIXME-UTF8: verify this is the case in std::string, assert
 123             // otherwise
 124             if ( pos + len > length() )
 125                 len = length() - pos;
 126
 127             *implLen = (i + len).impl() - i.impl();
 128         }
 129     }
 130 }
 131
 132 #endif // wxUSE_UNICODE_UTF8
 133
 134 // ----------------------------------------------------------------------------
 135 // wxCStrData converted strings caching
 136 // ----------------------------------------------------------------------------
 137
 138 // FIXME-UTF8: temporarily disabled because it doesn't work with global
 139 //             string objects; re-enable after fixing this bug and benchmarking
 140 //             performance to see if using a hash is a good idea at all
 141 #if 0
 142
 143 // For backward compatibility reasons, it must be possible to assign the value
 144 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
 145 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
 146 // because the memory would be freed immediately, but it has to be valid as long
 147 // as the string is not modified, so that code like this still works:
 148 //
 149 // const wxChar *s = str.c_str();
 150 // while ( s ) { ... }
 151
 152 // FIXME-UTF8: not thread safe!
 153 // FIXME-UTF8: we currently clear the cached conversion only when the string is
 154 //             destroyed, but we should do it when the string is modified, to
 155 //             keep memory usage down
 156 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
 157 //             invalidated the cache on every change, we could keep the previous
 158 //             conversion
 159 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
 160 //             to use mb_str() or wc_str() instead of (const [w]char*)c_str()
 161
 162 template<typename T>
 163 static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
 164 {
 165     typename T::iterator i = hash.find(wxConstCast(s, wxString));
 166     if ( i != hash.end() )
 167     {
 168         free(i->second);
 169         hash.erase(i);
 170     }
 171 }
 172
 173 #if wxUSE_UNICODE
 174 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
 175 //     so we have to use wxString* here and const-cast when used
 176 WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
 177                     wxStringCharConversionCache);
 178 static wxStringCharConversionCache gs_stringsCharCache;
 179
 180 const char* wxCStrData::AsChar() const
 181 {
 182     // remove previously cache value, if any (see FIXMEs above):
 183     DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
 184
 185     // convert the string and keep it:
 186     const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
 187         m_str->mb_str().release();
 188
 189     return s + m_offset;
 190 }
 191 #endif // wxUSE_UNICODE
 192
 193 #if !wxUSE_UNICODE_WCHAR
 194 WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
 195                     wxStringWCharConversionCache);
 196 static wxStringWCharConversionCache gs_stringsWCharCache;
 197
 198 const wchar_t* wxCStrData::AsWChar() const
 199 {
 200     // remove previously cache value, if any (see FIXMEs above):
 201     DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
 202
 203     // convert the string and keep it:
 204     const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
 205         m_str->wc_str().release();
 206
 207     return s + m_offset;
 208 }
 209 #endif // !wxUSE_UNICODE_WCHAR
 210
 211 wxString::~wxString()
 212 {
 213 #if wxUSE_UNICODE
 214     // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
 215     DeleteStringFromConversionCache(gs_stringsCharCache, this);
 216 #endif
 217 #if !wxUSE_UNICODE_WCHAR
 218     DeleteStringFromConversionCache(gs_stringsWCharCache, this);
 219 #endif
 220 }
 221 #endif
 222
 223 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 224 const char* wxCStrData::AsChar() const
 225 {
 226 #if wxUSE_UNICODE_UTF8
 227     if ( wxLocaleIsUtf8 )
 228         return AsInternal();
 229 #endif
 230     // under non-UTF8 locales, we have to convert the internal UTF-8
 231     // representation using wxConvLibc and cache the result
 232
 233     wxString *str = wxConstCast(m_str, wxString);
 234
 235     // convert the string:
 236     wxCharBuffer buf(str->mb_str());
 237
 238     // FIXME-UTF8: do the conversion in-place in the existing buffer
 239     if ( str->m_convertedToChar &&
 240          strlen(buf) == strlen(str->m_convertedToChar) )
 241     {
 242         // keep the same buffer for as long as possible, so that several calls
 243         // to c_str() in a row still work:
 244         strcpy(str->m_convertedToChar, buf);
 245     }
 246     else
 247     {
 248         str->m_convertedToChar = buf.release();
 249     }
 250
 251     // and keep it:
 252     return str->m_convertedToChar + m_offset;
 253 }
 254 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 255
 256 #if !wxUSE_UNICODE_WCHAR
 257 const wchar_t* wxCStrData::AsWChar() const
 258 {
 259     wxString *str = wxConstCast(m_str, wxString);
 260
 261     // convert the string:
 262     wxWCharBuffer buf(str->wc_str());
 263
 264     // FIXME-UTF8: do the conversion in-place in the existing buffer
 265     if ( str->m_convertedToWChar &&
 266          wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
 267     {
 268         // keep the same buffer for as long as possible, so that several calls
 269         // to c_str() in a row still work:
 270         memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
 271     }
 272     else
 273     {
 274         str->m_convertedToWChar = buf.release();
 275     }
 276
 277     // and keep it:
 278     return str->m_convertedToWChar + m_offset;
 279 }
 280 #endif // !wxUSE_UNICODE_WCHAR
 281
 282 // ===========================================================================
 283 // wxString class core
 284 // ===========================================================================
 285
 286 // ---------------------------------------------------------------------------
 287 // construction and conversion
 288 // ---------------------------------------------------------------------------
 289
 290 #if wxUSE_UNICODE_WCHAR
 291 /* static */
 292 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 293                                                const wxMBConv& conv)
 294 {
 295     // anything to do?
 296     if ( !psz || nLength == 0 )
 297         return SubstrBufFromMB(L"", 0);
 298
 299     if ( nLength == npos )
 300         nLength = wxNO_LEN;
 301
 302     size_t wcLen;
 303     wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 304     if ( !wcLen )
 305         return SubstrBufFromMB(_T(""), 0);
 306     else
 307         return SubstrBufFromMB(wcBuf, wcLen);
 308 }
 309 #endif // wxUSE_UNICODE_WCHAR
 310
 311 #if wxUSE_UNICODE_UTF8
 312 /* static */
 313 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 314                                                const wxMBConv& conv)
 315 {
 316     // anything to do?
 317     if ( !psz || nLength == 0 )
 318         return SubstrBufFromMB("", 0);
 319
 320     // if psz is already in UTF-8, we don't have to do the roundtrip to
 321     // wchar_t* and back:
 322     if ( conv.IsUTF8() )
 323     {
 324         // we need to validate the input because UTF8 iterators assume valid
 325         // UTF-8 sequence and psz may be invalid:
 326         if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
 327         {
 328             return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz), nLength);
 329         }
 330         // else: do the roundtrip through wchar_t*
 331     }
 332
 333     if ( nLength == npos )
 334         nLength = wxNO_LEN;
 335
 336     // first convert to wide string:
 337     size_t wcLen;
 338     wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 339     if ( !wcLen )
 340         return SubstrBufFromMB("", 0);
 341
 342     // and then to UTF-8:
 343     SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxConvUTF8));
 344     // widechar -> UTF-8 conversion isn't supposed to ever fail:
 345     wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
 346
 347     return buf;
 348 }
 349 #endif // wxUSE_UNICODE_UTF8
 350
 351 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 352 /* static */
 353 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
 354                                                const wxMBConv& conv)
 355 {
 356     // anything to do?
 357     if ( !pwz || nLength == 0 )
 358         return SubstrBufFromWC("", 0);
 359
 360     if ( nLength == npos )
 361         nLength = wxNO_LEN;
 362
 363     size_t mbLen;
 364     wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
 365     if ( !mbLen )
 366         return SubstrBufFromWC("", 0);
 367     else
 368         return SubstrBufFromWC(mbBuf, mbLen);
 369 }
 370 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 371
 372
 373 #if wxUSE_UNICODE_WCHAR
 374
 375 //Convert wxString in Unicode mode to a multi-byte string
 376 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
 377 {
 378     return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL);
 379 }
 380
 381 #elif wxUSE_UNICODE_UTF8
 382
 383 const wxWCharBuffer wxString::wc_str() const
 384 {
 385     return wxConvUTF8.cMB2WC(m_impl.c_str(),
 386                              m_impl.length() + 1 /* size, not length */,
 387                              NULL);
 388 }
 389
 390 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
 391 {
 392     if ( conv.IsUTF8() )
 393         return wxCharBuffer::CreateNonOwned(m_impl.c_str());
 394
 395     // FIXME-UTF8: use wc_str() here once we have buffers with length
 396
 397     size_t wcLen;
 398     wxWCharBuffer wcBuf(
 399             wxConvUTF8.cMB2WC(m_impl.c_str(),
 400                               m_impl.length() + 1 /* size, not length */,
 401                               &wcLen));
 402     if ( !wcLen )
 403         return wxCharBuffer("");
 404
 405     return conv.cWC2MB(wcBuf, wcLen, NULL);
 406 }
 407
 408 #else // ANSI
 409
 410 //Converts this string to a wide character string if unicode
 411 //mode is not enabled and wxUSE_WCHAR_T is enabled
 412 const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
 413 {
 414     return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL);
 415 }
 416
 417 #endif // Unicode/ANSI
 418
 419 // shrink to minimal size (releasing extra memory)
 420 bool wxString::Shrink()
 421 {
 422   wxString tmp(begin(), end());
 423   swap(tmp);
 424   return tmp.length() == length();
 425 }
 426
 427 // deprecated compatibility code:
 428 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
 429 wxStringCharType *wxString::GetWriteBuf(size_t nLen)
 430 {
 431     return DoGetWriteBuf(nLen);
 432 }
 433
 434 void wxString::UngetWriteBuf()
 435 {
 436     DoUngetWriteBuf();
 437 }
 438
 439 void wxString::UngetWriteBuf(size_t nLen)
 440 {
 441     DoUngetWriteBuf(nLen);
 442 }
 443 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
 444
 445
 446 // ---------------------------------------------------------------------------
 447 // data access
 448 // ---------------------------------------------------------------------------
 449
 450 // all functions are inline in string.h
 451
 452 // ---------------------------------------------------------------------------
 453 // concatenation operators
 454 // ---------------------------------------------------------------------------
 455
 456 /*
 457  * concatenation functions come in 5 flavours:
 458  *  string + string
 459  *  char   + string      and      string + char
 460  *  C str  + string      and      string + C str
 461  */
 462
 463 wxString operator+(const wxString& str1, const wxString& str2)
 464 {
 465 #if !wxUSE_STL_BASED_WXSTRING
 466     wxASSERT( str1.IsValid() );
 467     wxASSERT( str2.IsValid() );
 468 #endif
 469
 470     wxString s = str1;
 471     s += str2;
 472
 473     return s;
 474 }
 475
 476 wxString operator+(const wxString& str, wxUniChar ch)
 477 {
 478 #if !wxUSE_STL_BASED_WXSTRING
 479     wxASSERT( str.IsValid() );
 480 #endif
 481
 482     wxString s = str;
 483     s += ch;
 484
 485     return s;
 486 }
 487
 488 wxString operator+(wxUniChar ch, const wxString& str)
 489 {
 490 #if !wxUSE_STL_BASED_WXSTRING
 491     wxASSERT( str.IsValid() );
 492 #endif
 493
 494     wxString s = ch;
 495     s += str;
 496
 497     return s;
 498 }
 499
 500 wxString operator+(const wxString& str, const char *psz)
 501 {
 502 #if !wxUSE_STL_BASED_WXSTRING
 503     wxASSERT( str.IsValid() );
 504 #endif
 505
 506     wxString s;
 507     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 508         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 509     }
 510     s += str;
 511     s += psz;
 512
 513     return s;
 514 }
 515
 516 wxString operator+(const wxString& str, const wchar_t *pwz)
 517 {
 518 #if !wxUSE_STL_BASED_WXSTRING
 519     wxASSERT( str.IsValid() );
 520 #endif
 521
 522     wxString s;
 523     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 524         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 525     }
 526     s += str;
 527     s += pwz;
 528
 529     return s;
 530 }
 531
 532 wxString operator+(const char *psz, const wxString& str)
 533 {
 534 #if !wxUSE_STL_BASED_WXSTRING
 535     wxASSERT( str.IsValid() );
 536 #endif
 537
 538     wxString s;
 539     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 540         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 541     }
 542     s = psz;
 543     s += str;
 544
 545     return s;
 546 }
 547
 548 wxString operator+(const wchar_t *pwz, const wxString& str)
 549 {
 550 #if !wxUSE_STL_BASED_WXSTRING
 551     wxASSERT( str.IsValid() );
 552 #endif
 553
 554     wxString s;
 555     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 556         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 557     }
 558     s = pwz;
 559     s += str;
 560
 561     return s;
 562 }
 563
 564 // ---------------------------------------------------------------------------
 565 // string comparison
 566 // ---------------------------------------------------------------------------
 567
 568 #ifdef HAVE_STD_STRING_COMPARE
 569
 570 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
 571 //     UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
 572 //     sort strings in characters code point order by sorting the byte sequence
 573 //     in byte values order (i.e. what strcmp() and memcmp() do).
 574
 575 int wxString::compare(const wxString& str) const
 576 {
 577     return m_impl.compare(str.m_impl);
 578 }
 579
 580 int wxString::compare(size_t nStart, size_t nLen,
 581                       const wxString& str) const
 582 {
 583     size_t pos, len;
 584     PosLenToImpl(nStart, nLen, &pos, &len);
 585     return m_impl.compare(pos, len, str.m_impl);
 586 }
 587
 588 int wxString::compare(size_t nStart, size_t nLen,
 589                       const wxString& str,
 590                       size_t nStart2, size_t nLen2) const
 591 {
 592     size_t pos, len;
 593     PosLenToImpl(nStart, nLen, &pos, &len);
 594
 595     size_t pos2, len2;
 596     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 597
 598     return m_impl.compare(pos, len, str.m_impl, pos2, len2);
 599 }
 600
 601 int wxString::compare(const char* sz) const
 602 {
 603     return m_impl.compare(ImplStr(sz));
 604 }
 605
 606 int wxString::compare(const wchar_t* sz) const
 607 {
 608     return m_impl.compare(ImplStr(sz));
 609 }
 610
 611 int wxString::compare(size_t nStart, size_t nLen,
 612                       const char* sz, size_t nCount) const
 613 {
 614     size_t pos, len;
 615     PosLenToImpl(nStart, nLen, &pos, &len);
 616
 617     SubstrBufFromMB str(ImplStr(sz, nCount));
 618
 619     return m_impl.compare(pos, len, str.data, str.len);
 620 }
 621
 622 int wxString::compare(size_t nStart, size_t nLen,
 623                       const wchar_t* sz, size_t nCount) const
 624 {
 625     size_t pos, len;
 626     PosLenToImpl(nStart, nLen, &pos, &len);
 627
 628     SubstrBufFromWC str(ImplStr(sz, nCount));
 629
 630     return m_impl.compare(pos, len, str.data, str.len);
 631 }
 632
 633 #else // !HAVE_STD_STRING_COMPARE
 634
 635 static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
 636                           const wxStringCharType* s2, size_t l2)
 637 {
 638     if( l1 == l2 )
 639         return wxStringMemcmp(s1, s2, l1);
 640     else if( l1 < l2 )
 641     {
 642         int ret = wxStringMemcmp(s1, s2, l1);
 643         return ret == 0 ? -1 : ret;
 644     }
 645     else
 646     {
 647         int ret = wxStringMemcmp(s1, s2, l2);
 648         return ret == 0 ? +1 : ret;
 649     }
 650 }
 651
 652 int wxString::compare(const wxString& str) const
 653 {
 654     return ::wxDoCmp(m_impl.data(), m_impl.length(),
 655                      str.m_impl.data(), str.m_impl.length());
 656 }
 657
 658 int wxString::compare(size_t nStart, size_t nLen,
 659                       const wxString& str) const
 660 {
 661     wxASSERT(nStart <= length());
 662     size_type strLen = length() - nStart;
 663     nLen = strLen < nLen ? strLen : nLen;
 664
 665     size_t pos, len;
 666     PosLenToImpl(nStart, nLen, &pos, &len);
 667
 668     return ::wxDoCmp(m_impl.data() + pos,  len,
 669                      str.m_impl.data(), str.m_impl.length());
 670 }
 671
 672 int wxString::compare(size_t nStart, size_t nLen,
 673                       const wxString& str,
 674                       size_t nStart2, size_t nLen2) const
 675 {
 676     wxASSERT(nStart <= length());
 677     wxASSERT(nStart2 <= str.length());
 678     size_type strLen  =     length() - nStart,
 679               strLen2 = str.length() - nStart2;
 680     nLen  = strLen  < nLen  ? strLen  : nLen;
 681     nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
 682
 683     size_t pos, len;
 684     PosLenToImpl(nStart, nLen, &pos, &len);
 685     size_t pos2, len2;
 686     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 687
 688     return ::wxDoCmp(m_impl.data() + pos, len,
 689                      str.m_impl.data() + pos2, len2);
 690 }
 691
 692 int wxString::compare(const char* sz) const
 693 {
 694     SubstrBufFromMB str(ImplStr(sz, npos));
 695     if ( str.len == npos )
 696         str.len = wxStringStrlen(str.data);
 697     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 698 }
 699
 700 int wxString::compare(const wchar_t* sz) const
 701 {
 702     SubstrBufFromWC str(ImplStr(sz, npos));
 703     if ( str.len == npos )
 704         str.len = wxStringStrlen(str.data);
 705     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 706 }
 707
 708 int wxString::compare(size_t nStart, size_t nLen,
 709                       const char* sz, size_t nCount) const
 710 {
 711     wxASSERT(nStart <= length());
 712     size_type strLen = length() - nStart;
 713     nLen = strLen < nLen ? strLen : nLen;
 714
 715     size_t pos, len;
 716     PosLenToImpl(nStart, nLen, &pos, &len);
 717
 718     SubstrBufFromMB str(ImplStr(sz, nCount));
 719     if ( str.len == npos )
 720         str.len = wxStringStrlen(str.data);
 721
 722     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 723 }
 724
 725 int wxString::compare(size_t nStart, size_t nLen,
 726                       const wchar_t* sz, size_t nCount) const
 727 {
 728     wxASSERT(nStart <= length());
 729     size_type strLen = length() - nStart;
 730     nLen = strLen < nLen ? strLen : nLen;
 731
 732     size_t pos, len;
 733     PosLenToImpl(nStart, nLen, &pos, &len);
 734
 735     SubstrBufFromWC str(ImplStr(sz, nCount));
 736     if ( str.len == npos )
 737         str.len = wxStringStrlen(str.data);
 738
 739     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 740 }
 741
 742 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
 743
 744
 745 // ---------------------------------------------------------------------------
 746 // find_{first,last}_[not]_of functions
 747 // ---------------------------------------------------------------------------
 748
 749 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
 750
// NB: All these functions are implemented with the argument being wxChar*,
//     i.e. a wide character string in any Unicode build, even though the
//     native string representation is char* in the UTF-8 build. This is
//     because we couldn't use memchr() to determine if a character is in a
//     set encoded as UTF-8.
 755
 756 size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
 757 {
 758     return find_first_of(sz, nStart, wxStrlen(sz));
 759 }
 760
 761 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
 762 {
 763     return find_first_not_of(sz, nStart, wxStrlen(sz));
 764 }
 765
 766 size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
 767 {
 768     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 769
 770     size_t idx = nStart;
 771     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 772     {
 773         if ( wxTmemchr(sz, *i, n) )
 774             return idx;
 775     }
 776
 777     return npos;
 778 }
 779
 780 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
 781 {
 782     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 783
 784     size_t idx = nStart;
 785     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 786     {
 787         if ( !wxTmemchr(sz, *i, n) )
 788             return idx;
 789     }
 790
 791     return npos;
 792 }
 793
 794
 795 size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
 796 {
 797     return find_last_of(sz, nStart, wxStrlen(sz));
 798 }
 799
 800 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
 801 {
 802     return find_last_not_of(sz, nStart, wxStrlen(sz));
 803 }
 804
 805 size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
 806 {
 807     size_t len = length();
 808
 809     if ( nStart == npos )
 810     {
 811         nStart = len - 1;
 812     }
 813     else
 814     {
 815         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 816     }
 817
 818     size_t idx = nStart;
 819     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 820           i != rend(); --idx, ++i )
 821     {
 822         if ( wxTmemchr(sz, *i, n) )
 823             return idx;
 824     }
 825
 826     return npos;
 827 }
 828
 829 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
 830 {
 831     size_t len = length();
 832
 833     if ( nStart == npos )
 834     {
 835         nStart = len - 1;
 836     }
 837     else
 838     {
 839         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 840     }
 841
 842     size_t idx = nStart;
 843     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 844           i != rend(); --idx, ++i )
 845     {
 846         if ( !wxTmemchr(sz, *i, n) )
 847             return idx;
 848     }
 849
 850     return npos;
 851 }
 852
 853 size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
 854 {
 855     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 856
 857     size_t idx = nStart;
 858     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 859     {
 860         if ( *i != ch )
 861             return idx;
 862     }
 863
 864     return npos;
 865 }
 866
 867 size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
 868 {
 869     size_t len = length();
 870
 871     if ( nStart == npos )
 872     {
 873         nStart = len - 1;
 874     }
 875     else
 876     {
 877         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 878     }
 879
 880     size_t idx = nStart;
 881     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 882           i != rend(); --idx, ++i )
 883     {
 884         if ( *i != ch )
 885             return idx;
 886     }
 887
 888     return npos;
 889 }
 890
 891 // the functions above were implemented for wchar_t* arguments in Unicode
 892 // build and char* in ANSI build; below are implementations for the other
 893 // version:
 894 #if wxUSE_UNICODE
 895     #define wxOtherCharType char
 896     #define STRCONV         (const wxChar*)wxConvLibc.cMB2WC
 897 #else
 898     #define wxOtherCharType wchar_t
 899     #define STRCONV         (const wxChar*)wxConvLibc.cWC2MB
 900 #endif
 901
 902 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
 903     { return find_first_of(STRCONV(sz), nStart); }
 904
 905 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
 906                                size_t n) const
 907     { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
 908 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
 909     { return find_last_of(STRCONV(sz), nStart); }
 910 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
 911                               size_t n) const
 912     { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
 913 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
 914     { return find_first_not_of(STRCONV(sz), nStart); }
 915 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
 916                                    size_t n) const
 917     { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
 918 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
 919     { return find_last_not_of(STRCONV(sz), nStart); }
 920 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
 921                                   size_t n) const
 922     { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
 923
 924 #undef wxOtherCharType
 925 #undef STRCONV
 926
 927 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
 928
 929 // ===========================================================================
 930 // other common string functions
 931 // ===========================================================================
 932
 933 int wxString::CmpNoCase(const wxString& s) const
 934 {
 935     // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
 936
 937     size_t idx = 0;
 938     const_iterator i1 = begin();
 939     const_iterator end1 = end();
 940     const_iterator i2 = s.begin();
 941     const_iterator end2 = s.end();
 942
 943     for ( ; i1 != end1 && i2 != end2; ++idx, ++i1, ++i2 )
 944     {
 945         wxUniChar lower1 = (wxChar)wxTolower(*i1);
 946         wxUniChar lower2 = (wxChar)wxTolower(*i2);
 947         if ( lower1 != lower2 )
 948             return lower1 < lower2 ? -1 : 1;
 949     }
 950
 951     size_t len1 = length();
 952     size_t len2 = s.length();
 953
 954     if ( len1 < len2 )
 955         return -1;
 956     else if ( len1 > len2 )
 957         return 1;
 958     return 0;
 959 }
 960
 961
 962 #if wxUSE_UNICODE
 963
 964 #ifdef __MWERKS__
 965 #ifndef __SCHAR_MAX__
 966 #define __SCHAR_MAX__ 127
 967 #endif
 968 #endif
 969
 970 wxString wxString::FromAscii(const char *ascii)
 971 {
 972     if (!ascii)
 973        return wxEmptyString;
 974
 975     size_t len = strlen(ascii);
 976     wxString res;
 977
 978     if ( len )
 979     {
 980         wxImplStringBuffer buf(res, len);
 981         wxStringCharType *dest = buf;
 982
 983         for ( ;; )
 984         {
 985             unsigned char c = (unsigned char)*ascii++;
 986             wxASSERT_MSG( c < 0x80,
 987                           _T("Non-ASCII value passed to FromAscii().") );
 988
 989             *dest++ = (wchar_t)c;
 990
 991             if ( c == '\0' )
 992                 break;
 993         }
 994     }
 995
 996     return res;
 997 }
 998
 999 wxString wxString::FromAscii(const char ascii)
1000 {
1001     // What do we do with '\0' ?
1002
1003     unsigned char c = (unsigned char)ascii;
1004
1005     wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") );
1006
1007     // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1008     return wxString(wxUniChar((wchar_t)c));
1009 }
1010
1011 const wxCharBuffer wxString::ToAscii() const
1012 {
1013     // this will allocate enough space for the terminating NUL too
1014     wxCharBuffer buffer(length());
1015     char *dest = buffer.data();
1016
1017     for ( const_iterator i = begin(); i != end(); ++i )
1018     {
1019         wxUniChar c(*i);
1020         // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1021         *dest++ = c.IsAscii() ? (char)c : '_';
1022
1023         // the output string can't have embedded NULs anyhow, so we can safely
1024         // stop at first of them even if we do have any
1025         if ( !c )
1026             break;
1027     }
1028
1029     return buffer;
1030 }
1031
1032 #endif // wxUSE_UNICODE
1033
1034 // extract string of length nCount starting at nFirst
1035 wxString wxString::Mid(size_t nFirst, size_t nCount) const
1036 {
1037     size_t nLen = length();
1038
1039     // default value of nCount is npos and means "till the end"
1040     if ( nCount == npos )
1041     {
1042         nCount = nLen - nFirst;
1043     }
1044
1045     // out-of-bounds requests return sensible things
1046     if ( nFirst + nCount > nLen )
1047     {
1048         nCount = nLen - nFirst;
1049     }
1050
1051     if ( nFirst > nLen )
1052     {
1053         // AllocCopy() will return empty string
1054         return wxEmptyString;
1055     }
1056
1057     wxString dest(*this, nFirst, nCount);
1058     if ( dest.length() != nCount )
1059     {
1060         wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1061     }
1062
1063     return dest;
1064 }
1065
1066 // check that the string starts with prefix and return the rest of the string
1067 // in the provided pointer if it is not NULL, otherwise return false
1068 bool wxString::StartsWith(const wxChar *prefix, wxString *rest) const
1069 {
1070     wxASSERT_MSG( prefix, _T("invalid parameter in wxString::StartsWith") );
1071
1072     // first check if the beginning of the string matches the prefix: note
1073     // that we don't have to check that we don't run out of this string as
1074     // when we reach the terminating NUL, either prefix string ends too (and
1075     // then it's ok) or we break out of the loop because there is no match
1076     const wxChar *p = c_str();
1077     while ( *prefix )
1078     {
1079         if ( *prefix++ != *p++ )
1080         {
1081             // no match
1082             return false;
1083         }
1084     }
1085
1086     if ( rest )
1087     {
1088         // put the rest of the string into provided pointer
1089         *rest = p;
1090     }
1091
1092     return true;
1093 }
1094
1095
1096 // check that the string ends with suffix and return the rest of it in the
1097 // provided pointer if it is not NULL, otherwise return false
1098 bool wxString::EndsWith(const wxChar *suffix, wxString *rest) const
1099 {
1100     wxASSERT_MSG( suffix, _T("invalid parameter in wxString::EndssWith") );
1101
1102     int start = length() - wxStrlen(suffix);
1103
1104     if ( start < 0 || compare(start, npos, suffix) != 0 )
1105         return false;
1106
1107     if ( rest )
1108     {
1109         // put the rest of the string into provided pointer
1110         rest->assign(*this, 0, start);
1111     }
1112
1113     return true;
1114 }
1115
1116
1117 // extract nCount last (rightmost) characters
1118 wxString wxString::Right(size_t nCount) const
1119 {
1120   if ( nCount > length() )
1121     nCount = length();
1122
1123   wxString dest(*this, length() - nCount, nCount);
1124   if ( dest.length() != nCount ) {
1125     wxFAIL_MSG( _T("out of memory in wxString::Right") );
1126   }
1127   return dest;
1128 }
1129
1130 // get all characters after the last occurence of ch
1131 // (returns the whole string if ch not found)
1132 wxString wxString::AfterLast(wxUniChar ch) const
1133 {
1134   wxString str;
1135   int iPos = Find(ch, true);
1136   if ( iPos == wxNOT_FOUND )
1137     str = *this;
1138   else
1139     str = wx_str() + iPos + 1;
1140
1141   return str;
1142 }
1143
1144 // extract nCount first (leftmost) characters
1145 wxString wxString::Left(size_t nCount) const
1146 {
1147   if ( nCount > length() )
1148     nCount = length();
1149
1150   wxString dest(*this, 0, nCount);
1151   if ( dest.length() != nCount ) {
1152     wxFAIL_MSG( _T("out of memory in wxString::Left") );
1153   }
1154   return dest;
1155 }
1156
1157 // get all characters before the first occurence of ch
1158 // (returns the whole string if ch not found)
1159 wxString wxString::BeforeFirst(wxUniChar ch) const
1160 {
1161   int iPos = Find(ch);
1162   if ( iPos == wxNOT_FOUND ) iPos = length();
1163   return wxString(*this, 0, iPos);
1164 }
1165
1166 /// get all characters before the last occurence of ch
1167 /// (returns empty string if ch not found)
1168 wxString wxString::BeforeLast(wxUniChar ch) const
1169 {
1170   wxString str;
1171   int iPos = Find(ch, true);
1172   if ( iPos != wxNOT_FOUND && iPos != 0 )
1173     str = wxString(c_str(), iPos);
1174
1175   return str;
1176 }
1177
1178 /// get all characters after the first occurence of ch
1179 /// (returns empty string if ch not found)
1180 wxString wxString::AfterFirst(wxUniChar ch) const
1181 {
1182   wxString str;
1183   int iPos = Find(ch);
1184   if ( iPos != wxNOT_FOUND )
1185     str = wx_str() + iPos + 1;
1186
1187   return str;
1188 }
1189
1190 // replace first (or all) occurences of some substring with another one
1191 size_t wxString::Replace(const wxString& strOld,
1192                          const wxString& strNew, bool bReplaceAll)
1193 {
1194     // if we tried to replace an empty string we'd enter an infinite loop below
1195     wxCHECK_MSG( !strOld.empty(), 0,
1196                  _T("wxString::Replace(): invalid parameter") );
1197
1198     size_t uiCount = 0;   // count of replacements made
1199
1200     size_t uiOldLen = strOld.length();
1201     size_t uiNewLen = strNew.length();
1202
1203     size_t dwPos = 0;
1204
1205     while ( (*this)[dwPos] != wxT('\0') )
1206     {
1207         //DO NOT USE STRSTR HERE
1208         //this string can contain embedded null characters,
1209         //so strstr will function incorrectly
1210         dwPos = find(strOld, dwPos);
1211         if ( dwPos == npos )
1212             break;                  // exit the loop
1213         else
1214         {
1215             //replace this occurance of the old string with the new one
1216             replace(dwPos, uiOldLen, strNew, uiNewLen);
1217
1218             //move up pos past the string that was replaced
1219             dwPos += uiNewLen;
1220
1221             //increase replace count
1222             ++uiCount;
1223
1224             // stop now?
1225             if ( !bReplaceAll )
1226                 break;                  // exit the loop
1227         }
1228     }
1229
1230     return uiCount;
1231 }
1232
1233 bool wxString::IsAscii() const
1234 {
1235     for ( const_iterator i = begin(); i != end(); ++i )
1236     {
1237         if ( !(*i).IsAscii() )
1238             return false;
1239     }
1240
1241     return true;
1242 }
1243
1244 bool wxString::IsWord() const
1245 {
1246     for ( const_iterator i = begin(); i != end(); ++i )
1247     {
1248         if ( !wxIsalpha(*i) )
1249             return false;
1250     }
1251
1252     return true;
1253 }
1254
1255 bool wxString::IsNumber() const
1256 {
1257     if ( empty() )
1258         return true;
1259
1260     const_iterator i = begin();
1261
1262     if ( *i == _T('-') || *i == _T('+') )
1263         ++i;
1264
1265     for ( ; i != end(); ++i )
1266     {
1267         if ( !wxIsdigit(*i) )
1268             return false;
1269     }
1270
1271     return true;
1272 }
1273
1274 wxString wxString::Strip(stripType w) const
1275 {
1276     wxString s = *this;
1277     if ( w & leading ) s.Trim(false);
1278     if ( w & trailing ) s.Trim(true);
1279     return s;
1280 }
1281
1282 // ---------------------------------------------------------------------------
1283 // case conversion
1284 // ---------------------------------------------------------------------------
1285
1286 wxString& wxString::MakeUpper()
1287 {
1288   for ( iterator it = begin(), en = end(); it != en; ++it )
1289     *it = (wxChar)wxToupper(*it);
1290
1291   return *this;
1292 }
1293
1294 wxString& wxString::MakeLower()
1295 {
1296   for ( iterator it = begin(), en = end(); it != en; ++it )
1297     *it = (wxChar)wxTolower(*it);
1298
1299   return *this;
1300 }
1301
1302 // ---------------------------------------------------------------------------
1303 // trimming and padding
1304 // ---------------------------------------------------------------------------
1305
1306 // some compilers (VC++ 6.0 not to name them) return true for a call to
1307 // isspace('ê') in the C locale which seems to be broken to me, but we have to
1308 // live with this by checking that the character is a 7 bit one - even if this
1309 // may fail to detect some spaces (I don't know if Unicode doesn't have
1310 // space-like symbols somewhere except in the first 128 chars), it is arguably
1311 // still better than trimming away accented letters
1312 inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1313
1314 // trims spaces (in the sense of isspace) from left or right side
1315 wxString& wxString::Trim(bool bFromRight)
1316 {
1317     // first check if we're going to modify the string at all
1318     if ( !empty() &&
1319          (
1320           (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1321           (!bFromRight && wxSafeIsspace(GetChar(0u)))
1322          )
1323        )
1324     {
1325         if ( bFromRight )
1326         {
1327             // find last non-space character
1328             reverse_iterator psz = rbegin();
1329             while ( (psz != rend()) && wxSafeIsspace(*psz) )
1330                 psz++;
1331
1332             // truncate at trailing space start
1333             erase(psz.base(), end());
1334         }
1335         else
1336         {
1337             // find first non-space character
1338             iterator psz = begin();
1339             while ( (psz != end()) && wxSafeIsspace(*psz) )
1340                 psz++;
1341
1342             // fix up data and length
1343             erase(begin(), psz);
1344         }
1345     }
1346
1347     return *this;
1348 }
1349
1350 // adds nCount characters chPad to the string from either side
1351 wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1352 {
1353     wxString s(chPad, nCount);
1354
1355     if ( bFromRight )
1356         *this += s;
1357     else
1358     {
1359         s += *this;
1360         swap(s);
1361     }
1362
1363     return *this;
1364 }
1365
1366 // truncate the string
1367 wxString& wxString::Truncate(size_t uiLen)
1368 {
1369     if ( uiLen < length() )
1370     {
1371         erase(begin() + uiLen, end());
1372     }
1373     //else: nothing to do, string is already short enough
1374
1375     return *this;
1376 }
1377
1378 // ---------------------------------------------------------------------------
1379 // finding (return wxNOT_FOUND if not found and index otherwise)
1380 // ---------------------------------------------------------------------------
1381
1382 // find a character
1383 int wxString::Find(wxUniChar ch, bool bFromEnd) const
1384 {
1385     size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1386
1387     return (idx == npos) ? wxNOT_FOUND : (int)idx;
1388 }
1389
1390 // ----------------------------------------------------------------------------
1391 // conversion to numbers
1392 // ----------------------------------------------------------------------------
1393
1394 // the implementation of all the functions below is exactly the same so factor
1395 // it out
1396
1397 template <typename T, typename F>
1398 bool wxStringToIntType(const wxChar *start,
1399                        T *val,
1400                        int base,
1401                        F func)
1402 {
1403     wxCHECK_MSG( val, false, _T("NULL output pointer") );
1404     wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );
1405
1406 #ifndef __WXWINCE__
1407     errno = 0;
1408 #endif
1409
1410     wxChar *end;
1411     *val = (*func)(start, &end, base);
1412
1413     // return true only if scan was stopped by the terminating NUL and if the
1414     // string was not empty to start with and no under/overflow occurred
1415     return !*end && (end != start)
1416 #ifndef __WXWINCE__
1417         && (errno != ERANGE)
1418 #endif
1419     ;
1420 }
1421
1422 bool wxString::ToLong(long *val, int base) const
1423 {
1424     return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtol);
1425 }
1426
1427 bool wxString::ToULong(unsigned long *val, int base) const
1428 {
1429     return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtoul);
1430 }
1431
1432 bool wxString::ToLongLong(wxLongLong_t *val, int base) const
1433 {
1434     return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtoll);
1435 }
1436
1437 bool wxString::ToULongLong(wxULongLong_t *val, int base) const
1438 {
1439     return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtoull);
1440 }
1441
1442 bool wxString::ToDouble(double *val) const
1443 {
1444     wxCHECK_MSG( val, false, _T("NULL pointer in wxString::ToDouble") );
1445
1446 #ifndef __WXWINCE__
1447     errno = 0;
1448 #endif
1449
1450     const wxChar *start = c_str();
1451     wxChar *end;
1452     *val = wxStrtod(start, &end);
1453
1454     // return true only if scan was stopped by the terminating NUL and if the
1455     // string was not empty to start with and no under/overflow occurred
1456     return !*end && (end != start)
1457 #ifndef __WXWINCE__
1458         && (errno != ERANGE)
1459 #endif
1460     ;
1461 }
1462
1463 // ---------------------------------------------------------------------------
1464 // formatted output
1465 // ---------------------------------------------------------------------------
1466
1467 #if !wxUSE_UTF8_LOCALE_ONLY
1468 /* static */
1469 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1470 wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
1471 #else
1472 wxString wxString::DoFormatWchar(const wxChar *format, ...)
1473 #endif
1474 {
1475     va_list argptr;
1476     va_start(argptr, format);
1477
1478     wxString s;
1479     s.PrintfV(format, argptr);
1480
1481     va_end(argptr);
1482
1483     return s;
1484 }
1485 #endif // !wxUSE_UTF8_LOCALE_ONLY
1486
#if wxUSE_UNICODE_UTF8
// char version of the variadic formatting function: returns a new string
// built by PrintfV() from the format and the arguments following it
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);

    result.PrintfV(format, args);

    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
1502
1503 /* static */
1504 wxString wxString::FormatV(const wxString& format, va_list argptr)
1505 {
1506     wxString s;
1507     s.PrintfV(format, argptr);
1508     return s;
1509 }
1510
1511 #if !wxUSE_UTF8_LOCALE_ONLY
1512 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1513 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
1514 #else
1515 int wxString::DoPrintfWchar(const wxChar *format, ...)
1516 #endif
1517 {
1518     va_list argptr;
1519     va_start(argptr, format);
1520
1521 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1522     // get a pointer to the wxString instance; we have to use dynamic_cast<>
1523     // because it's the only cast that works safely for downcasting when
1524     // multiple inheritance is used:
1525     wxString *str = static_cast<wxString*>(this);
1526 #else
1527     wxString *str = this;
1528 #endif
1529
1530     int iLen = str->PrintfV(format, argptr);
1531
1532     va_end(argptr);
1533
1534     return iLen;
1535 }
1536 #endif // !wxUSE_UTF8_LOCALE_ONLY
1537
#if wxUSE_UNICODE_UTF8
// char version of the variadic Printf(): replaces the contents of the string
// with the result of PrintfV() and returns its return value
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);

    const int written = PrintfV(format, args);

    va_end(args);

    return written;
}
#endif // wxUSE_UNICODE_UTF8
1551
// Helper of wxString::PrintfV(): formats the arguments into str using
// wxVsnprintf(), retrying with a bigger buffer until the result fits.
//
// Returns the length of the resulting string or -1 if the write buffer
// couldn't be obtained or if wxVsnprintf() failed in a build using wx own
// implementation of it (wxUSE_WXVSNPRINTF).
#if wxUSE_UNICODE_UTF8
template<typename BufferType>
#else
// we only need one version in non-UTF8 builds and at least two Windows
// compilers have problems with this function template, so use just one
// normal function here
#endif
static int DoStringPrintfV(wxString& str,
                           const wxString& format, va_list argptr)
{
    // initial buffer size (not counting the terminating NUL), it is increased
    // below if it turns out to be insufficient
    int size = 1024;

    for ( ;; )
    {
        // the buffer object is local to the loop body, so a new write buffer
        // of the current size is requested from str at each iteration
#if wxUSE_UNICODE_UTF8
        BufferType tmp(str, size + 1);
        typename BufferType::CharType *buf = tmp;
#else
        wxStringBuffer tmp(str, size + 1);
        wxChar *buf = tmp;
#endif

        if ( !buf )
        {
            // out of memory
            return -1;
        }

        // wxVsnprintf() may modify the original arg pointer, so pass it
        // only a copy
        va_list argptrcopy;
        wxVaCopy(argptrcopy, argptr);
        int len = wxVsnprintf(buf, size, format, argptrcopy);
        va_end(argptrcopy);

        // some implementations of vsnprintf() don't NUL terminate
        // the string if there is not enough space for it so
        // always do it manually (the buffer was requested with size + 1
        // elements, so buf[size] is its last one)
        buf[size] = _T('\0');

        // vsnprintf() may return either -1 (traditional Unix behaviour) or the
        // total number of characters which would have been written if the
        // buffer were large enough (newer standards such as Unix98)
        if ( len < 0 )
        {
#if wxUSE_WXVSNPRINTF
            // we know that our own implementation of wxVsnprintf() returns -1
            // only for a format error - thus there's something wrong with
            // the user's format string
            return -1;
#else // assume that system version only returns error if not enough space
            // still not enough, as we don't know how much we need, double the
            // current size of the buffer
            //
            // NOTE(review): nothing here bounds the growth of size if the
            // system function keeps returning a negative value for another
            // reason -- confirm that this can't loop indefinitely
            size *= 2;
#endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
        }
        else if ( len >= size )
        {
#if wxUSE_WXVSNPRINTF
            // we know that our own implementation of wxVsnprintf() returns
            // size+1 when there's not enough space but that's not the size
            // of the required buffer!
            size *= 2;      // so we just double the current size of the buffer
#else
            // some vsnprintf() implementations NUL-terminate the buffer and
            // some don't in len == size case, to be safe always add 1
            size = len + 1;
#endif
        }
        else // ok, there was enough space
        {
            break;
        }
    }

    // we could have overshot
    str.Shrink();

    return str.length();
}
1632
1633 int wxString::PrintfV(const wxString& format, va_list argptr)
1634 {
1635     va_list argcopy;
1636     wxVaCopy(argcopy, argptr);
1637
1638 #if wxUSE_UNICODE_UTF8
1639     #if wxUSE_STL_BASED_WXSTRING
1640         typedef wxStringTypeBuffer<char> Utf8Buffer;
1641     #else
1642         typedef wxImplStringBuffer Utf8Buffer;
1643     #endif
1644 #endif
1645
1646 #if wxUSE_UTF8_LOCALE_ONLY
1647     return DoStringPrintfV<Utf8Buffer>(*this, format, argcopy);
1648 #else
1649     #if wxUSE_UNICODE_UTF8
1650     if ( wxLocaleIsUtf8 )
1651         return DoStringPrintfV<Utf8Buffer>(*this, format, argcopy);
1652     else
1653         // wxChar* version
1654         return DoStringPrintfV<wxStringBuffer>(*this, format, argcopy);
1655     #else
1656         return DoStringPrintfV(*this, format, argcopy);
1657     #endif // UTF8/WCHAR
1658 #endif
1659 }
1660
1661 // ----------------------------------------------------------------------------
1662 // misc other operations
1663 // ----------------------------------------------------------------------------
1664
// returns true if the string matches the pattern which may contain '*' and
// '?' metacharacters (as usual, '?' matches any character and '*' any number
// of them)
//
// The regex-based implementation in the first branch below is compiled out
// ("#if 0"), only the hand-written matcher in the second branch is used.
bool wxString::Matches(const wxString& mask) const
{
    // I disable this code as it doesn't seem to be faster (in fact, it seems
    // to be much slower) than the old, hand-written code below and using it
    // here requires always linking with libregex even if the user code doesn't
    // use it
#if 0 // wxUSE_REGEX
    // first translate the shell-like mask into a regex
    wxString pattern;
    pattern.reserve(wxStrlen(pszMask));

    pattern += _T('^');
    while ( *pszMask )
    {
        switch ( *pszMask )
        {
            case _T('?'):
                pattern += _T('.');
                break;

            case _T('*'):
                pattern += _T(".*");
                break;

            case _T('^'):
            case _T('.'):
            case _T('$'):
            case _T('('):
            case _T(')'):
            case _T('|'):
            case _T('+'):
            case _T('\\'):
                // these characters are special in a RE, quote them
                // (however note that we don't quote '[' and ']' to allow
                // using them for Unix shell like matching)
                pattern += _T('\\');
                // fall through

            default:
                pattern += *pszMask;
        }

        pszMask++;
    }
    pattern += _T('$');

    // and now use it
    return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
#else // !wxUSE_REGEX
  // TODO: this is, of course, awfully inefficient...

  // FIXME-UTF8: implement using iterators, remove #if
#if wxUSE_UNICODE_UTF8
  // in UTF-8 build work on the wide character copies of both strings, the
  // buffers keep them alive for the duration of the function
  wxWCharBuffer maskBuf = mask.wc_str();
  wxWCharBuffer txtBuf = wc_str();
  const wxChar *pszMask = maskBuf.data();
  const wxChar *pszTxt = txtBuf.data();
#else
  const wxChar *pszMask = mask.wx_str();
  // the char currently being checked
  const wxChar *pszTxt = wx_str();
#endif

  // the last location where '*' matched
  const wxChar *pszLastStarInText = NULL;
  const wxChar *pszLastStarInMask = NULL;

  // the matching loop is restarted from this label when backtracking to the
  // last '*' seen (see the end of the function)
match:
  for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
    switch ( *pszMask ) {
      case wxT('?'):
        // '?' needs a character to match, so fail at the end of the text
        if ( *pszTxt == wxT('\0') )
          return false;

        // pszTxt and pszMask will be incremented in the loop statement

        break;

      case wxT('*'):
        {
          // remember where we started to be able to backtrack later
          pszLastStarInText = pszTxt;
          pszLastStarInMask = pszMask;

          // ignore special chars immediately following this one
          // (should this be an error?)
          //
          // NOTE(review): this also skips any '?' following the '*' without
          // consuming a character of the text for it
          while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
            pszMask++;

          // if there is nothing more, match
          if ( *pszMask == wxT('\0') )
            return true;

          // are there any other metacharacters in the mask?
          size_t uiLenMask;
          const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));

          if ( pEndMask != NULL ) {
            // we have to match the string between two metachars
            uiLenMask = pEndMask - pszMask;
          }
          else {
            // we have to match the remainder of the string
            uiLenMask = wxStrlen(pszMask);
          }

          // look for the literal part of the mask following the '*' in the
          // text which remains to be matched
          wxString strToMatch(pszMask, uiLenMask);
          const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
          if ( pMatch == NULL )
            return false;

          // -1 to compensate "++" in the loop
          pszTxt = pMatch + uiLenMask - 1;
          pszMask += uiLenMask - 1;
        }
        break;

      default:
        // any other character of the mask must be matched literally
        if ( *pszMask != *pszTxt )
          return false;
        break;
    }
  }

  // match only if nothing left
  if ( *pszTxt == wxT('\0') )
    return true;

  // if we failed to match, backtrack if we can
  if ( pszLastStarInText ) {
    // retry from the last '*' in the mask but starting one character further
    // in the text than the last time
    pszTxt = pszLastStarInText + 1;
    pszMask = pszLastStarInMask;

    // it is set again when the '*' case is executed after the jump
    pszLastStarInText = NULL;

    // don't bother resetting pszLastStarInMask, it's unnecessary

    goto match;
  }

  return false;
#endif // wxUSE_REGEX/!wxUSE_REGEX
}
1811
1812 // Count the number of chars
1813 int wxString::Freq(wxUniChar ch) const
1814 {
1815     int count = 0;
1816     for ( const_iterator i = begin(); i != end(); ++i )
1817     {
1818         if ( *i == ch )
1819             count ++;
1820     }
1821     return count;
1822 }
1823
1824 // convert to upper case, return the copy of the string
1825 wxString wxString::Upper() const
1826 { wxString s(*this); return s.MakeUpper(); }
1827
1828 // convert to lower case, return the copy of the string
1829 wxString wxString::Lower() const { wxString s(*this); return s.MakeLower(); }