src/common/string.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/string.cpp
   3 // Purpose:     wxString class
   4 // Author:      Vadim Zeitlin, Ryan Norton
   5 // Modified by:
   6 // Created:     29/01/98
   7 // RCS-ID:      $Id$
   8 // Copyright:   (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
   9 //              (c) 2004 Ryan Norton <wxprojects@comcast.net>
  10 // Licence:     wxWindows licence
  11 /////////////////////////////////////////////////////////////////////////////
  12
  13 // ===========================================================================
  14 // headers, declarations, constants
  15 // ===========================================================================
  16
  17 // For compilers that support precompilation, includes "wx.h".
  18 #include "wx/wxprec.h"
  19
  20 #ifdef __BORLANDC__
  21     #pragma hdrstop
  22 #endif
  23
  24 #ifndef WX_PRECOMP
  25     #include "wx/string.h"
  26     #include "wx/wxcrtvararg.h"
  27 #endif
  28
  29 #include <ctype.h>
  30
  31 #ifndef __WXWINCE__
  32     #include <errno.h>
  33 #endif
  34
  35 #include <string.h>
  36 #include <stdlib.h>
  37
  38 #include "wx/hashmap.h"
  39
  40 // string handling functions used by wxString:
  41 #if wxUSE_UNICODE_UTF8
  42     #define wxStringMemcpy   memcpy
  43     #define wxStringMemcmp   memcmp
  44     #define wxStringMemchr   memchr
  45     #define wxStringStrlen   strlen
  46 #else
  47     #define wxStringMemcpy   wxTmemcpy
  48     #define wxStringMemcmp   wxTmemcmp
  49     #define wxStringMemchr   wxTmemchr
  50     #define wxStringStrlen   wxStrlen
  51 #endif
  52
  53
  54 // ---------------------------------------------------------------------------
  55 // static class variables definition
  56 // ---------------------------------------------------------------------------
  57
  58 //According to STL _must_ be a -1 size_t
  59 const size_t wxString::npos = (size_t) -1;
  60
  61 // ----------------------------------------------------------------------------
  62 // global functions
  63 // ----------------------------------------------------------------------------
  64
  65 #if wxUSE_STD_IOSTREAM
  66
  67 #include <iostream>
  68
  69 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
  70 {
  71 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
  72     return os << (const char *)str.AsCharBuf();
  73 #else
  74     return os << str.AsInternal();
  75 #endif
  76 }
  77
  78 wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
  79 {
  80     return os << str.c_str();
  81 }
  82
  83 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str)
  84 {
  85     return os << str.data();
  86 }
  87
  88 #ifndef __BORLANDC__
  89 wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)
  90 {
  91     return os << str.data();
  92 }
  93 #endif
  94
  95 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
  96
  97 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxString& str)
  98 {
  99     return wos << str.wc_str();
 100 }
 101
 102 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxCStrData& str)
 103 {
 104     return wos << str.AsWChar();
 105 }
 106
 107 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxWCharBuffer& str)
 108 {
 109     return wos << str.data();
 110 }
 111
 112 #endif  // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
 113
 114 #endif // wxUSE_STD_IOSTREAM
 115
 116 // ===========================================================================
 117 // wxString class core
 118 // ===========================================================================
 119
 120 #if wxUSE_UNICODE_UTF8
 121
 122 void wxString::PosLenToImpl(size_t pos, size_t len,
 123                             size_t *implPos, size_t *implLen) const
 124 {
 125     if ( pos == npos )
 126         *implPos = npos;
 127     else
 128     {
 129         const_iterator i = begin() + pos;
 130         *implPos = wxStringImpl::const_iterator(i.impl()) - m_impl.begin();
 131         if ( len == npos )
 132             *implLen = npos;
 133         else
 134         {
 135             // too large length is interpreted as "to the end of the string"
 136             // FIXME-UTF8: verify this is the case in std::string, assert
 137             // otherwise
 138             if ( pos + len > length() )
 139                 len = length() - pos;
 140
 141             *implLen = (i + len).impl() - i.impl();
 142         }
 143     }
 144 }
 145
 146 #endif // wxUSE_UNICODE_UTF8
 147
 148 // ----------------------------------------------------------------------------
 149 // wxCStrData converted strings caching
 150 // ----------------------------------------------------------------------------
 151
 152 // FIXME-UTF8: temporarily disabled because it doesn't work with global
 153 //             string objects; re-enable after fixing this bug and benchmarking
 154 //             performance to see if using a hash is a good idea at all
 155 #if 0
 156
 157 // For backward compatibility reasons, it must be possible to assign the value
 158 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
 159 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
 160 // because the memory would be freed immediately, but it has to be valid as long
 161 // as the string is not modified, so that code like this still works:
 162 //
 163 // const wxChar *s = str.c_str();
 164 // while ( s ) { ... }
 165
 166 // FIXME-UTF8: not thread safe!
 167 // FIXME-UTF8: we currently clear the cached conversion only when the string is
 168 //             destroyed, but we should do it when the string is modified, to
 169 //             keep memory usage down
 170 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
 171 //             invalidated the cache on every change, we could keep the previous
 172 //             conversion
 173 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
 174 //             to use mb_str() or wc_str() instead of (const [w]char*)c_str()
 175
// Remove the entry keyed by the given string (if any) from the conversion
// cache "hash", freeing the cached buffer stored as the entry value.
//
// NB: this code is currently inside an "#if 0" section and so not compiled.
template<typename T>
static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
{
    // the hash key type is non-const wxString*, hence the const cast
    typename T::iterator i = hash.find(wxConstCast(s, wxString));
    if ( i != hash.end() )
    {
        // free the cached buffer before dropping the entry itself
        free(i->second);
        hash.erase(i);
    }
}
 186
 187 #if wxUSE_UNICODE
// NB: non-STL implementation doesn't compile with "const wxString*" key type,
//     so we have to use wxString* here and const-cast when used
WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
                    wxStringCharConversionCache);
// maps a string object to the buffer holding its last char* conversion
static wxStringCharConversionCache gs_stringsCharCache;

// Hash-based variant of AsChar() (currently disabled by the enclosing
// "#if 0"): converts the string with mb_str(), stores the released buffer in
// gs_stringsCharCache and returns a pointer into it adjusted by m_offset.
const char* wxCStrData::AsChar() const
{
    // remove previously cached value, if any (see FIXMEs above):
    DeleteStringFromConversionCache(gs_stringsCharCache, m_str);

    // convert the string and keep it:
    const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
        m_str->mb_str().release();

    return s + m_offset;
}
 205 #endif // wxUSE_UNICODE
 206
 207 #if !wxUSE_UNICODE_WCHAR
WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
                    wxStringWCharConversionCache);
// maps a string object to the buffer holding its last wchar_t* conversion
static wxStringWCharConversionCache gs_stringsWCharCache;

// Hash-based variant of AsWChar() (currently disabled by the enclosing
// "#if 0"): converts the string with wc_str(), stores the released buffer in
// gs_stringsWCharCache and returns a pointer into it adjusted by m_offset.
const wchar_t* wxCStrData::AsWChar() const
{
    // remove previously cached value, if any (see FIXMEs above):
    DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);

    // convert the string and keep it:
    const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
        m_str->wc_str().release();

    return s + m_offset;
}
 223 #endif // !wxUSE_UNICODE_WCHAR
 224
// Destructor used with the (currently disabled) hash-based conversion cache:
// drops any cached conversions of this string from the global caches.
wxString::~wxString()
{
#if wxUSE_UNICODE
    // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
    DeleteStringFromConversionCache(gs_stringsCharCache, this);
#endif
#if !wxUSE_UNICODE_WCHAR
    DeleteStringFromConversionCache(gs_stringsWCharCache, this);
#endif
}
 235 #endif
 236
 237 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 238 const char* wxCStrData::AsChar() const
 239 {
 240 #if wxUSE_UNICODE_UTF8
 241     if ( wxLocaleIsUtf8 )
 242         return AsInternal();
 243 #endif
 244     // under non-UTF8 locales, we have to convert the internal UTF-8
 245     // representation using wxConvLibc and cache the result
 246
 247     wxString *str = wxConstCast(m_str, wxString);
 248
 249     // convert the string:
 250     //
 251     // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
 252     //             have it) but it's unfortunately not obvious to implement
 253     //             because we don't know how big buffer do we need for the
 254     //             given string length (in case of multibyte encodings, e.g.
 255     //             ISO-2022-JP or UTF-8 when internal representation is wchar_t)
 256     //
 257     //             One idea would be to store more than just m_convertedToChar
 258     //             in wxString: then we could record the length of the string
 259     //             which was converted the last time and try to reuse the same
 260     //             buffer if the current length is not greater than it (this
 261     //             could still fail because string could have been modified in
 262     //             place but it would work most of the time, so we'd do it and
 263     //             only allocate the new buffer if in-place conversion returned
 264     //             an error). We could also store a bit saying if the string
 265     //             was modified since the last conversion (and update it in all
 266     //             operation modifying the string, of course) to avoid unneeded
 267     //             consequential conversions. But both of these ideas require
 268     //             adding more fields to wxString and require profiling results
 269     //             to be sure that we really gain enough from them to justify
 270     //             doing it.
 271     wxCharBuffer buf(str->mb_str());
 272
 273     // if it failed, return empty string and not NULL to avoid crashes in code
 274     // written with either wxWidgets 2 wxString or std::string behaviour in
 275     // mind: neither of them ever returns NULL and so we shouldn't neither
 276     if ( !buf )
 277         return "";
 278
 279     if ( str->m_convertedToChar &&
 280          strlen(buf) == strlen(str->m_convertedToChar) )
 281     {
 282         // keep the same buffer for as long as possible, so that several calls
 283         // to c_str() in a row still work:
 284         strcpy(str->m_convertedToChar, buf);
 285     }
 286     else
 287     {
 288         str->m_convertedToChar = buf.release();
 289     }
 290
 291     // and keep it:
 292     return str->m_convertedToChar + m_offset;
 293 }
 294 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 295
 296 #if !wxUSE_UNICODE_WCHAR
 297 const wchar_t* wxCStrData::AsWChar() const
 298 {
 299     wxString *str = wxConstCast(m_str, wxString);
 300
 301     // convert the string:
 302     wxWCharBuffer buf(str->wc_str());
 303
 304     // notice that here, unlike above in AsChar(), conversion can't fail as our
 305     // internal UTF-8 is always well-formed -- or the string was corrupted and
 306     // all bets are off anyhow
 307
 308     // FIXME-UTF8: do the conversion in-place in the existing buffer
 309     if ( str->m_convertedToWChar &&
 310          wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
 311     {
 312         // keep the same buffer for as long as possible, so that several calls
 313         // to c_str() in a row still work:
 314         memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
 315     }
 316     else
 317     {
 318         str->m_convertedToWChar = buf.release();
 319     }
 320
 321     // and keep it:
 322     return str->m_convertedToWChar + m_offset;
 323 }
 324 #endif // !wxUSE_UNICODE_WCHAR
 325
 326 // ===========================================================================
 327 // wxString class core
 328 // ===========================================================================
 329
 330 // ---------------------------------------------------------------------------
 331 // construction and conversion
 332 // ---------------------------------------------------------------------------
 333
 334 #if wxUSE_UNICODE_WCHAR
 335 /* static */
 336 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 337                                                const wxMBConv& conv)
 338 {
 339     // anything to do?
 340     if ( !psz || nLength == 0 )
 341         return SubstrBufFromMB(L"", 0);
 342
 343     if ( nLength == npos )
 344         nLength = wxNO_LEN;
 345
 346     size_t wcLen;
 347     wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 348     if ( !wcLen )
 349         return SubstrBufFromMB(_T(""), 0);
 350     else
 351         return SubstrBufFromMB(wcBuf, wcLen);
 352 }
 353 #endif // wxUSE_UNICODE_WCHAR
 354
 355 #if wxUSE_UNICODE_UTF8
 356 /* static */
 357 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 358                                                const wxMBConv& conv)
 359 {
 360     // anything to do?
 361     if ( !psz || nLength == 0 )
 362         return SubstrBufFromMB("", 0);
 363
 364     // if psz is already in UTF-8, we don't have to do the roundtrip to
 365     // wchar_t* and back:
 366     if ( conv.IsUTF8() )
 367     {
 368         // we need to validate the input because UTF8 iterators assume valid
 369         // UTF-8 sequence and psz may be invalid:
 370         if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
 371         {
 372             return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz), nLength);
 373         }
 374         // else: do the roundtrip through wchar_t*
 375     }
 376
 377     if ( nLength == npos )
 378         nLength = wxNO_LEN;
 379
 380     // first convert to wide string:
 381     size_t wcLen;
 382     wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 383     if ( !wcLen )
 384         return SubstrBufFromMB("", 0);
 385
 386     // and then to UTF-8:
 387     SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
 388     // widechar -> UTF-8 conversion isn't supposed to ever fail:
 389     wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
 390
 391     return buf;
 392 }
 393 #endif // wxUSE_UNICODE_UTF8
 394
 395 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 396 /* static */
 397 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
 398                                                const wxMBConv& conv)
 399 {
 400     // anything to do?
 401     if ( !pwz || nLength == 0 )
 402         return SubstrBufFromWC("", 0);
 403
 404     if ( nLength == npos )
 405         nLength = wxNO_LEN;
 406
 407     size_t mbLen;
 408     wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
 409     if ( !mbLen )
 410         return SubstrBufFromWC("", 0);
 411     else
 412         return SubstrBufFromWC(mbBuf, mbLen);
 413 }
 414 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 415
 416
 417 #if wxUSE_UNICODE_WCHAR
 418
 419 //Convert wxString in Unicode mode to a multi-byte string
 420 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
 421 {
 422     return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL);
 423 }
 424
 425 #elif wxUSE_UNICODE_UTF8
 426
 427 const wxWCharBuffer wxString::wc_str() const
 428 {
 429     return wxMBConvStrictUTF8().cMB2WC
 430                                 (
 431                                     m_impl.c_str(),
 432                                     m_impl.length() + 1, // size, not length
 433                                     NULL
 434                                 );
 435 }
 436
 437 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
 438 {
 439     if ( conv.IsUTF8() )
 440         return wxCharBuffer::CreateNonOwned(m_impl.c_str());
 441
 442     // FIXME-UTF8: use wc_str() here once we have buffers with length
 443
 444     size_t wcLen;
 445     wxWCharBuffer wcBuf(wxMBConvStrictUTF8().cMB2WC
 446                                              (
 447                                                 m_impl.c_str(),
 448                                                 m_impl.length() + 1, // size
 449                                                 &wcLen
 450                                              ));
 451     if ( !wcLen )
 452         return wxCharBuffer("");
 453
 454     return conv.cWC2MB(wcBuf, wcLen+1, NULL);
 455 }
 456
 457 #else // ANSI
 458
 459 //Converts this string to a wide character string if unicode
 460 //mode is not enabled and wxUSE_WCHAR_T is enabled
 461 const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
 462 {
 463     return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL);
 464 }
 465
 466 #endif // Unicode/ANSI
 467
 468 // shrink to minimal size (releasing extra memory)
 469 bool wxString::Shrink()
 470 {
 471   wxString tmp(begin(), end());
 472   swap(tmp);
 473   return tmp.length() == length();
 474 }
 475
 476 // deprecated compatibility code:
 477 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
 478 wxStringCharType *wxString::GetWriteBuf(size_t nLen)
 479 {
 480     return DoGetWriteBuf(nLen);
 481 }
 482
 483 void wxString::UngetWriteBuf()
 484 {
 485     DoUngetWriteBuf();
 486 }
 487
 488 void wxString::UngetWriteBuf(size_t nLen)
 489 {
 490     DoUngetWriteBuf(nLen);
 491 }
 492 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
 493
 494
 495 // ---------------------------------------------------------------------------
 496 // data access
 497 // ---------------------------------------------------------------------------
 498
 499 // all functions are inline in string.h
 500
 501 // ---------------------------------------------------------------------------
 502 // concatenation operators
 503 // ---------------------------------------------------------------------------
 504
 505 /*
 506  * concatenation functions come in 5 flavours:
 507  *  string + string
 508  *  char   + string      and      string + char
 509  *  C str  + string      and      string + C str
 510  */
 511
 512 wxString operator+(const wxString& str1, const wxString& str2)
 513 {
 514 #if !wxUSE_STL_BASED_WXSTRING
 515     wxASSERT( str1.IsValid() );
 516     wxASSERT( str2.IsValid() );
 517 #endif
 518
 519     wxString s = str1;
 520     s += str2;
 521
 522     return s;
 523 }
 524
 525 wxString operator+(const wxString& str, wxUniChar ch)
 526 {
 527 #if !wxUSE_STL_BASED_WXSTRING
 528     wxASSERT( str.IsValid() );
 529 #endif
 530
 531     wxString s = str;
 532     s += ch;
 533
 534     return s;
 535 }
 536
 537 wxString operator+(wxUniChar ch, const wxString& str)
 538 {
 539 #if !wxUSE_STL_BASED_WXSTRING
 540     wxASSERT( str.IsValid() );
 541 #endif
 542
 543     wxString s = ch;
 544     s += str;
 545
 546     return s;
 547 }
 548
 549 wxString operator+(const wxString& str, const char *psz)
 550 {
 551 #if !wxUSE_STL_BASED_WXSTRING
 552     wxASSERT( str.IsValid() );
 553 #endif
 554
 555     wxString s;
 556     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 557         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 558     }
 559     s += str;
 560     s += psz;
 561
 562     return s;
 563 }
 564
 565 wxString operator+(const wxString& str, const wchar_t *pwz)
 566 {
 567 #if !wxUSE_STL_BASED_WXSTRING
 568     wxASSERT( str.IsValid() );
 569 #endif
 570
 571     wxString s;
 572     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 573         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 574     }
 575     s += str;
 576     s += pwz;
 577
 578     return s;
 579 }
 580
 581 wxString operator+(const char *psz, const wxString& str)
 582 {
 583 #if !wxUSE_STL_BASED_WXSTRING
 584     wxASSERT( str.IsValid() );
 585 #endif
 586
 587     wxString s;
 588     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 589         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 590     }
 591     s = psz;
 592     s += str;
 593
 594     return s;
 595 }
 596
 597 wxString operator+(const wchar_t *pwz, const wxString& str)
 598 {
 599 #if !wxUSE_STL_BASED_WXSTRING
 600     wxASSERT( str.IsValid() );
 601 #endif
 602
 603     wxString s;
 604     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 605         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 606     }
 607     s = pwz;
 608     s += str;
 609
 610     return s;
 611 }
 612
 613 // ---------------------------------------------------------------------------
 614 // string comparison
 615 // ---------------------------------------------------------------------------
 616
 617 bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
 618 {
 619     return (length() == 1) && (compareWithCase ? GetChar(0u) == c
 620                                : wxToupper(GetChar(0u)) == wxToupper(c));
 621 }
 622
 623 #ifdef HAVE_STD_STRING_COMPARE
 624
 625 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
 626 //     UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
 627 //     sort strings in characters code point order by sorting the byte sequence
 628 //     in byte values order (i.e. what strcmp() and memcmp() do).
 629
 630 int wxString::compare(const wxString& str) const
 631 {
 632     return m_impl.compare(str.m_impl);
 633 }
 634
 635 int wxString::compare(size_t nStart, size_t nLen,
 636                       const wxString& str) const
 637 {
 638     size_t pos, len;
 639     PosLenToImpl(nStart, nLen, &pos, &len);
 640     return m_impl.compare(pos, len, str.m_impl);
 641 }
 642
 643 int wxString::compare(size_t nStart, size_t nLen,
 644                       const wxString& str,
 645                       size_t nStart2, size_t nLen2) const
 646 {
 647     size_t pos, len;
 648     PosLenToImpl(nStart, nLen, &pos, &len);
 649
 650     size_t pos2, len2;
 651     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 652
 653     return m_impl.compare(pos, len, str.m_impl, pos2, len2);
 654 }
 655
 656 int wxString::compare(const char* sz) const
 657 {
 658     return m_impl.compare(ImplStr(sz));
 659 }
 660
 661 int wxString::compare(const wchar_t* sz) const
 662 {
 663     return m_impl.compare(ImplStr(sz));
 664 }
 665
 666 int wxString::compare(size_t nStart, size_t nLen,
 667                       const char* sz, size_t nCount) const
 668 {
 669     size_t pos, len;
 670     PosLenToImpl(nStart, nLen, &pos, &len);
 671
 672     SubstrBufFromMB str(ImplStr(sz, nCount));
 673
 674     return m_impl.compare(pos, len, str.data, str.len);
 675 }
 676
 677 int wxString::compare(size_t nStart, size_t nLen,
 678                       const wchar_t* sz, size_t nCount) const
 679 {
 680     size_t pos, len;
 681     PosLenToImpl(nStart, nLen, &pos, &len);
 682
 683     SubstrBufFromWC str(ImplStr(sz, nCount));
 684
 685     return m_impl.compare(pos, len, str.data, str.len);
 686 }
 687
 688 #else // !HAVE_STD_STRING_COMPARE
 689
 690 static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
 691                           const wxStringCharType* s2, size_t l2)
 692 {
 693     if( l1 == l2 )
 694         return wxStringMemcmp(s1, s2, l1);
 695     else if( l1 < l2 )
 696     {
 697         int ret = wxStringMemcmp(s1, s2, l1);
 698         return ret == 0 ? -1 : ret;
 699     }
 700     else
 701     {
 702         int ret = wxStringMemcmp(s1, s2, l2);
 703         return ret == 0 ? +1 : ret;
 704     }
 705 }
 706
 707 int wxString::compare(const wxString& str) const
 708 {
 709     return ::wxDoCmp(m_impl.data(), m_impl.length(),
 710                      str.m_impl.data(), str.m_impl.length());
 711 }
 712
 713 int wxString::compare(size_t nStart, size_t nLen,
 714                       const wxString& str) const
 715 {
 716     wxASSERT(nStart <= length());
 717     size_type strLen = length() - nStart;
 718     nLen = strLen < nLen ? strLen : nLen;
 719
 720     size_t pos, len;
 721     PosLenToImpl(nStart, nLen, &pos, &len);
 722
 723     return ::wxDoCmp(m_impl.data() + pos,  len,
 724                      str.m_impl.data(), str.m_impl.length());
 725 }
 726
 727 int wxString::compare(size_t nStart, size_t nLen,
 728                       const wxString& str,
 729                       size_t nStart2, size_t nLen2) const
 730 {
 731     wxASSERT(nStart <= length());
 732     wxASSERT(nStart2 <= str.length());
 733     size_type strLen  =     length() - nStart,
 734               strLen2 = str.length() - nStart2;
 735     nLen  = strLen  < nLen  ? strLen  : nLen;
 736     nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
 737
 738     size_t pos, len;
 739     PosLenToImpl(nStart, nLen, &pos, &len);
 740     size_t pos2, len2;
 741     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 742
 743     return ::wxDoCmp(m_impl.data() + pos, len,
 744                      str.m_impl.data() + pos2, len2);
 745 }
 746
 747 int wxString::compare(const char* sz) const
 748 {
 749     SubstrBufFromMB str(ImplStr(sz, npos));
 750     if ( str.len == npos )
 751         str.len = wxStringStrlen(str.data);
 752     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 753 }
 754
 755 int wxString::compare(const wchar_t* sz) const
 756 {
 757     SubstrBufFromWC str(ImplStr(sz, npos));
 758     if ( str.len == npos )
 759         str.len = wxStringStrlen(str.data);
 760     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 761 }
 762
 763 int wxString::compare(size_t nStart, size_t nLen,
 764                       const char* sz, size_t nCount) const
 765 {
 766     wxASSERT(nStart <= length());
 767     size_type strLen = length() - nStart;
 768     nLen = strLen < nLen ? strLen : nLen;
 769
 770     size_t pos, len;
 771     PosLenToImpl(nStart, nLen, &pos, &len);
 772
 773     SubstrBufFromMB str(ImplStr(sz, nCount));
 774     if ( str.len == npos )
 775         str.len = wxStringStrlen(str.data);
 776
 777     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 778 }
 779
 780 int wxString::compare(size_t nStart, size_t nLen,
 781                       const wchar_t* sz, size_t nCount) const
 782 {
 783     wxASSERT(nStart <= length());
 784     size_type strLen = length() - nStart;
 785     nLen = strLen < nLen ? strLen : nLen;
 786
 787     size_t pos, len;
 788     PosLenToImpl(nStart, nLen, &pos, &len);
 789
 790     SubstrBufFromWC str(ImplStr(sz, nCount));
 791     if ( str.len == npos )
 792         str.len = wxStringStrlen(str.data);
 793
 794     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 795 }
 796
 797 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
 798
 799
 800 // ---------------------------------------------------------------------------
 801 // find_{first,last}_[not]_of functions
 802 // ---------------------------------------------------------------------------
 803
 804 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
 805
 806 // NB: All these functions are implemented  with the argument being wxChar*,
 807 //     i.e. widechar string in any Unicode build, even though native string
 808 //     representation is char* in the UTF-8 build. This is because we couldn't
 809 //     use memchr() to determine if a character is in a set encoded as UTF-8.
 810
 811 size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
 812 {
 813     return find_first_of(sz, nStart, wxStrlen(sz));
 814 }
 815
 816 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
 817 {
 818     return find_first_not_of(sz, nStart, wxStrlen(sz));
 819 }
 820
 821 size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
 822 {
 823     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 824
 825     size_t idx = nStart;
 826     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 827     {
 828         if ( wxTmemchr(sz, *i, n) )
 829             return idx;
 830     }
 831
 832     return npos;
 833 }
 834
 835 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
 836 {
 837     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 838
 839     size_t idx = nStart;
 840     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 841     {
 842         if ( !wxTmemchr(sz, *i, n) )
 843             return idx;
 844     }
 845
 846     return npos;
 847 }
 848
 849
 850 size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
 851 {
 852     return find_last_of(sz, nStart, wxStrlen(sz));
 853 }
 854
 855 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
 856 {
 857     return find_last_not_of(sz, nStart, wxStrlen(sz));
 858 }
 859
 860 size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
 861 {
 862     size_t len = length();
 863
 864     if ( nStart == npos )
 865     {
 866         nStart = len - 1;
 867     }
 868     else
 869     {
 870         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 871     }
 872
 873     size_t idx = nStart;
 874     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 875           i != rend(); --idx, ++i )
 876     {
 877         if ( wxTmemchr(sz, *i, n) )
 878             return idx;
 879     }
 880
 881     return npos;
 882 }
 883
 884 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
 885 {
 886     size_t len = length();
 887
 888     if ( nStart == npos )
 889     {
 890         nStart = len - 1;
 891     }
 892     else
 893     {
 894         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 895     }
 896
 897     size_t idx = nStart;
 898     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 899           i != rend(); --idx, ++i )
 900     {
 901         if ( !wxTmemchr(sz, *i, n) )
 902             return idx;
 903     }
 904
 905     return npos;
 906 }
 907
 908 size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
 909 {
 910     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 911
 912     size_t idx = nStart;
 913     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 914     {
 915         if ( *i != ch )
 916             return idx;
 917     }
 918
 919     return npos;
 920 }
 921
 922 size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
 923 {
 924     size_t len = length();
 925
 926     if ( nStart == npos )
 927     {
 928         nStart = len - 1;
 929     }
 930     else
 931     {
 932         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 933     }
 934
 935     size_t idx = nStart;
 936     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 937           i != rend(); --idx, ++i )
 938     {
 939         if ( *i != ch )
 940             return idx;
 941     }
 942
 943     return npos;
 944 }
 945
 946 // the functions above were implemented for wchar_t* arguments in Unicode
 947 // build and char* in ANSI build; below are implementations for the other
 948 // version:
 949 #if wxUSE_UNICODE
 950     #define wxOtherCharType char
 951     #define STRCONV         (const wxChar*)wxConvLibc.cMB2WC
 952 #else
 953     #define wxOtherCharType wchar_t
 954     #define STRCONV         (const wxChar*)wxConvLibc.cWC2MB
 955 #endif
 956
 957 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
 958     { return find_first_of(STRCONV(sz), nStart); }
 959
 960 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
 961                                size_t n) const
 962     { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
 963 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
 964     { return find_last_of(STRCONV(sz), nStart); }
 965 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
 966                               size_t n) const
 967     { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
 968 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
 969     { return find_first_not_of(STRCONV(sz), nStart); }
 970 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
 971                                    size_t n) const
 972     { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
 973 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
 974     { return find_last_not_of(STRCONV(sz), nStart); }
 975 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
 976                                   size_t n) const
 977     { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
 978
 979 #undef wxOtherCharType
 980 #undef STRCONV
 981
 982 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
 983
 984 // ===========================================================================
 985 // other common string functions
 986 // ===========================================================================
 987
 988 int wxString::CmpNoCase(const wxString& s) const
 989 {
 990     // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
 991
 992     const_iterator i1 = begin();
 993     const_iterator end1 = end();
 994     const_iterator i2 = s.begin();
 995     const_iterator end2 = s.end();
 996
 997     for ( ; i1 != end1 && i2 != end2; ++i1, ++i2 )
 998     {
 999         wxUniChar lower1 = (wxChar)wxTolower(*i1);
1000         wxUniChar lower2 = (wxChar)wxTolower(*i2);
1001         if ( lower1 != lower2 )
1002             return lower1 < lower2 ? -1 : 1;
1003     }
1004
1005     size_t len1 = length();
1006     size_t len2 = s.length();
1007
1008     if ( len1 < len2 )
1009         return -1;
1010     else if ( len1 > len2 )
1011         return 1;
1012     return 0;
1013 }
1014
1015
1016 #if wxUSE_UNICODE
1017
1018 #ifdef __MWERKS__
1019 #ifndef __SCHAR_MAX__
1020 #define __SCHAR_MAX__ 127
1021 #endif
1022 #endif
1023
1024 wxString wxString::FromAscii(const char *ascii, size_t len)
1025 {
1026     if (!ascii || len == 0)
1027        return wxEmptyString;
1028
1029     wxString res;
1030
1031     {
1032         wxStringInternalBuffer buf(res, len);
1033         wxStringCharType *dest = buf;
1034
1035         for ( ; len > 0; --len )
1036         {
1037             unsigned char c = (unsigned char)*ascii++;
1038             wxASSERT_MSG( c < 0x80,
1039                           _T("Non-ASCII value passed to FromAscii().") );
1040
1041             *dest++ = (wchar_t)c;
1042         }
1043     }
1044
1045     return res;
1046 }
1047
1048 wxString wxString::FromAscii(const char *ascii)
1049 {
1050     return FromAscii(ascii, wxStrlen(ascii));
1051 }
1052
1053 wxString wxString::FromAscii(char ascii)
1054 {
1055     // What do we do with '\0' ?
1056
1057     unsigned char c = (unsigned char)ascii;
1058
1059     wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") );
1060
1061     // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1062     return wxString(wxUniChar((wchar_t)c));
1063 }
1064
1065 const wxCharBuffer wxString::ToAscii() const
1066 {
1067     // this will allocate enough space for the terminating NUL too
1068     wxCharBuffer buffer(length());
1069     char *dest = buffer.data();
1070
1071     for ( const_iterator i = begin(); i != end(); ++i )
1072     {
1073         wxUniChar c(*i);
1074         // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1075         *dest++ = c.IsAscii() ? (char)c : '_';
1076
1077         // the output string can't have embedded NULs anyhow, so we can safely
1078         // stop at first of them even if we do have any
1079         if ( !c )
1080             break;
1081     }
1082
1083     return buffer;
1084 }
1085
1086 #endif // wxUSE_UNICODE
1087
1088 // extract string of length nCount starting at nFirst
1089 wxString wxString::Mid(size_t nFirst, size_t nCount) const
1090 {
1091     size_t nLen = length();
1092
1093     // default value of nCount is npos and means "till the end"
1094     if ( nCount == npos )
1095     {
1096         nCount = nLen - nFirst;
1097     }
1098
1099     // out-of-bounds requests return sensible things
1100     if ( nFirst + nCount > nLen )
1101     {
1102         nCount = nLen - nFirst;
1103     }
1104
1105     if ( nFirst > nLen )
1106     {
1107         // AllocCopy() will return empty string
1108         return wxEmptyString;
1109     }
1110
1111     wxString dest(*this, nFirst, nCount);
1112     if ( dest.length() != nCount )
1113     {
1114         wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1115     }
1116
1117     return dest;
1118 }
1119
1120 // check that the string starts with prefix and return the rest of the string
1121 // in the provided pointer if it is not NULL, otherwise return false
1122 bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
1123 {
1124     if ( compare(0, prefix.length(), prefix) != 0 )
1125         return false;
1126
1127     if ( rest )
1128     {
1129         // put the rest of the string into provided pointer
1130         rest->assign(*this, prefix.length(), npos);
1131     }
1132
1133     return true;
1134 }
1135
1136
1137 // check that the string ends with suffix and return the rest of it in the
1138 // provided pointer if it is not NULL, otherwise return false
1139 bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
1140 {
1141     int start = length() - suffix.length();
1142
1143     if ( start < 0 || compare(start, npos, suffix) != 0 )
1144         return false;
1145
1146     if ( rest )
1147     {
1148         // put the rest of the string into provided pointer
1149         rest->assign(*this, 0, start);
1150     }
1151
1152     return true;
1153 }
1154
1155
1156 // extract nCount last (rightmost) characters
1157 wxString wxString::Right(size_t nCount) const
1158 {
1159   if ( nCount > length() )
1160     nCount = length();
1161
1162   wxString dest(*this, length() - nCount, nCount);
1163   if ( dest.length() != nCount ) {
1164     wxFAIL_MSG( _T("out of memory in wxString::Right") );
1165   }
1166   return dest;
1167 }
1168
1169 // get all characters after the last occurence of ch
1170 // (returns the whole string if ch not found)
1171 wxString wxString::AfterLast(wxUniChar ch) const
1172 {
1173   wxString str;
1174   int iPos = Find(ch, true);
1175   if ( iPos == wxNOT_FOUND )
1176     str = *this;
1177   else
1178     str = wx_str() + iPos + 1;
1179
1180   return str;
1181 }
1182
1183 // extract nCount first (leftmost) characters
1184 wxString wxString::Left(size_t nCount) const
1185 {
1186   if ( nCount > length() )
1187     nCount = length();
1188
1189   wxString dest(*this, 0, nCount);
1190   if ( dest.length() != nCount ) {
1191     wxFAIL_MSG( _T("out of memory in wxString::Left") );
1192   }
1193   return dest;
1194 }
1195
1196 // get all characters before the first occurence of ch
1197 // (returns the whole string if ch not found)
1198 wxString wxString::BeforeFirst(wxUniChar ch) const
1199 {
1200   int iPos = Find(ch);
1201   if ( iPos == wxNOT_FOUND ) iPos = length();
1202   return wxString(*this, 0, iPos);
1203 }
1204
1205 /// get all characters before the last occurence of ch
1206 /// (returns empty string if ch not found)
1207 wxString wxString::BeforeLast(wxUniChar ch) const
1208 {
1209   wxString str;
1210   int iPos = Find(ch, true);
1211   if ( iPos != wxNOT_FOUND && iPos != 0 )
1212     str = wxString(c_str(), iPos);
1213
1214   return str;
1215 }
1216
1217 /// get all characters after the first occurence of ch
1218 /// (returns empty string if ch not found)
1219 wxString wxString::AfterFirst(wxUniChar ch) const
1220 {
1221   wxString str;
1222   int iPos = Find(ch);
1223   if ( iPos != wxNOT_FOUND )
1224     str = wx_str() + iPos + 1;
1225
1226   return str;
1227 }
1228
1229 // replace first (or all) occurences of some substring with another one
1230 size_t wxString::Replace(const wxString& strOld,
1231                          const wxString& strNew, bool bReplaceAll)
1232 {
1233     // if we tried to replace an empty string we'd enter an infinite loop below
1234     wxCHECK_MSG( !strOld.empty(), 0,
1235                  _T("wxString::Replace(): invalid parameter") );
1236
1237     size_t uiCount = 0;   // count of replacements made
1238
1239     size_t uiOldLen = strOld.length();
1240     size_t uiNewLen = strNew.length();
1241
1242     size_t dwPos = 0;
1243
1244     while ( (*this)[dwPos] != wxT('\0') )
1245     {
1246         //DO NOT USE STRSTR HERE
1247         //this string can contain embedded null characters,
1248         //so strstr will function incorrectly
1249         dwPos = find(strOld, dwPos);
1250         if ( dwPos == npos )
1251             break;                  // exit the loop
1252         else
1253         {
1254             //replace this occurance of the old string with the new one
1255             replace(dwPos, uiOldLen, strNew, uiNewLen);
1256
1257             //move up pos past the string that was replaced
1258             dwPos += uiNewLen;
1259
1260             //increase replace count
1261             ++uiCount;
1262
1263             // stop now?
1264             if ( !bReplaceAll )
1265                 break;                  // exit the loop
1266         }
1267     }
1268
1269     return uiCount;
1270 }
1271
1272 bool wxString::IsAscii() const
1273 {
1274     for ( const_iterator i = begin(); i != end(); ++i )
1275     {
1276         if ( !(*i).IsAscii() )
1277             return false;
1278     }
1279
1280     return true;
1281 }
1282
1283 bool wxString::IsWord() const
1284 {
1285     for ( const_iterator i = begin(); i != end(); ++i )
1286     {
1287         if ( !wxIsalpha(*i) )
1288             return false;
1289     }
1290
1291     return true;
1292 }
1293
1294 bool wxString::IsNumber() const
1295 {
1296     if ( empty() )
1297         return true;
1298
1299     const_iterator i = begin();
1300
1301     if ( *i == _T('-') || *i == _T('+') )
1302         ++i;
1303
1304     for ( ; i != end(); ++i )
1305     {
1306         if ( !wxIsdigit(*i) )
1307             return false;
1308     }
1309
1310     return true;
1311 }
1312
1313 wxString wxString::Strip(stripType w) const
1314 {
1315     wxString s = *this;
1316     if ( w & leading ) s.Trim(false);
1317     if ( w & trailing ) s.Trim(true);
1318     return s;
1319 }
1320
1321 // ---------------------------------------------------------------------------
1322 // case conversion
1323 // ---------------------------------------------------------------------------
1324
1325 wxString& wxString::MakeUpper()
1326 {
1327   for ( iterator it = begin(), en = end(); it != en; ++it )
1328     *it = (wxChar)wxToupper(*it);
1329
1330   return *this;
1331 }
1332
1333 wxString& wxString::MakeLower()
1334 {
1335   for ( iterator it = begin(), en = end(); it != en; ++it )
1336     *it = (wxChar)wxTolower(*it);
1337
1338   return *this;
1339 }
1340
1341 // ---------------------------------------------------------------------------
1342 // trimming and padding
1343 // ---------------------------------------------------------------------------
1344
1345 // some compilers (VC++ 6.0 not to name them) return true for a call to
1346 // isspace('\xEA') in the C locale which seems to be broken to me, but we have
1347 // to live with this by checking that the character is a 7 bit one - even if
1348 // this may fail to detect some spaces (I don't know if Unicode doesn't have
1349 // space-like symbols somewhere except in the first 128 chars), it is arguably
1350 // still better than trimming away accented letters
1351 inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1352
1353 // trims spaces (in the sense of isspace) from left or right side
1354 wxString& wxString::Trim(bool bFromRight)
1355 {
1356     // first check if we're going to modify the string at all
1357     if ( !empty() &&
1358          (
1359           (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1360           (!bFromRight && wxSafeIsspace(GetChar(0u)))
1361          )
1362        )
1363     {
1364         if ( bFromRight )
1365         {
1366             // find last non-space character
1367             reverse_iterator psz = rbegin();
1368             while ( (psz != rend()) && wxSafeIsspace(*psz) )
1369                 ++psz;
1370
1371             // truncate at trailing space start
1372             erase(psz.base(), end());
1373         }
1374         else
1375         {
1376             // find first non-space character
1377             iterator psz = begin();
1378             while ( (psz != end()) && wxSafeIsspace(*psz) )
1379                 ++psz;
1380
1381             // fix up data and length
1382             erase(begin(), psz);
1383         }
1384     }
1385
1386     return *this;
1387 }
1388
1389 // adds nCount characters chPad to the string from either side
1390 wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1391 {
1392     wxString s(chPad, nCount);
1393
1394     if ( bFromRight )
1395         *this += s;
1396     else
1397     {
1398         s += *this;
1399         swap(s);
1400     }
1401
1402     return *this;
1403 }
1404
1405 // truncate the string
1406 wxString& wxString::Truncate(size_t uiLen)
1407 {
1408     if ( uiLen < length() )
1409     {
1410         erase(begin() + uiLen, end());
1411     }
1412     //else: nothing to do, string is already short enough
1413
1414     return *this;
1415 }
1416
1417 // ---------------------------------------------------------------------------
1418 // finding (return wxNOT_FOUND if not found and index otherwise)
1419 // ---------------------------------------------------------------------------
1420
1421 // find a character
1422 int wxString::Find(wxUniChar ch, bool bFromEnd) const
1423 {
1424     size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1425
1426     return (idx == npos) ? wxNOT_FOUND : (int)idx;
1427 }
1428
1429 // ----------------------------------------------------------------------------
1430 // conversion to numbers
1431 // ----------------------------------------------------------------------------
1432
// The implementation of all the functions below is exactly the same so factor
// it out. Note that number extraction works correctly on UTF-8 strings, so
// we can use wxStringCharType and wx_str() for maximum efficiency.

// DO_IF_NOT_WINCE(x) expands to its argument everywhere except when
// __WXWINCE__ is defined, in which case it expands to nothing: it is used to
// compile out all the code using errno in that build.
#ifndef __WXWINCE__
    #define DO_IF_NOT_WINCE(x) x
#else
    #define DO_IF_NOT_WINCE(x)
#endif

// WX_STRING_TO_INT_TYPE expands to the entire body of a conversion function
// and so must be the only statement in it:
//  - out is the pointer of type T* receiving the result, it is checked for
//    being non-NULL using wxCHECK_MSG() and is only written to on success
//  - base is the numeric base, either 0 or in 2..36 range (this is asserted)
//  - func is the strtol()-like function doing the real work, it is called
//    with the wx_str() of the string, a pointer to the end pointer and base
//  - T is the type of the value returned by func
//
// The function using it returns true only if the entire non-empty string was
// consumed by func and, except under WinCE, errno wasn't set to ERANGE.
#define WX_STRING_TO_INT_TYPE(out, base, func, T)                           \
    wxCHECK_MSG( out, false, _T("NULL output pointer") );                   \
    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );  \
                                                                            \
    DO_IF_NOT_WINCE( errno = 0; )                                           \
                                                                            \
    const wxStringCharType *start = wx_str();                               \
    wxStringCharType *end;                                                  \
    T val = func(start, &end, base);                                        \
                                                                            \
    /* return true only if scan was stopped by the terminating NUL and */   \
    /* if the string was not empty to start with and no under/overflow */   \
    /* occurred: */                                                         \
    if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )         \
        return false;                                                       \
    *out = val;                                                             \
    return true
1460
// Converts the string to a long in the given base using wxStrtol() and
// stores the result in *pVal; the entire body, including the return
// statements, is generated by WX_STRING_TO_INT_TYPE.
//
// Returns true only if the whole non-empty string was parsed (and, except
// under WinCE, errno wasn't set to ERANGE); *pVal is not modified otherwise.
bool wxString::ToLong(long *pVal, int base) const
{
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtol, long);
}
1465
// Converts the string to an unsigned long in the given base using
// wxStrtoul() and stores the result in *pVal; the entire body, including the
// return statements, is generated by WX_STRING_TO_INT_TYPE.
//
// Returns true only if the whole non-empty string was parsed (and, except
// under WinCE, errno wasn't set to ERANGE); *pVal is not modified otherwise.
bool wxString::ToULong(unsigned long *pVal, int base) const
{
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoul, unsigned long);
}
1470
// Converts the string to a wxLongLong_t in the given base using wxStrtoll()
// and stores the result in *pVal; the entire body, including the return
// statements, is generated by WX_STRING_TO_INT_TYPE.
//
// Returns true only if the whole non-empty string was parsed (and, except
// under WinCE, errno wasn't set to ERANGE); *pVal is not modified otherwise.
bool wxString::ToLongLong(wxLongLong_t *pVal, int base) const
{
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoll, wxLongLong_t);
}
1475
// Converts the string to a wxULongLong_t in the given base using
// wxStrtoull() and stores the result in *pVal; the entire body, including the
// return statements, is generated by WX_STRING_TO_INT_TYPE.
//
// Returns true only if the whole non-empty string was parsed (and, except
// under WinCE, errno wasn't set to ERANGE); *pVal is not modified otherwise.
bool wxString::ToULongLong(wxULongLong_t *pVal, int base) const
{
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoull, wxULongLong_t);
}
1480
1481 bool wxString::ToDouble(double *pVal) const
1482 {
1483     wxCHECK_MSG( pVal, false, _T("NULL output pointer") );
1484
1485     DO_IF_NOT_WINCE( errno = 0; )
1486
1487     const wxChar *start = c_str();
1488     wxChar *end;
1489     double val = wxStrtod(start, &end);
1490
1491     // return true only if scan was stopped by the terminating NUL and if the
1492     // string was not empty to start with and no under/overflow occurred
1493     if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )
1494         return false;
1495
1496     *pVal = val;
1497
1498     return true;
1499 }
1500
1501 // ---------------------------------------------------------------------------
1502 // formatted output
1503 // ---------------------------------------------------------------------------
1504
1505 #if !wxUSE_UTF8_LOCALE_ONLY
1506 /* static */
1507 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1508 wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
1509 #else
1510 wxString wxString::DoFormatWchar(const wxChar *format, ...)
1511 #endif
1512 {
1513     va_list argptr;
1514     va_start(argptr, format);
1515
1516     wxString s;
1517     s.PrintfV(format, argptr);
1518
1519     va_end(argptr);
1520
1521     return s;
1522 }
1523 #endif // !wxUSE_UTF8_LOCALE_ONLY
1524
1525 #if wxUSE_UNICODE_UTF8
1526 /* static */
1527 wxString wxString::DoFormatUtf8(const char *format, ...)
1528 {
1529     va_list argptr;
1530     va_start(argptr, format);
1531
1532     wxString s;
1533     s.PrintfV(format, argptr);
1534
1535     va_end(argptr);
1536
1537     return s;
1538 }
1539 #endif // wxUSE_UNICODE_UTF8
1540
1541 /* static */
1542 wxString wxString::FormatV(const wxString& format, va_list argptr)
1543 {
1544     wxString s;
1545     s.PrintfV(format, argptr);
1546     return s;
1547 }
1548
1549 #if !wxUSE_UTF8_LOCALE_ONLY
1550 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1551 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
1552 #else
1553 int wxString::DoPrintfWchar(const wxChar *format, ...)
1554 #endif
1555 {
1556     va_list argptr;
1557     va_start(argptr, format);
1558
1559 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1560     // get a pointer to the wxString instance; we have to use dynamic_cast<>
1561     // because it's the only cast that works safely for downcasting when
1562     // multiple inheritance is used:
1563     wxString *str = static_cast<wxString*>(this);
1564 #else
1565     wxString *str = this;
1566 #endif
1567
1568     int iLen = str->PrintfV(format, argptr);
1569
1570     va_end(argptr);
1571
1572     return iLen;
1573 }
1574 #endif // !wxUSE_UTF8_LOCALE_ONLY
1575
1576 #if wxUSE_UNICODE_UTF8
1577 int wxString::DoPrintfUtf8(const char *format, ...)
1578 {
1579     va_list argptr;
1580     va_start(argptr, format);
1581
1582     int iLen = PrintfV(format, argptr);
1583
1584     va_end(argptr);
1585
1586     return iLen;
1587 }
1588 #endif // wxUSE_UNICODE_UTF8
1589
1590 /*
    Uses wxVsnprintf and places the result into this string.
1592
1593     In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1594     it is vswprintf.  Due to a discrepancy between vsnprintf and vswprintf in
1595     the ISO C99 (and thus SUSv3) standard the return value for the case of
1596     an undersized buffer is inconsistent.  For conforming vsnprintf
1597     implementations the function must return the number of characters that
1598     would have been printed had the buffer been large enough.  For conforming
1599     vswprintf implementations the function must return a negative number
1600     and set errno.
1601
1602     What vswprintf sets errno to is undefined but Darwin seems to set it to
1603     EOVERFLOW.  The only expected errno are EILSEQ and EINVAL.  Both of
1604     those are defined in the standard and backed up by several conformance
1605     statements.  Note that ENOMEM mentioned in the manual page does not
1606     apply to swprintf, only wprintf and fwprintf.
1607
1608     Official manual page:
1609     http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1610
1611     Some conformance statements (AIX, Solaris):
1612     http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1613     http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1614
1615     Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1616     EILSEQ and EINVAL are specifically defined to mean the error is other than
1617     an undersized buffer and no other errno are defined we treat those two
1618     as meaning hard errors and everything else gets the old behavior which
1619     is to keep looping and increasing buffer size until the function succeeds.
1620
1621     In practice it's impossible to determine before compilation which behavior
1622     may be used.  The vswprintf function may have vsnprintf-like behavior or
1623     vice-versa.  Behavior detected on one release can theoretically change
1624     with an updated release.  Not to mention that configure testing for it
1625     would require the test to be run on the host system, not the build system
1626     which makes cross compilation difficult. Therefore, we make no assumptions
1627     about behavior and try our best to handle every known case, including the
1628     case where wxVsnprintf returns a negative number and fails to set errno.
1629
1630     There is yet one more non-standard implementation and that is our own.
1631     Fortunately, that can be detected at compile-time.
1632
1633     On top of all that, ISO C99 explicitly defines snprintf to write a null
1634     character to the last position of the specified buffer.  That would be at
    the given buffer size minus 1.  It is supposed to do this even if it
1636     turns out that the buffer is sized too small.
1637
1638     Darwin (tested on 10.5) follows the C99 behavior exactly.
1639
1640     Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1641     errno even when it fails.  However, it only seems to ever fail due
1642     to an undersized buffer.
1643 */
1644 #if wxUSE_UNICODE_UTF8
1645 template<typename BufferType>
1646 #else
1647 // we only need one version in non-UTF8 builds and at least two Windows
1648 // compilers have problems with this function template, so use just one
1649 // normal function here
1650 #endif
1651 static int DoStringPrintfV(wxString& str,
1652                            const wxString& format, va_list argptr)
1653 {
1654     int size = 1024;
1655
1656     for ( ;; )
1657     {
1658 #if wxUSE_UNICODE_UTF8
1659         BufferType tmp(str, size + 1);
1660         typename BufferType::CharType *buf = tmp;
1661 #else
1662         wxStringBuffer tmp(str, size + 1);
1663         wxChar *buf = tmp;
1664 #endif
1665
1666         if ( !buf )
1667         {
1668             // out of memory
1669
1670             // in UTF-8 build, leaving uninitialized junk in the buffer
1671             // could result in invalid non-empty UTF-8 string, so just
1672             // reset the string to empty on failure:
1673             buf[0] = '\0';
1674             return -1;
1675         }
1676
1677         // wxVsnprintf() may modify the original arg pointer, so pass it
1678         // only a copy
1679         va_list argptrcopy;
1680         wxVaCopy(argptrcopy, argptr);
1681
1682 #ifndef __WXWINCE__
1683         // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1684         errno = 0;
1685 #endif
1686         int len = wxVsnprintf(buf, size, format, argptrcopy);
1687         va_end(argptrcopy);
1688
1689         // some implementations of vsnprintf() don't NUL terminate
1690         // the string if there is not enough space for it so
1691         // always do it manually
1692         // FIXME: This really seems to be the wrong and would be an off-by-one
1693         // bug except the code above allocates an extra character.
1694         buf[size] = _T('\0');
1695
1696         // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1697         // total number of characters which would have been written if the
1698         // buffer were large enough (newer standards such as Unix98)
1699         if ( len < 0 )
1700         {
1701             // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1702             //     wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1703             //     is true if *both* of them use our own implementation,
1704             //     otherwise we can't be sure
1705 #if wxUSE_WXVSNPRINTF
1706             // we know that our own implementation of wxVsnprintf() returns -1
1707             // only for a format error - thus there's something wrong with
1708             // the user's format string
1709             buf[0] = '\0';
1710             return -1;
1711 #else // possibly using system version
1712             // assume it only returns error if there is not enough space, but
1713             // as we don't know how much we need, double the current size of
1714             // the buffer
1715 #ifndef __WXWINCE__
1716             if( (errno == EILSEQ) || (errno == EINVAL) )
1717             // If errno was set to one of the two well-known hard errors
1718             // then fail immediately to avoid an infinite loop.
1719                 return -1;
1720             else
1721 #endif // __WXWINCE__
1722             // still not enough, as we don't know how much we need, double the
1723             // current size of the buffer
1724                 size *= 2;
1725 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1726         }
1727         else if ( len >= size )
1728         {
1729 #if wxUSE_WXVSNPRINTF
1730             // we know that our own implementation of wxVsnprintf() returns
1731             // size+1 when there's not enough space but that's not the size
1732             // of the required buffer!
1733             size *= 2;      // so we just double the current size of the buffer
1734 #else
1735             // some vsnprintf() implementations NUL-terminate the buffer and
1736             // some don't in len == size case, to be safe always add 1
1737             // FIXME: I don't quite understand this comment.  The vsnprintf
1738             // function is specifically defined to return the number of
1739             // characters printed not including the null terminator.
1740             // So OF COURSE you need to add 1 to get the right buffer size.
1741             // The following line is definitely correct, no question.
1742             size = len + 1;
1743 #endif
1744         }
1745         else // ok, there was enough space
1746         {
1747             break;
1748         }
1749     }
1750
1751     // we could have overshot
1752     str.Shrink();
1753
1754     return str.length();
1755 }
1756
1757 int wxString::PrintfV(const wxString& format, va_list argptr)
1758 {
1759 #if wxUSE_UNICODE_UTF8
1760     #if wxUSE_STL_BASED_WXSTRING
1761         typedef wxStringTypeBuffer<char> Utf8Buffer;
1762     #else
1763         typedef wxStringInternalBuffer Utf8Buffer;
1764     #endif
1765 #endif
1766
1767 #if wxUSE_UTF8_LOCALE_ONLY
1768     return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
1769 #else
1770     #if wxUSE_UNICODE_UTF8
1771     if ( wxLocaleIsUtf8 )
1772         return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
1773     else
1774         // wxChar* version
1775         return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
1776     #else
1777         return DoStringPrintfV(*this, format, argptr);
1778     #endif // UTF8/WCHAR
1779 #endif
1780 }
1781
1782 // ----------------------------------------------------------------------------
1783 // misc other operations
1784 // ----------------------------------------------------------------------------
1785
// returns true if the string matches the pattern which may contain '*' and
// '?' metacharacters (as usual, '?' matches any character and '*' any number
// of them)
//
// The matching is done by walking the mask and the text in parallel; when a
// mismatch is found after a '*' was seen, the code backtracks once to the
// position of the last '*' and retries from the next text character.
bool wxString::Matches(const wxString& mask) const
{
    // I disable this code as it doesn't seem to be faster (in fact, it seems
    // to be much slower) than the old, hand-written code below and using it
    // here requires always linking with libregex even if the user code doesn't
    // use it
    //
    // NB: the disabled code refers to pszMask before any variable of this
    //     name is declared, so it would need to be updated before it could be
    //     enabled again
#if 0 // wxUSE_REGEX
    // first translate the shell-like mask into a regex
    wxString pattern;
    pattern.reserve(wxStrlen(pszMask));

    pattern += _T('^');
    while ( *pszMask )
    {
        switch ( *pszMask )
        {
            case _T('?'):
                pattern += _T('.');
                break;

            case _T('*'):
                pattern += _T(".*");
                break;

            case _T('^'):
            case _T('.'):
            case _T('$'):
            case _T('('):
            case _T(')'):
            case _T('|'):
            case _T('+'):
            case _T('\\'):
                // these characters are special in a RE, quote them
                // (however note that we don't quote '[' and ']' to allow
                // using them for Unix shell like matching)
                pattern += _T('\\');
                // fall through

            default:
                pattern += *pszMask;
        }

        pszMask++;
    }
    pattern += _T('$');

    // and now use it
    return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
#else // !wxUSE_REGEX
  // TODO: this is, of course, awfully inefficient...

  // FIXME-UTF8: implement using iterators, remove #if
#if wxUSE_UNICODE_UTF8
  // the buffers must be kept in named variables for as long as the pointers
  // into them are used
  wxWCharBuffer maskBuf = mask.wc_str();
  wxWCharBuffer txtBuf = wc_str();
  const wxChar *pszMask = maskBuf.data();
  const wxChar *pszTxt = txtBuf.data();
#else
  const wxChar *pszMask = mask.wx_str();
  // the char currently being checked
  const wxChar *pszTxt = wx_str();
#endif

  // the last location where '*' matched
  const wxChar *pszLastStarInText = NULL;
  const wxChar *pszLastStarInMask = NULL;

  // the backtracking code at the end of the function jumps back here
match:
  for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
    switch ( *pszMask ) {
      case wxT('?'):
        // '?' needs a character to match, so fail if the text is exhausted
        if ( *pszTxt == wxT('\0') )
          return false;

        // pszTxt and pszMask will be incremented in the loop statement

        break;

      case wxT('*'):
        {
          // remember where we started to be able to backtrack later
          pszLastStarInText = pszTxt;
          pszLastStarInMask = pszMask;

          // ignore special chars immediately following this one
          // (should this be an error?)
          while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
            pszMask++;

          // if there is nothing more, match
          if ( *pszMask == wxT('\0') )
            return true;

          // are there any other metacharacters in the mask?
          size_t uiLenMask;
          const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));

          if ( pEndMask != NULL ) {
            // we have to match the string between two metachars
            uiLenMask = pEndMask - pszMask;
          }
          else {
            // we have to match the remainder of the string
            uiLenMask = wxStrlen(pszMask);
          }

          // look for the literal part of the mask following the '*' in the
          // remaining text
          wxString strToMatch(pszMask, uiLenMask);
          const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
          if ( pMatch == NULL )
            return false;

          // -1 to compensate "++" in the loop
          pszTxt = pMatch + uiLenMask - 1;
          pszMask += uiLenMask - 1;
        }
        break;

      default:
        // any other character in the mask must match the text exactly
        if ( *pszMask != *pszTxt )
          return false;
        break;
    }
  }

  // match only if nothing left
  if ( *pszTxt == wxT('\0') )
    return true;

  // if we failed to match, backtrack if we can
  if ( pszLastStarInText ) {
    // retry matching the mask from the last '*' in it, starting one
    // character further in the text than the last time
    pszTxt = pszLastStarInText + 1;
    pszMask = pszLastStarInMask;

    // resetting this pointer ensures that we backtrack at most once for each
    // '*' processed by the loop above
    pszLastStarInText = NULL;

    // don't bother resetting pszLastStarInMask, it's unnecessary

    goto match;
  }

  return false;
#endif // wxUSE_REGEX/!wxUSE_REGEX
}
1932
1933 // Count the number of chars
1934 int wxString::Freq(wxUniChar ch) const
1935 {
1936     int count = 0;
1937     for ( const_iterator i = begin(); i != end(); ++i )
1938     {
1939         if ( *i == ch )
1940             count ++;
1941     }
1942     return count;
1943 }
1944
1945 // convert to upper case, return the copy of the string
1946 wxString wxString::Upper() const
1947 { wxString s(*this); return s.MakeUpper(); }
1948
1949 // convert to lower case, return the copy of the string
1950 wxString wxString::Lower() const { wxString s(*this); return s.MakeLower(); }
1951
1952 // ----------------------------------------------------------------------------
1953 // wxUTF8StringBuffer
1954 // ----------------------------------------------------------------------------
1955
1956 #if wxUSE_UNICODE_WCHAR
1957 wxUTF8StringBuffer::~wxUTF8StringBuffer()
1958 {
1959     wxMBConvStrictUTF8 conv;
1960     size_t wlen = conv.ToWChar(NULL, 0, m_buf);
1961     wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
1962
1963     wxStringInternalBuffer wbuf(m_str, wlen);
1964     conv.ToWChar(wbuf, wlen, m_buf);
1965 }
1966
1967 wxUTF8StringBufferLength::~wxUTF8StringBufferLength()
1968 {
1969     wxCHECK_RET(m_lenSet, "length not set");
1970
1971     wxMBConvStrictUTF8 conv;
1972     size_t wlen = conv.ToWChar(NULL, 0, m_buf, m_len);
1973     wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
1974
1975     wxStringInternalBufferLength wbuf(m_str, wlen);
1976     conv.ToWChar(wbuf, wlen, m_buf, m_len);
1977     wbuf.SetLength(wlen);
1978 }
1979 #endif // wxUSE_UNICODE_WCHAR