src/common/string.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/string.cpp
   3 // Purpose:     wxString class
   4 // Author:      Vadim Zeitlin, Ryan Norton
   5 // Modified by:
   6 // Created:     29/01/98
   7 // RCS-ID:      $Id$
   8 // Copyright:   (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
   9 //              (c) 2004 Ryan Norton <wxprojects@comcast.net>
  10 // Licence:     wxWindows licence
  11 /////////////////////////////////////////////////////////////////////////////
  12
  13 // ===========================================================================
  14 // headers, declarations, constants
  15 // ===========================================================================
  16
  17 // For compilers that support precompilation, includes "wx.h".
  18 #include "wx/wxprec.h"
  19
  20 #ifdef __BORLANDC__
  21     #pragma hdrstop
  22 #endif
  23
  24 #ifndef WX_PRECOMP
  25     #include "wx/string.h"
  26     #include "wx/wxcrtvararg.h"
  27 #endif
  28
  29 #include <ctype.h>
  30
  31 #ifndef __WXWINCE__
  32     #include <errno.h>
  33 #endif
  34
  35 #include <string.h>
  36 #include <stdlib.h>
  37
  38 #ifdef __SALFORDC__
  39     #include <clib.h>
  40 #endif
  41
  42 #include "wx/hashmap.h"
  43
// string handling functions used by wxString:
//
// These aliases select the primitives used on the string implementation's
// storage: the plain C library functions when wxUSE_UNICODE_UTF8 is set and
// the wxTmem*()/wxStrlen() functions otherwise.
#if wxUSE_UNICODE_UTF8
    #define wxStringMemcpy   memcpy
    #define wxStringMemcmp   memcmp
    #define wxStringMemchr   memchr
    #define wxStringStrlen   strlen
#else
    #define wxStringMemcpy   wxTmemcpy
    #define wxStringMemcmp   wxTmemcmp
    #define wxStringMemchr   wxTmemchr
    #define wxStringStrlen   wxStrlen
#endif
  56
  57
  58 // ---------------------------------------------------------------------------
  59 // static class variables definition
  60 // ---------------------------------------------------------------------------
  61
  62 //According to STL _must_ be a -1 size_t
  63 const size_t wxString::npos = (size_t) -1;
  64
  65 // ----------------------------------------------------------------------------
  66 // global functions
  67 // ----------------------------------------------------------------------------
  68
  69 #if wxUSE_STD_IOSTREAM
  70
  71 #include <iostream>
  72
  73 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
  74 {
  75 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
  76     return os << (const char *)str.AsCharBuf();
  77 #else
  78     return os << str.AsInternal();
  79 #endif
  80 }
  81
  82 wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
  83 {
  84     return os << str.c_str();
  85 }
  86
  87 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str)
  88 {
  89     return os << str.data();
  90 }
  91
  92 #ifndef __BORLANDC__
  93 wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)
  94 {
  95     return os << str.data();
  96 }
  97 #endif
  98
  99 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
 100
 101 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxString& str)
 102 {
 103     return wos << str.wc_str();
 104 }
 105
 106 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxCStrData& str)
 107 {
 108     return wos << str.AsWChar();
 109 }
 110
 111 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxWCharBuffer& str)
 112 {
 113     return wos << str.data();
 114 }
 115
 116 #endif  // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
 117
 118 #endif // wxUSE_STD_IOSTREAM
 119
 120 // ===========================================================================
 121 // wxString class core
 122 // ===========================================================================
 123
 124 #if wxUSE_UNICODE_UTF8
 125
 126 void wxString::PosLenToImpl(size_t pos, size_t len,
 127                             size_t *implPos, size_t *implLen) const
 128 {
 129     if ( pos == npos )
 130         *implPos = npos;
 131     else
 132     {
 133         const_iterator i = begin() + pos;
 134         *implPos = wxStringImpl::const_iterator(i.impl()) - m_impl.begin();
 135         if ( len == npos )
 136             *implLen = npos;
 137         else
 138         {
 139             // too large length is interpreted as "to the end of the string"
 140             // FIXME-UTF8: verify this is the case in std::string, assert
 141             // otherwise
 142             if ( pos + len > length() )
 143                 len = length() - pos;
 144
 145             *implLen = (i + len).impl() - i.impl();
 146         }
 147     }
 148 }
 149
 150 #endif // wxUSE_UNICODE_UTF8
 151
 152 // ----------------------------------------------------------------------------
 153 // wxCStrData converted strings caching
 154 // ----------------------------------------------------------------------------
 155
 156 // FIXME-UTF8: temporarily disabled because it doesn't work with global
 157 //             string objects; re-enable after fixing this bug and benchmarking
 158 //             performance to see if using a hash is a good idea at all
#if 0

// For backward compatibility reasons, it must be possible to assign the value
// returned by wxString::c_str() to a char* or wchar_t* variable and work with
// it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
// because the memory would be freed immediately, but it has to be valid as long
// as the string is not modified, so that code like this still works:
//
// const wxChar *s = str.c_str();
// while ( s ) { ... }

// FIXME-UTF8: not thread safe!
// FIXME-UTF8: we currently clear the cached conversion only when the string is
//             destroyed, but we should do it when the string is modified, to
//             keep memory usage down
// FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
//             invalidated the cache on every change, we could keep the previous
//             conversion
// FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
//             to use mb_str() or wc_str() instead of (const [w]char*)c_str()

// Remove the entry for the string s from the given conversion cache, if it has
// one, releasing the cached buffer with free().
template<typename T>
static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
{
    typename T::iterator i = hash.find(wxConstCast(s, wxString));
    if ( i != hash.end() )
    {
        free(i->second);
        hash.erase(i);
    }
}

#if wxUSE_UNICODE
// NB: non-STL implementation doesn't compile with "const wxString*" key type,
//     so we have to use wxString* here and const-cast when used
WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
                    wxStringCharConversionCache);
static wxStringCharConversionCache gs_stringsCharCache;

// Return the string converted with mb_str(), keeping the converted buffer in
// gs_stringsCharCache under the string's address.
const char* wxCStrData::AsChar() const
{
    // remove previously cached value, if any (see FIXMEs above):
    DeleteStringFromConversionCache(gs_stringsCharCache, m_str);

    // convert the string and keep it:
    const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
        m_str->mb_str().release();

    return s + m_offset;
}
#endif // wxUSE_UNICODE

#if !wxUSE_UNICODE_WCHAR
WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
                    wxStringWCharConversionCache);
static wxStringWCharConversionCache gs_stringsWCharCache;

// Return the string converted with wc_str(), keeping the converted buffer in
// gs_stringsWCharCache under the string's address.
const wchar_t* wxCStrData::AsWChar() const
{
    // remove previously cached value, if any (see FIXMEs above):
    DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);

    // convert the string and keep it:
    const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
        m_str->wc_str().release();

    return s + m_offset;
}
#endif // !wxUSE_UNICODE_WCHAR

// Drop any conversions still cached for this string when it is destroyed.
wxString::~wxString()
{
#if wxUSE_UNICODE
    // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
    DeleteStringFromConversionCache(gs_stringsCharCache, this);
#endif
#if !wxUSE_UNICODE_WCHAR
    DeleteStringFromConversionCache(gs_stringsWCharCache, this);
#endif
}
#endif
 240
 241 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 242 const char* wxCStrData::AsChar() const
 243 {
 244 #if wxUSE_UNICODE_UTF8
 245     if ( wxLocaleIsUtf8 )
 246         return AsInternal();
 247 #endif
 248     // under non-UTF8 locales, we have to convert the internal UTF-8
 249     // representation using wxConvLibc and cache the result
 250
 251     wxString *str = wxConstCast(m_str, wxString);
 252
 253     // convert the string:
 254     //
 255     // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
 256     //             have it) but it's unfortunately not obvious to implement
 257     //             because we don't know how big buffer do we need for the
 258     //             given string length (in case of multibyte encodings, e.g.
 259     //             ISO-2022-JP or UTF-8 when internal representation is wchar_t)
 260     //
 261     //             One idea would be to store more than just m_convertedToChar
 262     //             in wxString: then we could record the length of the string
 263     //             which was converted the last time and try to reuse the same
 264     //             buffer if the current length is not greater than it (this
 265     //             could still fail because string could have been modified in
 266     //             place but it would work most of the time, so we'd do it and
 267     //             only allocate the new buffer if in-place conversion returned
 268     //             an error). We could also store a bit saying if the string
 269     //             was modified since the last conversion (and update it in all
 270     //             operation modifying the string, of course) to avoid unneeded
 271     //             consequential conversions. But both of these ideas require
 272     //             adding more fields to wxString and require profiling results
 273     //             to be sure that we really gain enough from them to justify
 274     //             doing it.
 275     wxCharBuffer buf(str->mb_str());
 276
 277     // if it failed, return empty string and not NULL to avoid crashes in code
 278     // written with either wxWidgets 2 wxString or std::string behaviour in
 279     // mind: neither of them ever returns NULL and so we shouldn't neither
 280     if ( !buf )
 281         return "";
 282
 283     if ( str->m_convertedToChar &&
 284          strlen(buf) == strlen(str->m_convertedToChar) )
 285     {
 286         // keep the same buffer for as long as possible, so that several calls
 287         // to c_str() in a row still work:
 288         strcpy(str->m_convertedToChar, buf);
 289     }
 290     else
 291     {
 292         str->m_convertedToChar = buf.release();
 293     }
 294
 295     // and keep it:
 296     return str->m_convertedToChar + m_offset;
 297 }
 298 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 299
 300 #if !wxUSE_UNICODE_WCHAR
 301 const wchar_t* wxCStrData::AsWChar() const
 302 {
 303     wxString *str = wxConstCast(m_str, wxString);
 304
 305     // convert the string:
 306     wxWCharBuffer buf(str->wc_str());
 307
 308     // notice that here, unlike above in AsChar(), conversion can't fail as our
 309     // internal UTF-8 is always well-formed -- or the string was corrupted and
 310     // all bets are off anyhow
 311
 312     // FIXME-UTF8: do the conversion in-place in the existing buffer
 313     if ( str->m_convertedToWChar &&
 314          wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
 315     {
 316         // keep the same buffer for as long as possible, so that several calls
 317         // to c_str() in a row still work:
 318         memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
 319     }
 320     else
 321     {
 322         str->m_convertedToWChar = buf.release();
 323     }
 324
 325     // and keep it:
 326     return str->m_convertedToWChar + m_offset;
 327 }
 328 #endif // !wxUSE_UNICODE_WCHAR
 329
 330 // ===========================================================================
 331 // wxString class core
 332 // ===========================================================================
 333
 334 // ---------------------------------------------------------------------------
 335 // construction and conversion
 336 // ---------------------------------------------------------------------------
 337
 338 #if wxUSE_UNICODE_WCHAR
 339 /* static */
 340 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 341                                                const wxMBConv& conv)
 342 {
 343     // anything to do?
 344     if ( !psz || nLength == 0 )
 345         return SubstrBufFromMB(L"", 0);
 346
 347     if ( nLength == npos )
 348         nLength = wxNO_LEN;
 349
 350     size_t wcLen;
 351     wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 352     if ( !wcLen )
 353         return SubstrBufFromMB(_T(""), 0);
 354     else
 355         return SubstrBufFromMB(wcBuf, wcLen);
 356 }
 357 #endif // wxUSE_UNICODE_WCHAR
 358
 359 #if wxUSE_UNICODE_UTF8
 360 /* static */
 361 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 362                                                const wxMBConv& conv)
 363 {
 364     // anything to do?
 365     if ( !psz || nLength == 0 )
 366         return SubstrBufFromMB("", 0);
 367
 368     // if psz is already in UTF-8, we don't have to do the roundtrip to
 369     // wchar_t* and back:
 370     if ( conv.IsUTF8() )
 371     {
 372         // we need to validate the input because UTF8 iterators assume valid
 373         // UTF-8 sequence and psz may be invalid:
 374         if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
 375         {
 376             return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz), nLength);
 377         }
 378         // else: do the roundtrip through wchar_t*
 379     }
 380
 381     if ( nLength == npos )
 382         nLength = wxNO_LEN;
 383
 384     // first convert to wide string:
 385     size_t wcLen;
 386     wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 387     if ( !wcLen )
 388         return SubstrBufFromMB("", 0);
 389
 390     // and then to UTF-8:
 391     SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
 392     // widechar -> UTF-8 conversion isn't supposed to ever fail:
 393     wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
 394
 395     return buf;
 396 }
 397 #endif // wxUSE_UNICODE_UTF8
 398
 399 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 400 /* static */
 401 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
 402                                                const wxMBConv& conv)
 403 {
 404     // anything to do?
 405     if ( !pwz || nLength == 0 )
 406         return SubstrBufFromWC("", 0);
 407
 408     if ( nLength == npos )
 409         nLength = wxNO_LEN;
 410
 411     size_t mbLen;
 412     wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
 413     if ( !mbLen )
 414         return SubstrBufFromWC("", 0);
 415     else
 416         return SubstrBufFromWC(mbBuf, mbLen);
 417 }
 418 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 419
 420
 421 #if wxUSE_UNICODE_WCHAR
 422
 423 //Convert wxString in Unicode mode to a multi-byte string
 424 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
 425 {
 426     return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL);
 427 }
 428
 429 #elif wxUSE_UNICODE_UTF8
 430
 431 const wxWCharBuffer wxString::wc_str() const
 432 {
 433     return wxMBConvStrictUTF8().cMB2WC
 434                                 (
 435                                     m_impl.c_str(),
 436                                     m_impl.length() + 1, // size, not length
 437                                     NULL
 438                                 );
 439 }
 440
 441 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
 442 {
 443     if ( conv.IsUTF8() )
 444         return wxCharBuffer::CreateNonOwned(m_impl.c_str());
 445
 446     // FIXME-UTF8: use wc_str() here once we have buffers with length
 447
 448     size_t wcLen;
 449     wxWCharBuffer wcBuf(wxMBConvStrictUTF8().cMB2WC
 450                                              (
 451                                                 m_impl.c_str(),
 452                                                 m_impl.length() + 1, // size
 453                                                 &wcLen
 454                                              ));
 455     if ( !wcLen )
 456         return wxCharBuffer("");
 457
 458     return conv.cWC2MB(wcBuf, wcLen+1, NULL);
 459 }
 460
 461 #else // ANSI
 462
 463 //Converts this string to a wide character string if unicode
 464 //mode is not enabled and wxUSE_WCHAR_T is enabled
 465 const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
 466 {
 467     return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL);
 468 }
 469
 470 #endif // Unicode/ANSI
 471
 472 // shrink to minimal size (releasing extra memory)
 473 bool wxString::Shrink()
 474 {
 475   wxString tmp(begin(), end());
 476   swap(tmp);
 477   return tmp.length() == length();
 478 }
 479
 480 // deprecated compatibility code:
 481 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
 482 wxStringCharType *wxString::GetWriteBuf(size_t nLen)
 483 {
 484     return DoGetWriteBuf(nLen);
 485 }
 486
// Deprecated: forwards to DoUngetWriteBuf().
void wxString::UngetWriteBuf()
{
    DoUngetWriteBuf();
}
 491
// Deprecated: forwards to DoUngetWriteBuf(), passing nLen through unchanged.
void wxString::UngetWriteBuf(size_t nLen)
{
    DoUngetWriteBuf(nLen);
}
 496 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
 497
 498
 499 // ---------------------------------------------------------------------------
 500 // data access
 501 // ---------------------------------------------------------------------------
 502
 503 // all functions are inline in string.h
 504
 505 // ---------------------------------------------------------------------------
 506 // concatenation operators
 507 // ---------------------------------------------------------------------------
 508
 509 /*
 510  * concatenation functions come in 5 flavours:
 511  *  string + string
 512  *  char   + string      and      string + char
 513  *  C str  + string      and      string + C str
 514  */
 515
 516 wxString operator+(const wxString& str1, const wxString& str2)
 517 {
 518 #if !wxUSE_STL_BASED_WXSTRING
 519     wxASSERT( str1.IsValid() );
 520     wxASSERT( str2.IsValid() );
 521 #endif
 522
 523     wxString s = str1;
 524     s += str2;
 525
 526     return s;
 527 }
 528
 529 wxString operator+(const wxString& str, wxUniChar ch)
 530 {
 531 #if !wxUSE_STL_BASED_WXSTRING
 532     wxASSERT( str.IsValid() );
 533 #endif
 534
 535     wxString s = str;
 536     s += ch;
 537
 538     return s;
 539 }
 540
 541 wxString operator+(wxUniChar ch, const wxString& str)
 542 {
 543 #if !wxUSE_STL_BASED_WXSTRING
 544     wxASSERT( str.IsValid() );
 545 #endif
 546
 547     wxString s = ch;
 548     s += str;
 549
 550     return s;
 551 }
 552
 553 wxString operator+(const wxString& str, const char *psz)
 554 {
 555 #if !wxUSE_STL_BASED_WXSTRING
 556     wxASSERT( str.IsValid() );
 557 #endif
 558
 559     wxString s;
 560     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 561         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 562     }
 563     s += str;
 564     s += psz;
 565
 566     return s;
 567 }
 568
 569 wxString operator+(const wxString& str, const wchar_t *pwz)
 570 {
 571 #if !wxUSE_STL_BASED_WXSTRING
 572     wxASSERT( str.IsValid() );
 573 #endif
 574
 575     wxString s;
 576     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 577         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 578     }
 579     s += str;
 580     s += pwz;
 581
 582     return s;
 583 }
 584
 585 wxString operator+(const char *psz, const wxString& str)
 586 {
 587 #if !wxUSE_STL_BASED_WXSTRING
 588     wxASSERT( str.IsValid() );
 589 #endif
 590
 591     wxString s;
 592     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 593         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 594     }
 595     s = psz;
 596     s += str;
 597
 598     return s;
 599 }
 600
 601 wxString operator+(const wchar_t *pwz, const wxString& str)
 602 {
 603 #if !wxUSE_STL_BASED_WXSTRING
 604     wxASSERT( str.IsValid() );
 605 #endif
 606
 607     wxString s;
 608     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 609         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 610     }
 611     s = pwz;
 612     s += str;
 613
 614     return s;
 615 }
 616
 617 // ---------------------------------------------------------------------------
 618 // string comparison
 619 // ---------------------------------------------------------------------------
 620
 621 bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
 622 {
 623     return (length() == 1) && (compareWithCase ? GetChar(0u) == c
 624                                : wxToupper(GetChar(0u)) == wxToupper(c));
 625 }
 626
 627 #ifdef HAVE_STD_STRING_COMPARE
 628
 629 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
 630 //     UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
 631 //     sort strings in characters code point order by sorting the byte sequence
 632 //     in byte values order (i.e. what strcmp() and memcmp() do).
 633
 634 int wxString::compare(const wxString& str) const
 635 {
 636     return m_impl.compare(str.m_impl);
 637 }
 638
 639 int wxString::compare(size_t nStart, size_t nLen,
 640                       const wxString& str) const
 641 {
 642     size_t pos, len;
 643     PosLenToImpl(nStart, nLen, &pos, &len);
 644     return m_impl.compare(pos, len, str.m_impl);
 645 }
 646
 647 int wxString::compare(size_t nStart, size_t nLen,
 648                       const wxString& str,
 649                       size_t nStart2, size_t nLen2) const
 650 {
 651     size_t pos, len;
 652     PosLenToImpl(nStart, nLen, &pos, &len);
 653
 654     size_t pos2, len2;
 655     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 656
 657     return m_impl.compare(pos, len, str.m_impl, pos2, len2);
 658 }
 659
 660 int wxString::compare(const char* sz) const
 661 {
 662     return m_impl.compare(ImplStr(sz));
 663 }
 664
 665 int wxString::compare(const wchar_t* sz) const
 666 {
 667     return m_impl.compare(ImplStr(sz));
 668 }
 669
 670 int wxString::compare(size_t nStart, size_t nLen,
 671                       const char* sz, size_t nCount) const
 672 {
 673     size_t pos, len;
 674     PosLenToImpl(nStart, nLen, &pos, &len);
 675
 676     SubstrBufFromMB str(ImplStr(sz, nCount));
 677
 678     return m_impl.compare(pos, len, str.data, str.len);
 679 }
 680
 681 int wxString::compare(size_t nStart, size_t nLen,
 682                       const wchar_t* sz, size_t nCount) const
 683 {
 684     size_t pos, len;
 685     PosLenToImpl(nStart, nLen, &pos, &len);
 686
 687     SubstrBufFromWC str(ImplStr(sz, nCount));
 688
 689     return m_impl.compare(pos, len, str.data, str.len);
 690 }
 691
 692 #else // !HAVE_STD_STRING_COMPARE
 693
 694 static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
 695                           const wxStringCharType* s2, size_t l2)
 696 {
 697     if( l1 == l2 )
 698         return wxStringMemcmp(s1, s2, l1);
 699     else if( l1 < l2 )
 700     {
 701         int ret = wxStringMemcmp(s1, s2, l1);
 702         return ret == 0 ? -1 : ret;
 703     }
 704     else
 705     {
 706         int ret = wxStringMemcmp(s1, s2, l2);
 707         return ret == 0 ? +1 : ret;
 708     }
 709 }
 710
 711 int wxString::compare(const wxString& str) const
 712 {
 713     return ::wxDoCmp(m_impl.data(), m_impl.length(),
 714                      str.m_impl.data(), str.m_impl.length());
 715 }
 716
 717 int wxString::compare(size_t nStart, size_t nLen,
 718                       const wxString& str) const
 719 {
 720     wxASSERT(nStart <= length());
 721     size_type strLen = length() - nStart;
 722     nLen = strLen < nLen ? strLen : nLen;
 723
 724     size_t pos, len;
 725     PosLenToImpl(nStart, nLen, &pos, &len);
 726
 727     return ::wxDoCmp(m_impl.data() + pos,  len,
 728                      str.m_impl.data(), str.m_impl.length());
 729 }
 730
 731 int wxString::compare(size_t nStart, size_t nLen,
 732                       const wxString& str,
 733                       size_t nStart2, size_t nLen2) const
 734 {
 735     wxASSERT(nStart <= length());
 736     wxASSERT(nStart2 <= str.length());
 737     size_type strLen  =     length() - nStart,
 738               strLen2 = str.length() - nStart2;
 739     nLen  = strLen  < nLen  ? strLen  : nLen;
 740     nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
 741
 742     size_t pos, len;
 743     PosLenToImpl(nStart, nLen, &pos, &len);
 744     size_t pos2, len2;
 745     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 746
 747     return ::wxDoCmp(m_impl.data() + pos, len,
 748                      str.m_impl.data() + pos2, len2);
 749 }
 750
 751 int wxString::compare(const char* sz) const
 752 {
 753     SubstrBufFromMB str(ImplStr(sz, npos));
 754     if ( str.len == npos )
 755         str.len = wxStringStrlen(str.data);
 756     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 757 }
 758
 759 int wxString::compare(const wchar_t* sz) const
 760 {
 761     SubstrBufFromWC str(ImplStr(sz, npos));
 762     if ( str.len == npos )
 763         str.len = wxStringStrlen(str.data);
 764     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 765 }
 766
 767 int wxString::compare(size_t nStart, size_t nLen,
 768                       const char* sz, size_t nCount) const
 769 {
 770     wxASSERT(nStart <= length());
 771     size_type strLen = length() - nStart;
 772     nLen = strLen < nLen ? strLen : nLen;
 773
 774     size_t pos, len;
 775     PosLenToImpl(nStart, nLen, &pos, &len);
 776
 777     SubstrBufFromMB str(ImplStr(sz, nCount));
 778     if ( str.len == npos )
 779         str.len = wxStringStrlen(str.data);
 780
 781     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 782 }
 783
 784 int wxString::compare(size_t nStart, size_t nLen,
 785                       const wchar_t* sz, size_t nCount) const
 786 {
 787     wxASSERT(nStart <= length());
 788     size_type strLen = length() - nStart;
 789     nLen = strLen < nLen ? strLen : nLen;
 790
 791     size_t pos, len;
 792     PosLenToImpl(nStart, nLen, &pos, &len);
 793
 794     SubstrBufFromWC str(ImplStr(sz, nCount));
 795     if ( str.len == npos )
 796         str.len = wxStringStrlen(str.data);
 797
 798     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 799 }
 800
 801 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
 802
 803
 804 // ---------------------------------------------------------------------------
 805 // find_{first,last}_[not]_of functions
 806 // ---------------------------------------------------------------------------
 807
 808 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
 809
// NB: All these functions are implemented with the argument being wxChar*,
//     i.e. a wide character string in any Unicode build, even though the
//     native string representation is char* in the UTF-8 build. This is
//     because we can't use memchr() to determine if a character is in a set
//     encoded as UTF-8.
 814
 815 size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
 816 {
 817     return find_first_of(sz, nStart, wxStrlen(sz));
 818 }
 819
 820 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
 821 {
 822     return find_first_not_of(sz, nStart, wxStrlen(sz));
 823 }
 824
 825 size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
 826 {
 827     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 828
 829     size_t idx = nStart;
 830     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 831     {
 832         if ( wxTmemchr(sz, *i, n) )
 833             return idx;
 834     }
 835
 836     return npos;
 837 }
 838
 839 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
 840 {
 841     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 842
 843     size_t idx = nStart;
 844     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 845     {
 846         if ( !wxTmemchr(sz, *i, n) )
 847             return idx;
 848     }
 849
 850     return npos;
 851 }
 852
 853
 854 size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
 855 {
 856     return find_last_of(sz, nStart, wxStrlen(sz));
 857 }
 858
 859 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
 860 {
 861     return find_last_not_of(sz, nStart, wxStrlen(sz));
 862 }
 863
 864 size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
 865 {
 866     size_t len = length();
 867
 868     if ( nStart == npos )
 869     {
 870         nStart = len - 1;
 871     }
 872     else
 873     {
 874         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 875     }
 876
 877     size_t idx = nStart;
 878     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 879           i != rend(); --idx, ++i )
 880     {
 881         if ( wxTmemchr(sz, *i, n) )
 882             return idx;
 883     }
 884
 885     return npos;
 886 }
 887
 888 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
 889 {
 890     size_t len = length();
 891
 892     if ( nStart == npos )
 893     {
 894         nStart = len - 1;
 895     }
 896     else
 897     {
 898         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 899     }
 900
 901     size_t idx = nStart;
 902     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 903           i != rend(); --idx, ++i )
 904     {
 905         if ( !wxTmemchr(sz, *i, n) )
 906             return idx;
 907     }
 908
 909     return npos;
 910 }
 911
 912 size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
 913 {
 914     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 915
 916     size_t idx = nStart;
 917     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 918     {
 919         if ( *i != ch )
 920             return idx;
 921     }
 922
 923     return npos;
 924 }
 925
 926 size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
 927 {
 928     size_t len = length();
 929
 930     if ( nStart == npos )
 931     {
 932         nStart = len - 1;
 933     }
 934     else
 935     {
 936         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 937     }
 938
 939     size_t idx = nStart;
 940     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 941           i != rend(); --idx, ++i )
 942     {
 943         if ( *i != ch )
 944             return idx;
 945     }
 946
 947     return npos;
 948 }
 949
// the functions above were implemented for wchar_t* arguments in Unicode
// build and char* in ANSI build; below are implementations for the other
// version:
//
// Each overload below converts its argument to wxChar using wxConvLibc
// (cMB2WC() in Unicode build, cWC2MB() in ANSI build) and forwards to the
// overload of the same name taking const wxChar*.
//
// NOTE(review): the overloads taking a length pass the same n both to the
// conversion and to the forwarded call, i.e. they appear to assume that the
// converted string has exactly as many characters as the source one --
// confirm that this holds for multibyte encodings.
#if wxUSE_UNICODE
    // wxOtherCharType is the character type which is not wxChar in this
    // build and STRCONV converts a string of it to const wxChar*
    #define wxOtherCharType char
    #define STRCONV         (const wxChar*)wxConvLibc.cMB2WC
#else
    #define wxOtherCharType wchar_t
    #define STRCONV         (const wxChar*)wxConvLibc.cWC2MB
#endif

size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_first_of(STRCONV(sz), nStart); }

size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
                               size_t n) const
    { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_last_of(STRCONV(sz), nStart); }
size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
                              size_t n) const
    { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_first_not_of(STRCONV(sz), nStart); }
size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
                                   size_t n) const
    { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_last_not_of(STRCONV(sz), nStart); }
size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
                                  size_t n) const
    { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }

// the helper macros are only meant to be used by the definitions above
#undef wxOtherCharType
#undef STRCONV
 985
 986 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
 987
 988 // ===========================================================================
 989 // other common string functions
 990 // ===========================================================================
 991
 992 int wxString::CmpNoCase(const wxString& s) const
 993 {
 994     // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
 995
 996     const_iterator i1 = begin();
 997     const_iterator end1 = end();
 998     const_iterator i2 = s.begin();
 999     const_iterator end2 = s.end();
1000
1001     for ( ; i1 != end1 && i2 != end2; ++i1, ++i2 )
1002     {
1003         wxUniChar lower1 = (wxChar)wxTolower(*i1);
1004         wxUniChar lower2 = (wxChar)wxTolower(*i2);
1005         if ( lower1 != lower2 )
1006             return lower1 < lower2 ? -1 : 1;
1007     }
1008
1009     size_t len1 = length();
1010     size_t len2 = s.length();
1011
1012     if ( len1 < len2 )
1013         return -1;
1014     else if ( len1 > len2 )
1015         return 1;
1016     return 0;
1017 }
1018
1019
1020 #if wxUSE_UNICODE
1021
1022 #ifdef __MWERKS__
1023 #ifndef __SCHAR_MAX__
1024 #define __SCHAR_MAX__ 127
1025 #endif
1026 #endif
1027
1028 wxString wxString::FromAscii(const char *ascii, size_t len)
1029 {
1030     if (!ascii || len == 0)
1031        return wxEmptyString;
1032
1033     wxString res;
1034
1035     {
1036         wxStringInternalBuffer buf(res, len);
1037         wxStringCharType *dest = buf;
1038
1039         for ( ; len > 0; --len )
1040         {
1041             unsigned char c = (unsigned char)*ascii++;
1042             wxASSERT_MSG( c < 0x80,
1043                           _T("Non-ASCII value passed to FromAscii().") );
1044
1045             *dest++ = (wchar_t)c;
1046         }
1047     }
1048
1049     return res;
1050 }
1051
1052 wxString wxString::FromAscii(const char *ascii)
1053 {
1054     return FromAscii(ascii, wxStrlen(ascii));
1055 }
1056
1057 wxString wxString::FromAscii(char ascii)
1058 {
1059     // What do we do with '\0' ?
1060
1061     unsigned char c = (unsigned char)ascii;
1062
1063     wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") );
1064
1065     // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1066     return wxString(wxUniChar((wchar_t)c));
1067 }
1068
1069 const wxCharBuffer wxString::ToAscii() const
1070 {
1071     // this will allocate enough space for the terminating NUL too
1072     wxCharBuffer buffer(length());
1073     char *dest = buffer.data();
1074
1075     for ( const_iterator i = begin(); i != end(); ++i )
1076     {
1077         wxUniChar c(*i);
1078         // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1079         *dest++ = c.IsAscii() ? (char)c : '_';
1080
1081         // the output string can't have embedded NULs anyhow, so we can safely
1082         // stop at first of them even if we do have any
1083         if ( !c )
1084             break;
1085     }
1086
1087     return buffer;
1088 }
1089
1090 #endif // wxUSE_UNICODE
1091
1092 // extract string of length nCount starting at nFirst
1093 wxString wxString::Mid(size_t nFirst, size_t nCount) const
1094 {
1095     size_t nLen = length();
1096
1097     // default value of nCount is npos and means "till the end"
1098     if ( nCount == npos )
1099     {
1100         nCount = nLen - nFirst;
1101     }
1102
1103     // out-of-bounds requests return sensible things
1104     if ( nFirst + nCount > nLen )
1105     {
1106         nCount = nLen - nFirst;
1107     }
1108
1109     if ( nFirst > nLen )
1110     {
1111         // AllocCopy() will return empty string
1112         return wxEmptyString;
1113     }
1114
1115     wxString dest(*this, nFirst, nCount);
1116     if ( dest.length() != nCount )
1117     {
1118         wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1119     }
1120
1121     return dest;
1122 }
1123
1124 // check that the string starts with prefix and return the rest of the string
1125 // in the provided pointer if it is not NULL, otherwise return false
1126 bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
1127 {
1128     if ( compare(0, prefix.length(), prefix) != 0 )
1129         return false;
1130
1131     if ( rest )
1132     {
1133         // put the rest of the string into provided pointer
1134         rest->assign(*this, prefix.length(), npos);
1135     }
1136
1137     return true;
1138 }
1139
1140
1141 // check that the string ends with suffix and return the rest of it in the
1142 // provided pointer if it is not NULL, otherwise return false
1143 bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
1144 {
1145     int start = length() - suffix.length();
1146
1147     if ( start < 0 || compare(start, npos, suffix) != 0 )
1148         return false;
1149
1150     if ( rest )
1151     {
1152         // put the rest of the string into provided pointer
1153         rest->assign(*this, 0, start);
1154     }
1155
1156     return true;
1157 }
1158
1159
1160 // extract nCount last (rightmost) characters
1161 wxString wxString::Right(size_t nCount) const
1162 {
1163   if ( nCount > length() )
1164     nCount = length();
1165
1166   wxString dest(*this, length() - nCount, nCount);
1167   if ( dest.length() != nCount ) {
1168     wxFAIL_MSG( _T("out of memory in wxString::Right") );
1169   }
1170   return dest;
1171 }
1172
1173 // get all characters after the last occurence of ch
1174 // (returns the whole string if ch not found)
1175 wxString wxString::AfterLast(wxUniChar ch) const
1176 {
1177   wxString str;
1178   int iPos = Find(ch, true);
1179   if ( iPos == wxNOT_FOUND )
1180     str = *this;
1181   else
1182     str = wx_str() + iPos + 1;
1183
1184   return str;
1185 }
1186
1187 // extract nCount first (leftmost) characters
1188 wxString wxString::Left(size_t nCount) const
1189 {
1190   if ( nCount > length() )
1191     nCount = length();
1192
1193   wxString dest(*this, 0, nCount);
1194   if ( dest.length() != nCount ) {
1195     wxFAIL_MSG( _T("out of memory in wxString::Left") );
1196   }
1197   return dest;
1198 }
1199
1200 // get all characters before the first occurence of ch
1201 // (returns the whole string if ch not found)
1202 wxString wxString::BeforeFirst(wxUniChar ch) const
1203 {
1204   int iPos = Find(ch);
1205   if ( iPos == wxNOT_FOUND ) iPos = length();
1206   return wxString(*this, 0, iPos);
1207 }
1208
1209 /// get all characters before the last occurence of ch
1210 /// (returns empty string if ch not found)
1211 wxString wxString::BeforeLast(wxUniChar ch) const
1212 {
1213   wxString str;
1214   int iPos = Find(ch, true);
1215   if ( iPos != wxNOT_FOUND && iPos != 0 )
1216     str = wxString(c_str(), iPos);
1217
1218   return str;
1219 }
1220
1221 /// get all characters after the first occurence of ch
1222 /// (returns empty string if ch not found)
1223 wxString wxString::AfterFirst(wxUniChar ch) const
1224 {
1225   wxString str;
1226   int iPos = Find(ch);
1227   if ( iPos != wxNOT_FOUND )
1228     str = wx_str() + iPos + 1;
1229
1230   return str;
1231 }
1232
1233 // replace first (or all) occurences of some substring with another one
1234 size_t wxString::Replace(const wxString& strOld,
1235                          const wxString& strNew, bool bReplaceAll)
1236 {
1237     // if we tried to replace an empty string we'd enter an infinite loop below
1238     wxCHECK_MSG( !strOld.empty(), 0,
1239                  _T("wxString::Replace(): invalid parameter") );
1240
1241     size_t uiCount = 0;   // count of replacements made
1242
1243     size_t uiOldLen = strOld.length();
1244     size_t uiNewLen = strNew.length();
1245
1246     size_t dwPos = 0;
1247
1248     while ( (*this)[dwPos] != wxT('\0') )
1249     {
1250         //DO NOT USE STRSTR HERE
1251         //this string can contain embedded null characters,
1252         //so strstr will function incorrectly
1253         dwPos = find(strOld, dwPos);
1254         if ( dwPos == npos )
1255             break;                  // exit the loop
1256         else
1257         {
1258             //replace this occurance of the old string with the new one
1259             replace(dwPos, uiOldLen, strNew, uiNewLen);
1260
1261             //move up pos past the string that was replaced
1262             dwPos += uiNewLen;
1263
1264             //increase replace count
1265             ++uiCount;
1266
1267             // stop now?
1268             if ( !bReplaceAll )
1269                 break;                  // exit the loop
1270         }
1271     }
1272
1273     return uiCount;
1274 }
1275
1276 bool wxString::IsAscii() const
1277 {
1278     for ( const_iterator i = begin(); i != end(); ++i )
1279     {
1280         if ( !(*i).IsAscii() )
1281             return false;
1282     }
1283
1284     return true;
1285 }
1286
1287 bool wxString::IsWord() const
1288 {
1289     for ( const_iterator i = begin(); i != end(); ++i )
1290     {
1291         if ( !wxIsalpha(*i) )
1292             return false;
1293     }
1294
1295     return true;
1296 }
1297
1298 bool wxString::IsNumber() const
1299 {
1300     if ( empty() )
1301         return true;
1302
1303     const_iterator i = begin();
1304
1305     if ( *i == _T('-') || *i == _T('+') )
1306         ++i;
1307
1308     for ( ; i != end(); ++i )
1309     {
1310         if ( !wxIsdigit(*i) )
1311             return false;
1312     }
1313
1314     return true;
1315 }
1316
1317 wxString wxString::Strip(stripType w) const
1318 {
1319     wxString s = *this;
1320     if ( w & leading ) s.Trim(false);
1321     if ( w & trailing ) s.Trim(true);
1322     return s;
1323 }
1324
1325 // ---------------------------------------------------------------------------
1326 // case conversion
1327 // ---------------------------------------------------------------------------
1328
1329 wxString& wxString::MakeUpper()
1330 {
1331   for ( iterator it = begin(), en = end(); it != en; ++it )
1332     *it = (wxChar)wxToupper(*it);
1333
1334   return *this;
1335 }
1336
1337 wxString& wxString::MakeLower()
1338 {
1339   for ( iterator it = begin(), en = end(); it != en; ++it )
1340     *it = (wxChar)wxTolower(*it);
1341
1342   return *this;
1343 }
1344
1345 // ---------------------------------------------------------------------------
1346 // trimming and padding
1347 // ---------------------------------------------------------------------------
1348
1349 // some compilers (VC++ 6.0 not to name them) return true for a call to
1350 // isspace('ê') in the C locale which seems to be broken to me, but we have to
1351 // live with this by checking that the character is a 7 bit one - even if this
1352 // may fail to detect some spaces (I don't know if Unicode doesn't have
1353 // space-like symbols somewhere except in the first 128 chars), it is arguably
1354 // still better than trimming away accented letters
1355 inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1356
1357 // trims spaces (in the sense of isspace) from left or right side
1358 wxString& wxString::Trim(bool bFromRight)
1359 {
1360     // first check if we're going to modify the string at all
1361     if ( !empty() &&
1362          (
1363           (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1364           (!bFromRight && wxSafeIsspace(GetChar(0u)))
1365          )
1366        )
1367     {
1368         if ( bFromRight )
1369         {
1370             // find last non-space character
1371             reverse_iterator psz = rbegin();
1372             while ( (psz != rend()) && wxSafeIsspace(*psz) )
1373                 ++psz;
1374
1375             // truncate at trailing space start
1376             erase(psz.base(), end());
1377         }
1378         else
1379         {
1380             // find first non-space character
1381             iterator psz = begin();
1382             while ( (psz != end()) && wxSafeIsspace(*psz) )
1383                 ++psz;
1384
1385             // fix up data and length
1386             erase(begin(), psz);
1387         }
1388     }
1389
1390     return *this;
1391 }
1392
1393 // adds nCount characters chPad to the string from either side
1394 wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1395 {
1396     wxString s(chPad, nCount);
1397
1398     if ( bFromRight )
1399         *this += s;
1400     else
1401     {
1402         s += *this;
1403         swap(s);
1404     }
1405
1406     return *this;
1407 }
1408
1409 // truncate the string
1410 wxString& wxString::Truncate(size_t uiLen)
1411 {
1412     if ( uiLen < length() )
1413     {
1414         erase(begin() + uiLen, end());
1415     }
1416     //else: nothing to do, string is already short enough
1417
1418     return *this;
1419 }
1420
1421 // ---------------------------------------------------------------------------
1422 // finding (return wxNOT_FOUND if not found and index otherwise)
1423 // ---------------------------------------------------------------------------
1424
1425 // find a character
1426 int wxString::Find(wxUniChar ch, bool bFromEnd) const
1427 {
1428     size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1429
1430     return (idx == npos) ? wxNOT_FOUND : (int)idx;
1431 }
1432
1433 // ----------------------------------------------------------------------------
1434 // conversion to numbers
1435 // ----------------------------------------------------------------------------
1436
// The implementation of all the functions below is exactly the same so factor
// it out. Note that number extraction works correctly on UTF-8 strings, so
// we can use wxStringCharType and wx_str() for maximum efficiency.

// DO_IF_NOT_WINCE(x) expands to x except when __WXWINCE__ is defined, in
// which case it expands to nothing: it is used to wrap the code using errno,
// as <errno.h> is not included for this platform at the top of this file
#ifndef __WXWINCE__
    #define DO_IF_NOT_WINCE(x) x
#else
    #define DO_IF_NOT_WINCE(x)
#endif

// WX_STRING_TO_INT_TYPE(val, base, func) expands to the complete body of a
// bool-returning wxString method:
//
//  - val is the output pointer, the generated code returns false if it is
//    NULL
//  - base must be 0 or in the 2..36 range (this is only asserted)
//  - func is the strtol()-like function called to parse wx_str(), its result
//    is stored in *val in any case
//
// The generated code returns true only if the entire non-empty string was
// consumed and, except under WinCE, errno was not set to ERANGE.
#define WX_STRING_TO_INT_TYPE(val, base, func)                              \
    wxCHECK_MSG( val, false, _T("NULL output pointer") );                   \
    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );  \
                                                                            \
    DO_IF_NOT_WINCE( errno = 0; )                                           \
                                                                            \
    const wxStringCharType *start = wx_str();                               \
    wxStringCharType *end;                                                  \
    *val = func(start, &end, base);                                         \
                                                                            \
    /* return true only if scan was stopped by the terminating NUL and */   \
    /* if the string was not empty to start with and no under/overflow */   \
    /* occurred: */                                                         \
    return !*end && (end != start)                                          \
        DO_IF_NOT_WINCE( && (errno != ERANGE) )
1462
// Parse the string as a long in the given base using wxStrtol() and store
// the result in *val.
//
// The body is generated by WX_STRING_TO_INT_TYPE: returns true only if val is
// not NULL, the whole non-empty string was consumed and (except under WinCE)
// no range error was reported via errno.
bool wxString::ToLong(long *val, int base) const
{
    WX_STRING_TO_INT_TYPE(val, base, wxStrtol);
}
1467
// Parse the string as an unsigned long in the given base using wxStrtoul()
// and store the result in *val.
//
// The body is generated by WX_STRING_TO_INT_TYPE: returns true only if val is
// not NULL, the whole non-empty string was consumed and (except under WinCE)
// no range error was reported via errno.
bool wxString::ToULong(unsigned long *val, int base) const
{
    WX_STRING_TO_INT_TYPE(val, base, wxStrtoul);
}
1472
// Parse the string as a wxLongLong_t in the given base using wxStrtoll() and
// store the result in *val.
//
// The body is generated by WX_STRING_TO_INT_TYPE: returns true only if val is
// not NULL, the whole non-empty string was consumed and (except under WinCE)
// no range error was reported via errno.
bool wxString::ToLongLong(wxLongLong_t *val, int base) const
{
    WX_STRING_TO_INT_TYPE(val, base, wxStrtoll);
}
1477
// Parse the string as a wxULongLong_t in the given base using wxStrtoull()
// and store the result in *val.
//
// The body is generated by WX_STRING_TO_INT_TYPE: returns true only if val is
// not NULL, the whole non-empty string was consumed and (except under WinCE)
// no range error was reported via errno.
bool wxString::ToULongLong(wxULongLong_t *val, int base) const
{
    WX_STRING_TO_INT_TYPE(val, base, wxStrtoull);
}
1482
1483 bool wxString::ToDouble(double *val) const
1484 {
1485     wxCHECK_MSG( val, false, _T("NULL pointer in wxString::ToDouble") );
1486
1487 #ifndef __WXWINCE__
1488     errno = 0;
1489 #endif
1490
1491     const wxChar *start = c_str();
1492     wxChar *end;
1493     *val = wxStrtod(start, &end);
1494
1495     // return true only if scan was stopped by the terminating NUL and if the
1496     // string was not empty to start with and no under/overflow occurred
1497     return !*end && (end != start)
1498 #ifndef __WXWINCE__
1499         && (errno != ERANGE)
1500 #endif
1501     ;
1502 }
1503
1504 // ---------------------------------------------------------------------------
1505 // formatted output
1506 // ---------------------------------------------------------------------------
1507
1508 #if !wxUSE_UTF8_LOCALE_ONLY
1509 /* static */
1510 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1511 wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
1512 #else
1513 wxString wxString::DoFormatWchar(const wxChar *format, ...)
1514 #endif
1515 {
1516     va_list argptr;
1517     va_start(argptr, format);
1518
1519     wxString s;
1520     s.PrintfV(format, argptr);
1521
1522     va_end(argptr);
1523
1524     return s;
1525 }
1526 #endif // !wxUSE_UTF8_LOCALE_ONLY
1527
#if wxUSE_UNICODE_UTF8
// Return a new string containing the result of formatting the variable
// arguments according to the printf()-like char format string.
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
1543
1544 /* static */
1545 wxString wxString::FormatV(const wxString& format, va_list argptr)
1546 {
1547     wxString s;
1548     s.PrintfV(format, argptr);
1549     return s;
1550 }
1551
1552 #if !wxUSE_UTF8_LOCALE_ONLY
1553 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1554 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
1555 #else
1556 int wxString::DoPrintfWchar(const wxChar *format, ...)
1557 #endif
1558 {
1559     va_list argptr;
1560     va_start(argptr, format);
1561
1562 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1563     // get a pointer to the wxString instance; we have to use dynamic_cast<>
1564     // because it's the only cast that works safely for downcasting when
1565     // multiple inheritance is used:
1566     wxString *str = static_cast<wxString*>(this);
1567 #else
1568     wxString *str = this;
1569 #endif
1570
1571     int iLen = str->PrintfV(format, argptr);
1572
1573     va_end(argptr);
1574
1575     return iLen;
1576 }
1577 #endif // !wxUSE_UTF8_LOCALE_ONLY
1578
#if wxUSE_UNICODE_UTF8
// Replace the contents of the string with the result of formatting the
// variable arguments according to the printf()-like char format string and
// return the value returned by PrintfV().
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);
    const int nChars = PrintfV(format, args);
    va_end(args);

    return nChars;
}
#endif // wxUSE_UNICODE_UTF8
1592
1593 #if wxUSE_UNICODE_UTF8
1594 template<typename BufferType>
1595 #else
1596 // we only need one version in non-UTF8 builds and at least two Windows
1597 // compilers have problems with this function template, so use just one
1598 // normal function here
1599 #endif
1600 static int DoStringPrintfV(wxString& str,
1601                            const wxString& format, va_list argptr)
1602 {
1603     int size = 1024;
1604
1605     for ( ;; )
1606     {
1607 #if wxUSE_UNICODE_UTF8
1608         BufferType tmp(str, size + 1);
1609         typename BufferType::CharType *buf = tmp;
1610 #else
1611         wxStringBuffer tmp(str, size + 1);
1612         wxChar *buf = tmp;
1613 #endif
1614
1615         if ( !buf )
1616         {
1617             // out of memory
1618
1619             // in UTF-8 build, leaving uninitialized junk in the buffer
1620             // could result in invalid non-empty UTF-8 string, so just
1621             // reset the string to empty on failure:
1622             buf[0] = '\0';
1623             return -1;
1624         }
1625
1626         // wxVsnprintf() may modify the original arg pointer, so pass it
1627         // only a copy
1628         va_list argptrcopy;
1629         wxVaCopy(argptrcopy, argptr);
1630         int len = wxVsnprintf(buf, size, format, argptrcopy);
1631         va_end(argptrcopy);
1632
1633         // some implementations of vsnprintf() don't NUL terminate
1634         // the string if there is not enough space for it so
1635         // always do it manually
1636         buf[size] = _T('\0');
1637
1638         // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1639         // total number of characters which would have been written if the
1640         // buffer were large enough (newer standards such as Unix98)
1641         if ( len < 0 )
1642         {
1643             // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1644             //     wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1645             //     is true if *both* of them use our own implementation,
1646             //     otherwise we can't be sure
1647 #if wxUSE_WXVSNPRINTF
1648             // we know that our own implementation of wxVsnprintf() returns -1
1649             // only for a format error - thus there's something wrong with
1650             // the user's format string
1651             buf[0] = '\0';
1652             return -1;
1653 #else // possibly using system version
1654             // assume it only returns error if there is not enough space, but
1655             // as we don't know how much we need, double the current size of
1656             // the buffer
1657             size *= 2;
1658 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1659         }
1660         else if ( len >= size )
1661         {
1662 #if wxUSE_WXVSNPRINTF
1663             // we know that our own implementation of wxVsnprintf() returns
1664             // size+1 when there's not enough space but that's not the size
1665             // of the required buffer!
1666             size *= 2;      // so we just double the current size of the buffer
1667 #else
1668             // some vsnprintf() implementations NUL-terminate the buffer and
1669             // some don't in len == size case, to be safe always add 1
1670             size = len + 1;
1671 #endif
1672         }
1673         else // ok, there was enough space
1674         {
1675             break;
1676         }
1677     }
1678
1679     // we could have overshot
1680     str.Shrink();
1681
1682     return str.length();
1683 }
1684
// Replace the contents of the string with the result of formatting the
// arguments in argptr according to the printf()-like format and return the
// value returned by DoStringPrintfV().
//
// All the work is done by DoStringPrintfV(), this function only selects the
// kind of buffer to use with it depending on the build configuration (and,
// in UTF-8 builds not restricted to UTF-8 locales, on the value of
// wxLocaleIsUtf8 at run-time).
int wxString::PrintfV(const wxString& format, va_list argptr)
{
    // Utf8Buffer is the type of the buffer giving access to the string
    // contents as char
#if wxUSE_UNICODE_UTF8
    #if wxUSE_STL_BASED_WXSTRING
        typedef wxStringTypeBuffer<char> Utf8Buffer;
    #else
        typedef wxStringInternalBuffer Utf8Buffer;
    #endif
#endif

#if wxUSE_UTF8_LOCALE_ONLY
    return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
#else
    #if wxUSE_UNICODE_UTF8
    if ( wxLocaleIsUtf8 )
        return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
    else
        // wxChar* version
        return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
    #else
        // DoStringPrintfV() is not a template in this build
        return DoStringPrintfV(*this, format, argptr);
    #endif // UTF8/WCHAR
#endif
}
1709
1710 // ----------------------------------------------------------------------------
1711 // misc other operations
1712 // ----------------------------------------------------------------------------
1713
// returns true if the string matches the pattern which may contain '*' and
// '?' metacharacters (as usual, '?' matches any character and '*' any number
// of them)
//
// The mask is scanned from left to right in parallel with the string. When a
// '*' is found, the literal text following it in the mask (up to the next
// metacharacter or the end of the mask) is searched for in the remaining part
// of the string with wxStrstr() and matching continues after its first
// occurrence. If the end of the mask is reached before the end of the string,
// matching is retried once from the position of the last '*', starting one
// character further in the string.
bool wxString::Matches(const wxString& mask) const
{
    // I disable this code as it doesn't seem to be faster (in fact, it seems
    // to be much slower) than the old, hand-written code below and using it
    // here requires always linking with libregex even if the user code doesn't
    // use it
#if 0 // wxUSE_REGEX
    // first translate the shell-like mask into a regex
    wxString pattern;
    pattern.reserve(wxStrlen(pszMask));

    pattern += _T('^');
    while ( *pszMask )
    {
        switch ( *pszMask )
        {
            case _T('?'):
                pattern += _T('.');
                break;

            case _T('*'):
                pattern += _T(".*");
                break;

            case _T('^'):
            case _T('.'):
            case _T('$'):
            case _T('('):
            case _T(')'):
            case _T('|'):
            case _T('+'):
            case _T('\\'):
                // these characters are special in a RE, quote them
                // (however note that we don't quote '[' and ']' to allow
                // using them for Unix shell like matching)
                pattern += _T('\\');
                // fall through

            default:
                pattern += *pszMask;
        }

        pszMask++;
    }
    pattern += _T('$');

    // and now use it
    return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
#else // !wxUSE_REGEX
  // TODO: this is, of course, awfully inefficient...

  // FIXME-UTF8: implement using iterators, remove #if
#if wxUSE_UNICODE_UTF8
  // the code below works with wxChar pointers, so use the wide character
  // representations of both strings; the buffers must stay alive for as long
  // as the pointers into them are used
  wxWCharBuffer maskBuf = mask.wc_str();
  wxWCharBuffer txtBuf = wc_str();
  const wxChar *pszMask = maskBuf.data();
  const wxChar *pszTxt = txtBuf.data();
#else
  const wxChar *pszMask = mask.wx_str();
  // the char currently being checked
  const wxChar *pszTxt = wx_str();
#endif

  // the last location where '*' matched
  const wxChar *pszLastStarInText = NULL;
  const wxChar *pszLastStarInMask = NULL;

  // we jump back here when backtracking to the last '*'
match:
  for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
    switch ( *pszMask ) {
      case wxT('?'):
        // '?' must match exactly one character, so it fails at the end of
        // the string
        if ( *pszTxt == wxT('\0') )
          return false;

        // pszTxt and pszMask will be incremented in the loop statement

        break;

      case wxT('*'):
        {
          // remember where we started to be able to backtrack later
          pszLastStarInText = pszTxt;
          pszLastStarInMask = pszMask;

          // ignore special chars immediately following this one
          // (should this be an error?)
          while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
            pszMask++;

          // if there is nothing more, match
          if ( *pszMask == wxT('\0') )
            return true;

          // are there any other metacharacters in the mask?
          size_t uiLenMask;
          const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));

          if ( pEndMask != NULL ) {
            // we have to match the string between two metachars
            uiLenMask = pEndMask - pszMask;
          }
          else {
            // we have to match the remainder of the string
            uiLenMask = wxStrlen(pszMask);
          }

          // look for the literal part of the mask in the rest of the string
          wxString strToMatch(pszMask, uiLenMask);
          const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
          if ( pMatch == NULL )
            return false;

          // -1 to compensate "++" in the loop
          pszTxt = pMatch + uiLenMask - 1;
          pszMask += uiLenMask - 1;
        }
        break;

      default:
        // any other character must match literally
        if ( *pszMask != *pszTxt )
          return false;
        break;
    }
  }

  // match only if nothing left
  if ( *pszTxt == wxT('\0') )
    return true;

  // if we failed to match, backtrack if we can
  if ( pszLastStarInText ) {
    pszTxt = pszLastStarInText + 1;
    pszMask = pszLastStarInMask;

    // forget the text position: it is set again when the '*' we return to is
    // processed
    pszLastStarInText = NULL;

    // don't bother resetting pszLastStarInMask, it's unnecessary

    goto match;
  }

  return false;
#endif // wxUSE_REGEX/!wxUSE_REGEX
}
1860
1861 // Count the number of chars
1862 int wxString::Freq(wxUniChar ch) const
1863 {
1864     int count = 0;
1865     for ( const_iterator i = begin(); i != end(); ++i )
1866     {
1867         if ( *i == ch )
1868             count ++;
1869     }
1870     return count;
1871 }
1872
1873 // convert to upper case, return the copy of the string
1874 wxString wxString::Upper() const
1875 { wxString s(*this); return s.MakeUpper(); }
1876
1877 // convert to lower case, return the copy of the string
1878 wxString wxString::Lower() const { wxString s(*this); return s.MakeLower(); }
1879
1880 // ----------------------------------------------------------------------------
1881 // wxUTF8StringBuffer
1882 // ----------------------------------------------------------------------------
1883
1884 #if wxUSE_UNICODE_WCHAR
1885 wxUTF8StringBuffer::~wxUTF8StringBuffer()
1886 {
1887     wxMBConvStrictUTF8 conv;
1888     size_t wlen = conv.ToWChar(NULL, 0, m_buf);
1889     wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
1890
1891     wxStringInternalBuffer wbuf(m_str, wlen);
1892     conv.ToWChar(wbuf, wlen, m_buf);
1893 }
1894
1895 wxUTF8StringBufferLength::~wxUTF8StringBufferLength()
1896 {
1897     wxCHECK_RET(m_lenSet, "length not set");
1898
1899     wxMBConvStrictUTF8 conv;
1900     size_t wlen = conv.ToWChar(NULL, 0, m_buf, m_len);
1901     wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
1902
1903     wxStringInternalBufferLength wbuf(m_str, wlen);
1904     conv.ToWChar(wbuf, wlen, m_buf, m_len);
1905     wbuf.SetLength(wlen);
1906 }
1907 #endif // wxUSE_UNICODE_WCHAR