src/common/string.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/string.cpp
   3 // Purpose:     wxString class
   4 // Author:      Vadim Zeitlin, Ryan Norton
   5 // Modified by:
   6 // Created:     29/01/98
   7 // RCS-ID:      $Id$
   8 // Copyright:   (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
   9 //              (c) 2004 Ryan Norton <wxprojects@comcast.net>
  10 // Licence:     wxWindows licence
  11 /////////////////////////////////////////////////////////////////////////////
  12
  13 // ===========================================================================
  14 // headers, declarations, constants
  15 // ===========================================================================
  16
  17 // For compilers that support precompilation, includes "wx.h".
  18 #include "wx/wxprec.h"
  19
  20 #ifdef __BORLANDC__
  21     #pragma hdrstop
  22 #endif
  23
  24 #ifndef WX_PRECOMP
  25     #include "wx/string.h"
  26     #include "wx/wxcrtvararg.h"
  27 #endif
  28
  29 #include <ctype.h>
  30
  31 #ifndef __WXWINCE__
  32     #include <errno.h>
  33 #endif
  34
  35 #include <string.h>
  36 #include <stdlib.h>
  37
  38 #ifdef __SALFORDC__
  39     #include <clib.h>
  40 #endif
  41
  42 #include "wx/hashmap.h"
  43
  // String handling primitives used by the wxString implementation in this
  // file. When wxUSE_UNICODE_UTF8 is set they map onto the plain C library
  // functions, otherwise onto the wxT*/wxStrlen counterparts.
  #if wxUSE_UNICODE_UTF8
      #define wxStringMemcpy   memcpy
      #define wxStringMemcmp   memcmp
      #define wxStringMemchr   memchr
      #define wxStringStrlen   strlen
  #else
      #define wxStringMemcpy   wxTmemcpy
      #define wxStringMemcmp   wxTmemcmp
      #define wxStringMemchr   wxTmemchr
      #define wxStringStrlen   wxStrlen
  #endif
  56
  57
  58 // ---------------------------------------------------------------------------
  59 // static class variables definition
  60 // ---------------------------------------------------------------------------
  61
  62 //According to STL _must_ be a -1 size_t
  63 const size_t wxString::npos = (size_t) -1;
  64
  65 // ----------------------------------------------------------------------------
  66 // global functions
  67 // ----------------------------------------------------------------------------
  68
  69 #if wxUSE_STD_IOSTREAM
  70
  71 #include <iostream>
  72
  73 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
  74 {
  75 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
  76     return os << (const char *)str.AsCharBuf();
  77 #else
  78     return os << str.AsInternal();
  79 #endif
  80 }
  81
  82 wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
  83 {
  84     return os << str.c_str();
  85 }
  86
  87 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str)
  88 {
  89     return os << str.data();
  90 }
  91
  92 #ifndef __BORLANDC__
  93 wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)
  94 {
  95     return os << str.data();
  96 }
  97 #endif
  98
  99 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
 100
 101 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxString& str)
 102 {
 103     return wos << str.wc_str();
 104 }
 105
 106 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxCStrData& str)
 107 {
 108     return wos << str.AsWChar();
 109 }
 110
 111 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxWCharBuffer& str)
 112 {
 113     return wos << str.data();
 114 }
 115
 116 #endif  // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
 117
 118 #endif // wxUSE_STD_IOSTREAM
 119
 120 // ===========================================================================
 121 // wxString class core
 122 // ===========================================================================
 123
 124 #if wxUSE_UNICODE_UTF8
 125
 126 void wxString::PosLenToImpl(size_t pos, size_t len,
 127                             size_t *implPos, size_t *implLen) const
 128 {
 129     if ( pos == npos )
 130         *implPos = npos;
 131     else
 132     {
 133         const_iterator i = begin() + pos;
 134         *implPos = wxStringImpl::const_iterator(i.impl()) - m_impl.begin();
 135         if ( len == npos )
 136             *implLen = npos;
 137         else
 138         {
 139             // too large length is interpreted as "to the end of the string"
 140             // FIXME-UTF8: verify this is the case in std::string, assert
 141             // otherwise
 142             if ( pos + len > length() )
 143                 len = length() - pos;
 144
 145             *implLen = (i + len).impl() - i.impl();
 146         }
 147     }
 148 }
 149
 150 #endif // wxUSE_UNICODE_UTF8
 151
 152 // ----------------------------------------------------------------------------
 153 // wxCStrData converted strings caching
 154 // ----------------------------------------------------------------------------
 155
 156 // FIXME-UTF8: temporarily disabled because it doesn't work with global
 157 //             string objects; re-enable after fixing this bug and benchmarking
 158 //             performance to see if using a hash is a good idea at all
 159 #if 0
 160
 161 // For backward compatibility reasons, it must be possible to assign the value
 162 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
 163 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
 164 // because the memory would be freed immediately, but it has to be valid as long
 165 // as the string is not modified, so that code like this still works:
 166 //
 167 // const wxChar *s = str.c_str();
 168 // while ( s ) { ... }
 169
 170 // FIXME-UTF8: not thread safe!
 171 // FIXME-UTF8: we currently clear the cached conversion only when the string is
 172 //             destroyed, but we should do it when the string is modified, to
 173 //             keep memory usage down
 174 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
 175 //             invalidated the cache on every change, we could keep the previous
 176 //             conversion
 177 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
 178 //             to use mb_str() or wc_str() instead of (const [w]char*)c_str()
 179
 180 template<typename T>
 181 static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
 182 {
 183     typename T::iterator i = hash.find(wxConstCast(s, wxString));
 184     if ( i != hash.end() )
 185     {
 186         free(i->second);
 187         hash.erase(i);
 188     }
 189 }
 190
 191 #if wxUSE_UNICODE
 192 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
 193 //     so we have to use wxString* here and const-cast when used
 194 WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
 195                     wxStringCharConversionCache);
 196 static wxStringCharConversionCache gs_stringsCharCache;
 197
 198 const char* wxCStrData::AsChar() const
 199 {
 200     // remove previously cache value, if any (see FIXMEs above):
 201     DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
 202
 203     // convert the string and keep it:
 204     const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
 205         m_str->mb_str().release();
 206
 207     return s + m_offset;
 208 }
 209 #endif // wxUSE_UNICODE
 210
 211 #if !wxUSE_UNICODE_WCHAR
 212 WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
 213                     wxStringWCharConversionCache);
 214 static wxStringWCharConversionCache gs_stringsWCharCache;
 215
 216 const wchar_t* wxCStrData::AsWChar() const
 217 {
 218     // remove previously cache value, if any (see FIXMEs above):
 219     DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
 220
 221     // convert the string and keep it:
 222     const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
 223         m_str->wc_str().release();
 224
 225     return s + m_offset;
 226 }
 227 #endif // !wxUSE_UNICODE_WCHAR
 228
 229 wxString::~wxString()
 230 {
 231 #if wxUSE_UNICODE
 232     // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
 233     DeleteStringFromConversionCache(gs_stringsCharCache, this);
 234 #endif
 235 #if !wxUSE_UNICODE_WCHAR
 236     DeleteStringFromConversionCache(gs_stringsWCharCache, this);
 237 #endif
 238 }
 239 #endif
 240
 241 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 242 const char* wxCStrData::AsChar() const
 243 {
 244 #if wxUSE_UNICODE_UTF8
 245     if ( wxLocaleIsUtf8 )
 246         return AsInternal();
 247 #endif
 248     // under non-UTF8 locales, we have to convert the internal UTF-8
 249     // representation using wxConvLibc and cache the result
 250
 251     wxString *str = wxConstCast(m_str, wxString);
 252
 253     // convert the string:
 254     //
 255     // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
 256     //             have it) but it's unfortunately not obvious to implement
 257     //             because we don't know how big buffer do we need for the
 258     //             given string length (in case of multibyte encodings, e.g.
 259     //             ISO-2022-JP or UTF-8 when internal representation is wchar_t)
 260     //
 261     //             One idea would be to store more than just m_convertedToChar
 262     //             in wxString: then we could record the length of the string
 263     //             which was converted the last time and try to reuse the same
 264     //             buffer if the current length is not greater than it (this
 265     //             could still fail because string could have been modified in
 266     //             place but it would work most of the time, so we'd do it and
 267     //             only allocate the new buffer if in-place conversion returned
 268     //             an error). We could also store a bit saying if the string
 269     //             was modified since the last conversion (and update it in all
 270     //             operation modifying the string, of course) to avoid unneeded
 271     //             consequential conversions. But both of these ideas require
 272     //             adding more fields to wxString and require profiling results
 273     //             to be sure that we really gain enough from them to justify
 274     //             doing it.
 275     wxCharBuffer buf(str->mb_str());
 276
 277     // if it failed, return empty string and not NULL to avoid crashes in code
 278     // written with either wxWidgets 2 wxString or std::string behaviour in
 279     // mind: neither of them ever returns NULL and so we shouldn't neither
 280     if ( !buf )
 281         return "";
 282
 283     if ( str->m_convertedToChar &&
 284          strlen(buf) == strlen(str->m_convertedToChar) )
 285     {
 286         // keep the same buffer for as long as possible, so that several calls
 287         // to c_str() in a row still work:
 288         strcpy(str->m_convertedToChar, buf);
 289     }
 290     else
 291     {
 292         str->m_convertedToChar = buf.release();
 293     }
 294
 295     // and keep it:
 296     return str->m_convertedToChar + m_offset;
 297 }
 298 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 299
 300 #if !wxUSE_UNICODE_WCHAR
 301 const wchar_t* wxCStrData::AsWChar() const
 302 {
 303     wxString *str = wxConstCast(m_str, wxString);
 304
 305     // convert the string:
 306     wxWCharBuffer buf(str->wc_str());
 307
 308     // notice that here, unlike above in AsChar(), conversion can't fail as our
 309     // internal UTF-8 is always well-formed -- or the string was corrupted and
 310     // all bets are off anyhow
 311
 312     // FIXME-UTF8: do the conversion in-place in the existing buffer
 313     if ( str->m_convertedToWChar &&
 314          wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
 315     {
 316         // keep the same buffer for as long as possible, so that several calls
 317         // to c_str() in a row still work:
 318         memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
 319     }
 320     else
 321     {
 322         str->m_convertedToWChar = buf.release();
 323     }
 324
 325     // and keep it:
 326     return str->m_convertedToWChar + m_offset;
 327 }
 328 #endif // !wxUSE_UNICODE_WCHAR
 329
 330 // ===========================================================================
 331 // wxString class core
 332 // ===========================================================================
 333
 334 // ---------------------------------------------------------------------------
 335 // construction and conversion
 336 // ---------------------------------------------------------------------------
 337
 338 #if wxUSE_UNICODE_WCHAR
 339 /* static */
 340 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 341                                                const wxMBConv& conv)
 342 {
 343     // anything to do?
 344     if ( !psz || nLength == 0 )
 345         return SubstrBufFromMB(L"", 0);
 346
 347     if ( nLength == npos )
 348         nLength = wxNO_LEN;
 349
 350     size_t wcLen;
 351     wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 352     if ( !wcLen )
 353         return SubstrBufFromMB(_T(""), 0);
 354     else
 355         return SubstrBufFromMB(wcBuf, wcLen);
 356 }
 357 #endif // wxUSE_UNICODE_WCHAR
 358
 359 #if wxUSE_UNICODE_UTF8
 360 /* static */
 361 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 362                                                const wxMBConv& conv)
 363 {
 364     // anything to do?
 365     if ( !psz || nLength == 0 )
 366         return SubstrBufFromMB("", 0);
 367
 368     // if psz is already in UTF-8, we don't have to do the roundtrip to
 369     // wchar_t* and back:
 370     if ( conv.IsUTF8() )
 371     {
 372         // we need to validate the input because UTF8 iterators assume valid
 373         // UTF-8 sequence and psz may be invalid:
 374         if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
 375         {
 376             return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz), nLength);
 377         }
 378         // else: do the roundtrip through wchar_t*
 379     }
 380
 381     if ( nLength == npos )
 382         nLength = wxNO_LEN;
 383
 384     // first convert to wide string:
 385     size_t wcLen;
 386     wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 387     if ( !wcLen )
 388         return SubstrBufFromMB("", 0);
 389
 390     // and then to UTF-8:
 391     SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
 392     // widechar -> UTF-8 conversion isn't supposed to ever fail:
 393     wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
 394
 395     return buf;
 396 }
 397 #endif // wxUSE_UNICODE_UTF8
 398
 399 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 400 /* static */
 401 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
 402                                                const wxMBConv& conv)
 403 {
 404     // anything to do?
 405     if ( !pwz || nLength == 0 )
 406         return SubstrBufFromWC("", 0);
 407
 408     if ( nLength == npos )
 409         nLength = wxNO_LEN;
 410
 411     size_t mbLen;
 412     wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
 413     if ( !mbLen )
 414         return SubstrBufFromWC("", 0);
 415     else
 416         return SubstrBufFromWC(mbBuf, mbLen);
 417 }
 418 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 419
 420
 421 #if wxUSE_UNICODE_WCHAR
 422
 423 //Convert wxString in Unicode mode to a multi-byte string
 424 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
 425 {
 426     return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL);
 427 }
 428
 429 #elif wxUSE_UNICODE_UTF8
 430
 431 const wxWCharBuffer wxString::wc_str() const
 432 {
 433     return wxMBConvStrictUTF8().cMB2WC
 434                                 (
 435                                     m_impl.c_str(),
 436                                     m_impl.length() + 1, // size, not length
 437                                     NULL
 438                                 );
 439 }
 440
 441 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
 442 {
 443     if ( conv.IsUTF8() )
 444         return wxCharBuffer::CreateNonOwned(m_impl.c_str());
 445
 446     // FIXME-UTF8: use wc_str() here once we have buffers with length
 447
 448     size_t wcLen;
 449     wxWCharBuffer wcBuf(wxMBConvStrictUTF8().cMB2WC
 450                                              (
 451                                                 m_impl.c_str(),
 452                                                 m_impl.length() + 1, // size
 453                                                 &wcLen
 454                                              ));
 455     if ( !wcLen )
 456         return wxCharBuffer("");
 457
 458     return conv.cWC2MB(wcBuf, wcLen+1, NULL);
 459 }
 460
 461 #else // ANSI
 462
 463 //Converts this string to a wide character string if unicode
 464 //mode is not enabled and wxUSE_WCHAR_T is enabled
 465 const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
 466 {
 467     return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL);
 468 }
 469
 470 #endif // Unicode/ANSI
 471
 472 // shrink to minimal size (releasing extra memory)
 473 bool wxString::Shrink()
 474 {
 475   wxString tmp(begin(), end());
 476   swap(tmp);
 477   return tmp.length() == length();
 478 }
 479
 480 // deprecated compatibility code:
 481 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
 482 wxStringCharType *wxString::GetWriteBuf(size_t nLen)
 483 {
 484     return DoGetWriteBuf(nLen);
 485 }
 486
 487 void wxString::UngetWriteBuf()
 488 {
 489     DoUngetWriteBuf();
 490 }
 491
 492 void wxString::UngetWriteBuf(size_t nLen)
 493 {
 494     DoUngetWriteBuf(nLen);
 495 }
 496 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
 497
 498
 499 // ---------------------------------------------------------------------------
 500 // data access
 501 // ---------------------------------------------------------------------------
 502
 503 // all functions are inline in string.h
 504
 505 // ---------------------------------------------------------------------------
 506 // concatenation operators
 507 // ---------------------------------------------------------------------------
 508
 509 /*
 510  * concatenation functions come in 5 flavours:
 511  *  string + string
 512  *  char   + string      and      string + char
 513  *  C str  + string      and      string + C str
 514  */
 515
 516 wxString operator+(const wxString& str1, const wxString& str2)
 517 {
 518 #if !wxUSE_STL_BASED_WXSTRING
 519     wxASSERT( str1.IsValid() );
 520     wxASSERT( str2.IsValid() );
 521 #endif
 522
 523     wxString s = str1;
 524     s += str2;
 525
 526     return s;
 527 }
 528
 529 wxString operator+(const wxString& str, wxUniChar ch)
 530 {
 531 #if !wxUSE_STL_BASED_WXSTRING
 532     wxASSERT( str.IsValid() );
 533 #endif
 534
 535     wxString s = str;
 536     s += ch;
 537
 538     return s;
 539 }
 540
 541 wxString operator+(wxUniChar ch, const wxString& str)
 542 {
 543 #if !wxUSE_STL_BASED_WXSTRING
 544     wxASSERT( str.IsValid() );
 545 #endif
 546
 547     wxString s = ch;
 548     s += str;
 549
 550     return s;
 551 }
 552
 553 wxString operator+(const wxString& str, const char *psz)
 554 {
 555 #if !wxUSE_STL_BASED_WXSTRING
 556     wxASSERT( str.IsValid() );
 557 #endif
 558
 559     wxString s;
 560     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 561         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 562     }
 563     s += str;
 564     s += psz;
 565
 566     return s;
 567 }
 568
 569 wxString operator+(const wxString& str, const wchar_t *pwz)
 570 {
 571 #if !wxUSE_STL_BASED_WXSTRING
 572     wxASSERT( str.IsValid() );
 573 #endif
 574
 575     wxString s;
 576     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 577         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 578     }
 579     s += str;
 580     s += pwz;
 581
 582     return s;
 583 }
 584
 585 wxString operator+(const char *psz, const wxString& str)
 586 {
 587 #if !wxUSE_STL_BASED_WXSTRING
 588     wxASSERT( str.IsValid() );
 589 #endif
 590
 591     wxString s;
 592     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 593         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 594     }
 595     s = psz;
 596     s += str;
 597
 598     return s;
 599 }
 600
 601 wxString operator+(const wchar_t *pwz, const wxString& str)
 602 {
 603 #if !wxUSE_STL_BASED_WXSTRING
 604     wxASSERT( str.IsValid() );
 605 #endif
 606
 607     wxString s;
 608     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 609         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 610     }
 611     s = pwz;
 612     s += str;
 613
 614     return s;
 615 }
 616
 617 // ---------------------------------------------------------------------------
 618 // string comparison
 619 // ---------------------------------------------------------------------------
 620
 621 bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
 622 {
 623     return (length() == 1) && (compareWithCase ? GetChar(0u) == c
 624                                : wxToupper(GetChar(0u)) == wxToupper(c));
 625 }
 626
 627 #ifdef HAVE_STD_STRING_COMPARE
 628
 629 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
 630 //     UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
 631 //     sort strings in characters code point order by sorting the byte sequence
 632 //     in byte values order (i.e. what strcmp() and memcmp() do).
 633
 634 int wxString::compare(const wxString& str) const
 635 {
 636     return m_impl.compare(str.m_impl);
 637 }
 638
 639 int wxString::compare(size_t nStart, size_t nLen,
 640                       const wxString& str) const
 641 {
 642     size_t pos, len;
 643     PosLenToImpl(nStart, nLen, &pos, &len);
 644     return m_impl.compare(pos, len, str.m_impl);
 645 }
 646
 647 int wxString::compare(size_t nStart, size_t nLen,
 648                       const wxString& str,
 649                       size_t nStart2, size_t nLen2) const
 650 {
 651     size_t pos, len;
 652     PosLenToImpl(nStart, nLen, &pos, &len);
 653
 654     size_t pos2, len2;
 655     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 656
 657     return m_impl.compare(pos, len, str.m_impl, pos2, len2);
 658 }
 659
 660 int wxString::compare(const char* sz) const
 661 {
 662     return m_impl.compare(ImplStr(sz));
 663 }
 664
 665 int wxString::compare(const wchar_t* sz) const
 666 {
 667     return m_impl.compare(ImplStr(sz));
 668 }
 669
 670 int wxString::compare(size_t nStart, size_t nLen,
 671                       const char* sz, size_t nCount) const
 672 {
 673     size_t pos, len;
 674     PosLenToImpl(nStart, nLen, &pos, &len);
 675
 676     SubstrBufFromMB str(ImplStr(sz, nCount));
 677
 678     return m_impl.compare(pos, len, str.data, str.len);
 679 }
 680
 681 int wxString::compare(size_t nStart, size_t nLen,
 682                       const wchar_t* sz, size_t nCount) const
 683 {
 684     size_t pos, len;
 685     PosLenToImpl(nStart, nLen, &pos, &len);
 686
 687     SubstrBufFromWC str(ImplStr(sz, nCount));
 688
 689     return m_impl.compare(pos, len, str.data, str.len);
 690 }
 691
 692 #else // !HAVE_STD_STRING_COMPARE
 693
 694 static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
 695                           const wxStringCharType* s2, size_t l2)
 696 {
 697     if( l1 == l2 )
 698         return wxStringMemcmp(s1, s2, l1);
 699     else if( l1 < l2 )
 700     {
 701         int ret = wxStringMemcmp(s1, s2, l1);
 702         return ret == 0 ? -1 : ret;
 703     }
 704     else
 705     {
 706         int ret = wxStringMemcmp(s1, s2, l2);
 707         return ret == 0 ? +1 : ret;
 708     }
 709 }
 710
 711 int wxString::compare(const wxString& str) const
 712 {
 713     return ::wxDoCmp(m_impl.data(), m_impl.length(),
 714                      str.m_impl.data(), str.m_impl.length());
 715 }
 716
 717 int wxString::compare(size_t nStart, size_t nLen,
 718                       const wxString& str) const
 719 {
 720     wxASSERT(nStart <= length());
 721     size_type strLen = length() - nStart;
 722     nLen = strLen < nLen ? strLen : nLen;
 723
 724     size_t pos, len;
 725     PosLenToImpl(nStart, nLen, &pos, &len);
 726
 727     return ::wxDoCmp(m_impl.data() + pos,  len,
 728                      str.m_impl.data(), str.m_impl.length());
 729 }
 730
 731 int wxString::compare(size_t nStart, size_t nLen,
 732                       const wxString& str,
 733                       size_t nStart2, size_t nLen2) const
 734 {
 735     wxASSERT(nStart <= length());
 736     wxASSERT(nStart2 <= str.length());
 737     size_type strLen  =     length() - nStart,
 738               strLen2 = str.length() - nStart2;
 739     nLen  = strLen  < nLen  ? strLen  : nLen;
 740     nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
 741
 742     size_t pos, len;
 743     PosLenToImpl(nStart, nLen, &pos, &len);
 744     size_t pos2, len2;
 745     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 746
 747     return ::wxDoCmp(m_impl.data() + pos, len,
 748                      str.m_impl.data() + pos2, len2);
 749 }
 750
 751 int wxString::compare(const char* sz) const
 752 {
 753     SubstrBufFromMB str(ImplStr(sz, npos));
 754     if ( str.len == npos )
 755         str.len = wxStringStrlen(str.data);
 756     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 757 }
 758
 759 int wxString::compare(const wchar_t* sz) const
 760 {
 761     SubstrBufFromWC str(ImplStr(sz, npos));
 762     if ( str.len == npos )
 763         str.len = wxStringStrlen(str.data);
 764     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 765 }
 766
 767 int wxString::compare(size_t nStart, size_t nLen,
 768                       const char* sz, size_t nCount) const
 769 {
 770     wxASSERT(nStart <= length());
 771     size_type strLen = length() - nStart;
 772     nLen = strLen < nLen ? strLen : nLen;
 773
 774     size_t pos, len;
 775     PosLenToImpl(nStart, nLen, &pos, &len);
 776
 777     SubstrBufFromMB str(ImplStr(sz, nCount));
 778     if ( str.len == npos )
 779         str.len = wxStringStrlen(str.data);
 780
 781     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 782 }
 783
 784 int wxString::compare(size_t nStart, size_t nLen,
 785                       const wchar_t* sz, size_t nCount) const
 786 {
 787     wxASSERT(nStart <= length());
 788     size_type strLen = length() - nStart;
 789     nLen = strLen < nLen ? strLen : nLen;
 790
 791     size_t pos, len;
 792     PosLenToImpl(nStart, nLen, &pos, &len);
 793
 794     SubstrBufFromWC str(ImplStr(sz, nCount));
 795     if ( str.len == npos )
 796         str.len = wxStringStrlen(str.data);
 797
 798     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 799 }
 800
 801 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
 802
 803
 804 // ---------------------------------------------------------------------------
 805 // find_{first,last}_[not]_of functions
 806 // ---------------------------------------------------------------------------
 807
 808 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
 809
 810 // NB: All these functions are implemented  with the argument being wxChar*,
 811 //     i.e. widechar string in any Unicode build, even though native string
 812 //     representation is char* in the UTF-8 build. This is because we couldn't
 813 //     use memchr() to determine if a character is in a set encoded as UTF-8.
 814
 815 size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
 816 {
 817     return find_first_of(sz, nStart, wxStrlen(sz));
 818 }
 819
 820 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
 821 {
 822     return find_first_not_of(sz, nStart, wxStrlen(sz));
 823 }
 824
 825 size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
 826 {
 827     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 828
 829     size_t idx = nStart;
 830     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 831     {
 832         if ( wxTmemchr(sz, *i, n) )
 833             return idx;
 834     }
 835
 836     return npos;
 837 }
 838
 839 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
 840 {
 841     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 842
 843     size_t idx = nStart;
 844     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 845     {
 846         if ( !wxTmemchr(sz, *i, n) )
 847             return idx;
 848     }
 849
 850     return npos;
 851 }
 852
 853
 854 size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
 855 {
 856     return find_last_of(sz, nStart, wxStrlen(sz));
 857 }
 858
 859 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
 860 {
 861     return find_last_not_of(sz, nStart, wxStrlen(sz));
 862 }
 863
 864 size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
 865 {
 866     size_t len = length();
 867
 868     if ( nStart == npos )
 869     {
 870         nStart = len - 1;
 871     }
 872     else
 873     {
 874         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 875     }
 876
 877     size_t idx = nStart;
 878     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 879           i != rend(); --idx, ++i )
 880     {
 881         if ( wxTmemchr(sz, *i, n) )
 882             return idx;
 883     }
 884
 885     return npos;
 886 }
 887
 888 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
 889 {
 890     size_t len = length();
 891
 892     if ( nStart == npos )
 893     {
 894         nStart = len - 1;
 895     }
 896     else
 897     {
 898         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 899     }
 900
 901     size_t idx = nStart;
 902     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 903           i != rend(); --idx, ++i )
 904     {
 905         if ( !wxTmemchr(sz, *i, n) )
 906             return idx;
 907     }
 908
 909     return npos;
 910 }
 911
 912 size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
 913 {
 914     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 915
 916     size_t idx = nStart;
 917     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 918     {
 919         if ( *i != ch )
 920             return idx;
 921     }
 922
 923     return npos;
 924 }
 925
 926 size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
 927 {
 928     size_t len = length();
 929
 930     if ( nStart == npos )
 931     {
 932         nStart = len - 1;
 933     }
 934     else
 935     {
 936         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 937     }
 938
 939     size_t idx = nStart;
 940     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 941           i != rend(); --idx, ++i )
 942     {
 943         if ( *i != ch )
 944             return idx;
 945     }
 946
 947     return npos;
 948 }
 949
// the functions above were implemented for wchar_t* arguments in Unicode
// build and char* in ANSI build; below are implementations for the other
// version:
//
// Each overload below converts its string argument using wxConvLibc (through
// the STRCONV helper macro) and forwards to the overload of the same name
// taking const wxChar*.
//
// NOTE(review): the overloads taking a length pass the same n both to the
// conversion function and to the forwarded call; presumably the converted
// string can have a different length than the input -- confirm.
#if wxUSE_UNICODE
    // the "other" character type is char: convert multibyte to wide
    #define wxOtherCharType char
    #define STRCONV         (const wxChar*)wxConvLibc.cMB2WC
#else
    // the "other" character type is wchar_t: convert wide to multibyte
    #define wxOtherCharType wchar_t
    #define STRCONV         (const wxChar*)wxConvLibc.cWC2MB
#endif

size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_first_of(STRCONV(sz), nStart); }

size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
                               size_t n) const
    { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_last_of(STRCONV(sz), nStart); }
size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
                              size_t n) const
    { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_first_not_of(STRCONV(sz), nStart); }
size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
                                   size_t n) const
    { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_last_not_of(STRCONV(sz), nStart); }
size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
                                  size_t n) const
    { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }

// the helper macros are only needed for the definitions above
#undef wxOtherCharType
#undef STRCONV
 985
 986 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
 987
 988 // ===========================================================================
 989 // other common string functions
 990 // ===========================================================================
 991
 992 int wxString::CmpNoCase(const wxString& s) const
 993 {
 994     // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
 995
 996     const_iterator i1 = begin();
 997     const_iterator end1 = end();
 998     const_iterator i2 = s.begin();
 999     const_iterator end2 = s.end();
1000
1001     for ( ; i1 != end1 && i2 != end2; ++i1, ++i2 )
1002     {
1003         wxUniChar lower1 = (wxChar)wxTolower(*i1);
1004         wxUniChar lower2 = (wxChar)wxTolower(*i2);
1005         if ( lower1 != lower2 )
1006             return lower1 < lower2 ? -1 : 1;
1007     }
1008
1009     size_t len1 = length();
1010     size_t len2 = s.length();
1011
1012     if ( len1 < len2 )
1013         return -1;
1014     else if ( len1 > len2 )
1015         return 1;
1016     return 0;
1017 }
1018
1019
1020 #if wxUSE_UNICODE
1021
1022 #ifdef __MWERKS__
1023 #ifndef __SCHAR_MAX__
1024 #define __SCHAR_MAX__ 127
1025 #endif
1026 #endif
1027
1028 wxString wxString::FromAscii(const char *ascii, size_t len)
1029 {
1030     if (!ascii || len == 0)
1031        return wxEmptyString;
1032
1033     wxString res;
1034
1035     {
1036         wxStringInternalBuffer buf(res, len);
1037         wxStringCharType *dest = buf;
1038
1039         for ( ; len > 0; --len )
1040         {
1041             unsigned char c = (unsigned char)*ascii++;
1042             wxASSERT_MSG( c < 0x80,
1043                           _T("Non-ASCII value passed to FromAscii().") );
1044
1045             *dest++ = (wchar_t)c;
1046         }
1047     }
1048
1049     return res;
1050 }
1051
1052 wxString wxString::FromAscii(const char *ascii)
1053 {
1054     return FromAscii(ascii, wxStrlen(ascii));
1055 }
1056
1057 wxString wxString::FromAscii(char ascii)
1058 {
1059     // What do we do with '\0' ?
1060
1061     unsigned char c = (unsigned char)ascii;
1062
1063     wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") );
1064
1065     // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1066     return wxString(wxUniChar((wchar_t)c));
1067 }
1068
1069 const wxCharBuffer wxString::ToAscii() const
1070 {
1071     // this will allocate enough space for the terminating NUL too
1072     wxCharBuffer buffer(length());
1073     char *dest = buffer.data();
1074
1075     for ( const_iterator i = begin(); i != end(); ++i )
1076     {
1077         wxUniChar c(*i);
1078         // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1079         *dest++ = c.IsAscii() ? (char)c : '_';
1080
1081         // the output string can't have embedded NULs anyhow, so we can safely
1082         // stop at first of them even if we do have any
1083         if ( !c )
1084             break;
1085     }
1086
1087     return buffer;
1088 }
1089
1090 #endif // wxUSE_UNICODE
1091
1092 // extract string of length nCount starting at nFirst
1093 wxString wxString::Mid(size_t nFirst, size_t nCount) const
1094 {
1095     size_t nLen = length();
1096
1097     // default value of nCount is npos and means "till the end"
1098     if ( nCount == npos )
1099     {
1100         nCount = nLen - nFirst;
1101     }
1102
1103     // out-of-bounds requests return sensible things
1104     if ( nFirst + nCount > nLen )
1105     {
1106         nCount = nLen - nFirst;
1107     }
1108
1109     if ( nFirst > nLen )
1110     {
1111         // AllocCopy() will return empty string
1112         return wxEmptyString;
1113     }
1114
1115     wxString dest(*this, nFirst, nCount);
1116     if ( dest.length() != nCount )
1117     {
1118         wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1119     }
1120
1121     return dest;
1122 }
1123
1124 // check that the string starts with prefix and return the rest of the string
1125 // in the provided pointer if it is not NULL, otherwise return false
1126 bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
1127 {
1128     if ( compare(0, prefix.length(), prefix) != 0 )
1129         return false;
1130
1131     if ( rest )
1132     {
1133         // put the rest of the string into provided pointer
1134         rest->assign(*this, prefix.length(), npos);
1135     }
1136
1137     return true;
1138 }
1139
1140
1141 // check that the string ends with suffix and return the rest of it in the
1142 // provided pointer if it is not NULL, otherwise return false
1143 bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
1144 {
1145     int start = length() - suffix.length();
1146
1147     if ( start < 0 || compare(start, npos, suffix) != 0 )
1148         return false;
1149
1150     if ( rest )
1151     {
1152         // put the rest of the string into provided pointer
1153         rest->assign(*this, 0, start);
1154     }
1155
1156     return true;
1157 }
1158
1159
1160 // extract nCount last (rightmost) characters
1161 wxString wxString::Right(size_t nCount) const
1162 {
1163   if ( nCount > length() )
1164     nCount = length();
1165
1166   wxString dest(*this, length() - nCount, nCount);
1167   if ( dest.length() != nCount ) {
1168     wxFAIL_MSG( _T("out of memory in wxString::Right") );
1169   }
1170   return dest;
1171 }
1172
1173 // get all characters after the last occurence of ch
1174 // (returns the whole string if ch not found)
1175 wxString wxString::AfterLast(wxUniChar ch) const
1176 {
1177   wxString str;
1178   int iPos = Find(ch, true);
1179   if ( iPos == wxNOT_FOUND )
1180     str = *this;
1181   else
1182     str = wx_str() + iPos + 1;
1183
1184   return str;
1185 }
1186
1187 // extract nCount first (leftmost) characters
1188 wxString wxString::Left(size_t nCount) const
1189 {
1190   if ( nCount > length() )
1191     nCount = length();
1192
1193   wxString dest(*this, 0, nCount);
1194   if ( dest.length() != nCount ) {
1195     wxFAIL_MSG( _T("out of memory in wxString::Left") );
1196   }
1197   return dest;
1198 }
1199
1200 // get all characters before the first occurence of ch
1201 // (returns the whole string if ch not found)
1202 wxString wxString::BeforeFirst(wxUniChar ch) const
1203 {
1204   int iPos = Find(ch);
1205   if ( iPos == wxNOT_FOUND ) iPos = length();
1206   return wxString(*this, 0, iPos);
1207 }
1208
1209 /// get all characters before the last occurence of ch
1210 /// (returns empty string if ch not found)
1211 wxString wxString::BeforeLast(wxUniChar ch) const
1212 {
1213   wxString str;
1214   int iPos = Find(ch, true);
1215   if ( iPos != wxNOT_FOUND && iPos != 0 )
1216     str = wxString(c_str(), iPos);
1217
1218   return str;
1219 }
1220
1221 /// get all characters after the first occurence of ch
1222 /// (returns empty string if ch not found)
1223 wxString wxString::AfterFirst(wxUniChar ch) const
1224 {
1225   wxString str;
1226   int iPos = Find(ch);
1227   if ( iPos != wxNOT_FOUND )
1228     str = wx_str() + iPos + 1;
1229
1230   return str;
1231 }
1232
1233 // replace first (or all) occurences of some substring with another one
1234 size_t wxString::Replace(const wxString& strOld,
1235                          const wxString& strNew, bool bReplaceAll)
1236 {
1237     // if we tried to replace an empty string we'd enter an infinite loop below
1238     wxCHECK_MSG( !strOld.empty(), 0,
1239                  _T("wxString::Replace(): invalid parameter") );
1240
1241     size_t uiCount = 0;   // count of replacements made
1242
1243     size_t uiOldLen = strOld.length();
1244     size_t uiNewLen = strNew.length();
1245
1246     size_t dwPos = 0;
1247
1248     while ( (*this)[dwPos] != wxT('\0') )
1249     {
1250         //DO NOT USE STRSTR HERE
1251         //this string can contain embedded null characters,
1252         //so strstr will function incorrectly
1253         dwPos = find(strOld, dwPos);
1254         if ( dwPos == npos )
1255             break;                  // exit the loop
1256         else
1257         {
1258             //replace this occurance of the old string with the new one
1259             replace(dwPos, uiOldLen, strNew, uiNewLen);
1260
1261             //move up pos past the string that was replaced
1262             dwPos += uiNewLen;
1263
1264             //increase replace count
1265             ++uiCount;
1266
1267             // stop now?
1268             if ( !bReplaceAll )
1269                 break;                  // exit the loop
1270         }
1271     }
1272
1273     return uiCount;
1274 }
1275
1276 bool wxString::IsAscii() const
1277 {
1278     for ( const_iterator i = begin(); i != end(); ++i )
1279     {
1280         if ( !(*i).IsAscii() )
1281             return false;
1282     }
1283
1284     return true;
1285 }
1286
1287 bool wxString::IsWord() const
1288 {
1289     for ( const_iterator i = begin(); i != end(); ++i )
1290     {
1291         if ( !wxIsalpha(*i) )
1292             return false;
1293     }
1294
1295     return true;
1296 }
1297
1298 bool wxString::IsNumber() const
1299 {
1300     if ( empty() )
1301         return true;
1302
1303     const_iterator i = begin();
1304
1305     if ( *i == _T('-') || *i == _T('+') )
1306         ++i;
1307
1308     for ( ; i != end(); ++i )
1309     {
1310         if ( !wxIsdigit(*i) )
1311             return false;
1312     }
1313
1314     return true;
1315 }
1316
1317 wxString wxString::Strip(stripType w) const
1318 {
1319     wxString s = *this;
1320     if ( w & leading ) s.Trim(false);
1321     if ( w & trailing ) s.Trim(true);
1322     return s;
1323 }
1324
1325 // ---------------------------------------------------------------------------
1326 // case conversion
1327 // ---------------------------------------------------------------------------
1328
1329 wxString& wxString::MakeUpper()
1330 {
1331   for ( iterator it = begin(), en = end(); it != en; ++it )
1332     *it = (wxChar)wxToupper(*it);
1333
1334   return *this;
1335 }
1336
1337 wxString& wxString::MakeLower()
1338 {
1339   for ( iterator it = begin(), en = end(); it != en; ++it )
1340     *it = (wxChar)wxTolower(*it);
1341
1342   return *this;
1343 }
1344
1345 // ---------------------------------------------------------------------------
1346 // trimming and padding
1347 // ---------------------------------------------------------------------------
1348
1349 // some compilers (VC++ 6.0 not to name them) return true for a call to
1350 // isspace('\xEA') in the C locale which seems to be broken to me, but we have
1351 // to live with this by checking that the character is a 7 bit one - even if
1352 // this may fail to detect some spaces (I don't know if Unicode doesn't have
1353 // space-like symbols somewhere except in the first 128 chars), it is arguably
1354 // still better than trimming away accented letters
1355 inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1356
1357 // trims spaces (in the sense of isspace) from left or right side
1358 wxString& wxString::Trim(bool bFromRight)
1359 {
1360     // first check if we're going to modify the string at all
1361     if ( !empty() &&
1362          (
1363           (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1364           (!bFromRight && wxSafeIsspace(GetChar(0u)))
1365          )
1366        )
1367     {
1368         if ( bFromRight )
1369         {
1370             // find last non-space character
1371             reverse_iterator psz = rbegin();
1372             while ( (psz != rend()) && wxSafeIsspace(*psz) )
1373                 ++psz;
1374
1375             // truncate at trailing space start
1376             erase(psz.base(), end());
1377         }
1378         else
1379         {
1380             // find first non-space character
1381             iterator psz = begin();
1382             while ( (psz != end()) && wxSafeIsspace(*psz) )
1383                 ++psz;
1384
1385             // fix up data and length
1386             erase(begin(), psz);
1387         }
1388     }
1389
1390     return *this;
1391 }
1392
1393 // adds nCount characters chPad to the string from either side
1394 wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1395 {
1396     wxString s(chPad, nCount);
1397
1398     if ( bFromRight )
1399         *this += s;
1400     else
1401     {
1402         s += *this;
1403         swap(s);
1404     }
1405
1406     return *this;
1407 }
1408
1409 // truncate the string
1410 wxString& wxString::Truncate(size_t uiLen)
1411 {
1412     if ( uiLen < length() )
1413     {
1414         erase(begin() + uiLen, end());
1415     }
1416     //else: nothing to do, string is already short enough
1417
1418     return *this;
1419 }
1420
1421 // ---------------------------------------------------------------------------
1422 // finding (return wxNOT_FOUND if not found and index otherwise)
1423 // ---------------------------------------------------------------------------
1424
1425 // find a character
1426 int wxString::Find(wxUniChar ch, bool bFromEnd) const
1427 {
1428     size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1429
1430     return (idx == npos) ? wxNOT_FOUND : (int)idx;
1431 }
1432
1433 // ----------------------------------------------------------------------------
1434 // conversion to numbers
1435 // ----------------------------------------------------------------------------
1436
1437 // The implementation of all the functions below is exactly the same so factor
1438 // it out. Note that number extraction works correctly on UTF-8 strings, so
1439 // we can use wxStringCharType and wx_str() for maximum efficiency.
1440
// DO_IF_NOT_WINCE(x) expands to its argument everywhere except when
// __WXWINCE__ is defined, where it expands to nothing; it is used below to
// wrap all the code using errno.
#ifndef __WXWINCE__
    #define DO_IF_NOT_WINCE(x) x
#else
    #define DO_IF_NOT_WINCE(x)
#endif

// Common body of the string to integer conversion functions:
//
//  - out is the pointer to the variable receiving the result, the function
//    returns false if it is NULL
//  - base is the numeric base, it must be 0 or in 2..36 range (this is only
//    checked by an assert)
//  - func is the strtol()-like function to use for the conversion
//  - T is the type of the value returned by func
//
// The macro expands to a sequence of statements ending with "return true"
// (without the trailing semicolon), so it must be the last thing in the body
// of a function returning bool.
#define WX_STRING_TO_INT_TYPE(out, base, func, T)                           \
    wxCHECK_MSG( out, false, _T("NULL output pointer") );                   \
    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );  \
                                                                            \
    DO_IF_NOT_WINCE( errno = 0; )                                           \
                                                                            \
    const wxStringCharType *start = wx_str();                               \
    wxStringCharType *end;                                                  \
    T val = func(start, &end, base);                                        \
                                                                            \
    /* return true only if scan was stopped by the terminating NUL and */   \
    /* if the string was not empty to start with and no under/overflow */   \
    /* occurred: */                                                         \
    if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )         \
        return false;                                                       \
    *out = val;                                                             \
    return true
1464
// Convert the string to a long using wxStrtol() in the given base.
//
// Returns true and stores the result in *pVal only if the entire, non-empty,
// string was parsed and no range error was reported; returns false and
// leaves *pVal unchanged otherwise.
bool wxString::ToLong(long *pVal, int base) const
{
    // the macro contains the entire function body, including the returns
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtol, long);
}
1469
// Convert the string to an unsigned long using wxStrtoul() in the given base.
//
// Returns true and stores the result in *pVal only if the entire, non-empty,
// string was parsed and no range error was reported; returns false and
// leaves *pVal unchanged otherwise.
bool wxString::ToULong(unsigned long *pVal, int base) const
{
    // the macro contains the entire function body, including the returns
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoul, unsigned long);
}
1474
// Convert the string to a wxLongLong_t using wxStrtoll() in the given base.
//
// Returns true and stores the result in *pVal only if the entire, non-empty,
// string was parsed and no range error was reported; returns false and
// leaves *pVal unchanged otherwise.
bool wxString::ToLongLong(wxLongLong_t *pVal, int base) const
{
    // the macro contains the entire function body, including the returns
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoll, wxLongLong_t);
}
1479
// Convert the string to a wxULongLong_t using wxStrtoull() in the given base.
//
// Returns true and stores the result in *pVal only if the entire, non-empty,
// string was parsed and no range error was reported; returns false and
// leaves *pVal unchanged otherwise.
bool wxString::ToULongLong(wxULongLong_t *pVal, int base) const
{
    // the macro contains the entire function body, including the returns
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoull, wxULongLong_t);
}
1484
1485 bool wxString::ToDouble(double *pVal) const
1486 {
1487     wxCHECK_MSG( pVal, false, _T("NULL output pointer") );
1488
1489     DO_IF_NOT_WINCE( errno = 0; )
1490
1491     const wxChar *start = c_str();
1492     wxChar *end;
1493     double val = wxStrtod(start, &end);
1494
1495     // return true only if scan was stopped by the terminating NUL and if the
1496     // string was not empty to start with and no under/overflow occurred
1497     if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )
1498         return false;
1499
1500     *pVal = val;
1501
1502     return true;
1503 }
1504
1505 // ---------------------------------------------------------------------------
1506 // formatted output
1507 // ---------------------------------------------------------------------------
1508
1509 #if !wxUSE_UTF8_LOCALE_ONLY
1510 /* static */
1511 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1512 wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
1513 #else
1514 wxString wxString::DoFormatWchar(const wxChar *format, ...)
1515 #endif
1516 {
1517     va_list argptr;
1518     va_start(argptr, format);
1519
1520     wxString s;
1521     s.PrintfV(format, argptr);
1522
1523     va_end(argptr);
1524
1525     return s;
1526 }
1527 #endif // !wxUSE_UTF8_LOCALE_ONLY
1528
#if wxUSE_UNICODE_UTF8
// Return the string obtained by formatting the variable arguments according
// to the given char format string using PrintfV().
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString formatted;

    va_list args;
    va_start(args, format);
    formatted.PrintfV(format, args);
    va_end(args);

    return formatted;
}
#endif // wxUSE_UNICODE_UTF8
1544
1545 /* static */
1546 wxString wxString::FormatV(const wxString& format, va_list argptr)
1547 {
1548     wxString s;
1549     s.PrintfV(format, argptr);
1550     return s;
1551 }
1552
1553 #if !wxUSE_UTF8_LOCALE_ONLY
1554 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1555 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
1556 #else
1557 int wxString::DoPrintfWchar(const wxChar *format, ...)
1558 #endif
1559 {
1560     va_list argptr;
1561     va_start(argptr, format);
1562
1563 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1564     // get a pointer to the wxString instance; we have to use dynamic_cast<>
1565     // because it's the only cast that works safely for downcasting when
1566     // multiple inheritance is used:
1567     wxString *str = static_cast<wxString*>(this);
1568 #else
1569     wxString *str = this;
1570 #endif
1571
1572     int iLen = str->PrintfV(format, argptr);
1573
1574     va_end(argptr);
1575
1576     return iLen;
1577 }
1578 #endif // !wxUSE_UTF8_LOCALE_ONLY
1579
#if wxUSE_UNICODE_UTF8
// Format the variable arguments according to the given char format string
// into this string and return the value returned by PrintfV().
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);
    const int written = PrintfV(format, args);
    va_end(args);

    return written;
}
#endif // wxUSE_UNICODE_UTF8
1593
1594 /*
    Uses wxVsnprintf and places the result into this string.
1596
1597     In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1598     it is vswprintf.  Due to a discrepancy between vsnprintf and vswprintf in
1599     the ISO C99 (and thus SUSv3) standard the return value for the case of
1600     an undersized buffer is inconsistent.  For conforming vsnprintf
1601     implementations the function must return the number of characters that
1602     would have been printed had the buffer been large enough.  For conforming
1603     vswprintf implementations the function must return a negative number
1604     and set errno.
1605
1606     What vswprintf sets errno to is undefined but Darwin seems to set it to
1607     EOVERFLOW.  The only expected errno are EILSEQ and EINVAL.  Both of
1608     those are defined in the standard and backed up by several conformance
1609     statements.  Note that ENOMEM mentioned in the manual page does not
1610     apply to swprintf, only wprintf and fwprintf.
1611
1612     Official manual page:
1613     http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1614
1615     Some conformance statements (AIX, Solaris):
1616     http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1617     http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1618
1619     Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1620     EILSEQ and EINVAL are specifically defined to mean the error is other than
1621     an undersized buffer and no other errno are defined we treat those two
1622     as meaning hard errors and everything else gets the old behavior which
1623     is to keep looping and increasing buffer size until the function succeeds.
1624
1625     In practice it's impossible to determine before compilation which behavior
1626     may be used.  The vswprintf function may have vsnprintf-like behavior or
1627     vice-versa.  Behavior detected on one release can theoretically change
1628     with an updated release.  Not to mention that configure testing for it
1629     would require the test to be run on the host system, not the build system
1630     which makes cross compilation difficult. Therefore, we make no assumptions
1631     about behavior and try our best to handle every known case, including the
1632     case where wxVsnprintf returns a negative number and fails to set errno.
1633
1634     There is yet one more non-standard implementation and that is our own.
1635     Fortunately, that can be detected at compile-time.
1636
1637     On top of all that, ISO C99 explicitly defines snprintf to write a null
    character to the last position of the specified buffer.  That would be at
    the given buffer size minus 1.  It is supposed to do this even if it
1640     turns out that the buffer is sized too small.
1641
1642     Darwin (tested on 10.5) follows the C99 behavior exactly.
1643
1644     Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1645     errno even when it fails.  However, it only seems to ever fail due
1646     to an undersized buffer.
1647 */
1648 #if wxUSE_UNICODE_UTF8
1649 template<typename BufferType>
1650 #else
1651 // we only need one version in non-UTF8 builds and at least two Windows
1652 // compilers have problems with this function template, so use just one
1653 // normal function here
1654 #endif
1655 static int DoStringPrintfV(wxString& str,
1656                            const wxString& format, va_list argptr)
1657 {
1658     int size = 1024;
1659
1660     for ( ;; )
1661     {
1662 #if wxUSE_UNICODE_UTF8
1663         BufferType tmp(str, size + 1);
1664         typename BufferType::CharType *buf = tmp;
1665 #else
1666         wxStringBuffer tmp(str, size + 1);
1667         wxChar *buf = tmp;
1668 #endif
1669
1670         if ( !buf )
1671         {
1672             // out of memory
1673
1674             // in UTF-8 build, leaving uninitialized junk in the buffer
1675             // could result in invalid non-empty UTF-8 string, so just
1676             // reset the string to empty on failure:
1677             buf[0] = '\0';
1678             return -1;
1679         }
1680
1681         // wxVsnprintf() may modify the original arg pointer, so pass it
1682         // only a copy
1683         va_list argptrcopy;
1684         wxVaCopy(argptrcopy, argptr);
1685
1686 #ifndef __WXWINCE__
1687         // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1688         errno = 0;
1689 #endif
1690         int len = wxVsnprintf(buf, size, format, argptrcopy);
1691         va_end(argptrcopy);
1692
1693         // some implementations of vsnprintf() don't NUL terminate
1694         // the string if there is not enough space for it so
1695         // always do it manually
1696         // FIXME: This really seems to be the wrong and would be an off-by-one
1697         // bug except the code above allocates an extra character.
1698         buf[size] = _T('\0');
1699
1700         // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1701         // total number of characters which would have been written if the
1702         // buffer were large enough (newer standards such as Unix98)
1703         if ( len < 0 )
1704         {
1705             // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1706             //     wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1707             //     is true if *both* of them use our own implementation,
1708             //     otherwise we can't be sure
1709 #if wxUSE_WXVSNPRINTF
1710             // we know that our own implementation of wxVsnprintf() returns -1
1711             // only for a format error - thus there's something wrong with
1712             // the user's format string
1713             buf[0] = '\0';
1714             return -1;
1715 #else // possibly using system version
1716             // assume it only returns error if there is not enough space, but
1717             // as we don't know how much we need, double the current size of
1718             // the buffer
1719 #ifndef __WXWINCE__
1720             if( (errno == EILSEQ) || (errno == EINVAL) )
1721             // If errno was set to one of the two well-known hard errors
1722             // then fail immediately to avoid an infinite loop.
1723                 return -1;
1724             else
1725 #endif // __WXWINCE__
1726             // still not enough, as we don't know how much we need, double the
1727             // current size of the buffer
1728                 size *= 2;
1729 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1730         }
1731         else if ( len >= size )
1732         {
1733 #if wxUSE_WXVSNPRINTF
1734             // we know that our own implementation of wxVsnprintf() returns
1735             // size+1 when there's not enough space but that's not the size
1736             // of the required buffer!
1737             size *= 2;      // so we just double the current size of the buffer
1738 #else
1739             // some vsnprintf() implementations NUL-terminate the buffer and
1740             // some don't in len == size case, to be safe always add 1
1741             // FIXME: I don't quite understand this comment.  The vsnprintf
1742             // function is specifically defined to return the number of
1743             // characters printed not including the null terminator.
1744             // So OF COURSE you need to add 1 to get the right buffer size.
1745             // The following line is definitely correct, no question.
1746             size = len + 1;
1747 #endif
1748         }
1749         else // ok, there was enough space
1750         {
1751             break;
1752         }
1753     }
1754
1755     // we could have overshot
1756     str.Shrink();
1757
1758     return str.length();
1759 }
1760
// Format the arguments in argptr according to format into this string by
// forwarding to DoStringPrintfV() and return its result.
//
// The only thing done here is selecting the kind of buffer DoStringPrintfV()
// should use, depending on the build options and, in the builds supporting
// both UTF-8 and wchar_t, on the value of wxLocaleIsUtf8 at run-time.
int wxString::PrintfV(const wxString& format, va_list argptr)
{
#if wxUSE_UNICODE_UTF8
    // the buffer type used for the char-based (UTF-8) version
    #if wxUSE_STL_BASED_WXSTRING
        typedef wxStringTypeBuffer<char> Utf8Buffer;
    #else
        typedef wxStringInternalBuffer Utf8Buffer;
    #endif
#endif

#if wxUSE_UTF8_LOCALE_ONLY
    // only the UTF-8 version is used in this build
    return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
#else
    #if wxUSE_UNICODE_UTF8
    // choose between the two versions at run-time
    if ( wxLocaleIsUtf8 )
        return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
    else
        // wxChar* version
        return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
    #else
        // DoStringPrintfV() is not a template in this build
        return DoStringPrintfV(*this, format, argptr);
    #endif // UTF8/WCHAR
#endif
}
1785
1786 // ----------------------------------------------------------------------------
1787 // misc other operations
1788 // ----------------------------------------------------------------------------
1789
// returns true if the string matches the pattern which may contain '*' and
// '?' metacharacters (as usual, '?' matches any character and '*' any number
// of them)
//
// The matching is done by scanning the mask and the text in parallel; when
// the scan fails after a '*' had been seen, it is restarted once from the
// position of that '*' in the mask and from the character following the one
// at which the '*' had been encountered in the text.
bool wxString::Matches(const wxString& mask) const
{
    // I disable this code as it doesn't seem to be faster (in fact, it seems
    // to be much slower) than the old, hand-written code below and using it
    // here requires always linking with libregex even if the user code doesn't
    // use it
    //
    // NOTE(review): the disabled code below uses pszMask which is not
    // declared in this branch, so it presumably wouldn't compile as is if it
    // were re-enabled -- confirm before doing it.
#if 0 // wxUSE_REGEX
    // first translate the shell-like mask into a regex
    wxString pattern;
    pattern.reserve(wxStrlen(pszMask));

    pattern += _T('^');
    while ( *pszMask )
    {
        switch ( *pszMask )
        {
            case _T('?'):
                pattern += _T('.');
                break;

            case _T('*'):
                pattern += _T(".*");
                break;

            case _T('^'):
            case _T('.'):
            case _T('$'):
            case _T('('):
            case _T(')'):
            case _T('|'):
            case _T('+'):
            case _T('\\'):
                // these characters are special in a RE, quote them
                // (however note that we don't quote '[' and ']' to allow
                // using them for Unix shell like matching)
                pattern += _T('\\');
                // fall through

            default:
                pattern += *pszMask;
        }

        pszMask++;
    }
    pattern += _T('$');

    // and now use it
    return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
#else // !wxUSE_REGEX
  // TODO: this is, of course, awfully inefficient...

  // FIXME-UTF8: implement using iterators, remove #if
#if wxUSE_UNICODE_UTF8
  // work on wide character copies of both strings in UTF-8 build; the buffer
  // objects must stay alive for as long as the pointers into them are used
  wxWCharBuffer maskBuf = mask.wc_str();
  wxWCharBuffer txtBuf = wc_str();
  const wxChar *pszMask = maskBuf.data();
  const wxChar *pszTxt = txtBuf.data();
#else
  const wxChar *pszMask = mask.wx_str();
  // the char currently being checked
  const wxChar *pszTxt = wx_str();
#endif

  // the last location where '*' matched
  const wxChar *pszLastStarInText = NULL;
  const wxChar *pszLastStarInMask = NULL;

  // the scan (re)starts here, see the backtracking code at the end
match:
  for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
    switch ( *pszMask ) {
      case wxT('?'):
        // '?' needs a character to match, whichever it is
        if ( *pszTxt == wxT('\0') )
          return false;

        // pszTxt and pszMask will be incremented in the loop statement

        break;

      case wxT('*'):
        {
          // remember where we started to be able to backtrack later
          pszLastStarInText = pszTxt;
          pszLastStarInMask = pszMask;

          // ignore special chars immediately following this one
          // (should this be an error?)
          while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
            pszMask++;

          // if there is nothing more, match
          if ( *pszMask == wxT('\0') )
            return true;

          // are there any other metacharacters in the mask?
          size_t uiLenMask;
          const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));

          if ( pEndMask != NULL ) {
            // we have to match the string between two metachars
            uiLenMask = pEndMask - pszMask;
          }
          else {
            // we have to match the remainder of the string
            uiLenMask = wxStrlen(pszMask);
          }

          // look for the first occurrence of the literal part of the mask
          // following the '*' in the rest of the text
          wxString strToMatch(pszMask, uiLenMask);
          const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
          if ( pMatch == NULL )
            return false;

          // -1 to compensate "++" in the loop
          pszTxt = pMatch + uiLenMask - 1;
          pszMask += uiLenMask - 1;
        }
        break;

      default:
        // any other character in the mask must be the same in the text
        if ( *pszMask != *pszTxt )
          return false;
        break;
    }
  }

  // match only if nothing left
  if ( *pszTxt == wxT('\0') )
    return true;

  // if we failed to match, backtrack if we can
  if ( pszLastStarInText ) {
    // retry from the last '*' in the mask but one character further in the
    // text than where we had started from the last time
    pszTxt = pszLastStarInText + 1;
    pszMask = pszLastStarInMask;

    // forget this position: it is only set again if the '*' case above is
    // executed again
    pszLastStarInText = NULL;

    // don't bother resetting pszLastStarInMask, it's unnecessary

    goto match;
  }

  return false;
#endif // wxUSE_REGEX/!wxUSE_REGEX
}
1936
1937 // Count the number of chars
1938 int wxString::Freq(wxUniChar ch) const
1939 {
1940     int count = 0;
1941     for ( const_iterator i = begin(); i != end(); ++i )
1942     {
1943         if ( *i == ch )
1944             count ++;
1945     }
1946     return count;
1947 }
1948
1949 // convert to upper case, return the copy of the string
1950 wxString wxString::Upper() const
1951 { wxString s(*this); return s.MakeUpper(); }
1952
1953 // convert to lower case, return the copy of the string
1954 wxString wxString::Lower() const { wxString s(*this); return s.MakeLower(); }
1955
1956 // ----------------------------------------------------------------------------
1957 // wxUTF8StringBuffer
1958 // ----------------------------------------------------------------------------
1959
1960 #if wxUSE_UNICODE_WCHAR
1961 wxUTF8StringBuffer::~wxUTF8StringBuffer()
1962 {
1963     wxMBConvStrictUTF8 conv;
1964     size_t wlen = conv.ToWChar(NULL, 0, m_buf);
1965     wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
1966
1967     wxStringInternalBuffer wbuf(m_str, wlen);
1968     conv.ToWChar(wbuf, wlen, m_buf);
1969 }
1970
1971 wxUTF8StringBufferLength::~wxUTF8StringBufferLength()
1972 {
1973     wxCHECK_RET(m_lenSet, "length not set");
1974
1975     wxMBConvStrictUTF8 conv;
1976     size_t wlen = conv.ToWChar(NULL, 0, m_buf, m_len);
1977     wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
1978
1979     wxStringInternalBufferLength wbuf(m_str, wlen);
1980     conv.ToWChar(wbuf, wlen, m_buf, m_len);
1981     wbuf.SetLength(wlen);
1982 }
1983 #endif // wxUSE_UNICODE_WCHAR