src/common/string.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/string.cpp
   3 // Purpose:     wxString class
   4 // Author:      Vadim Zeitlin, Ryan Norton
   5 // Modified by:
   6 // Created:     29/01/98
   7 // Copyright:   (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
   8 //              (c) 2004 Ryan Norton <wxprojects@comcast.net>
   9 // Licence:     wxWindows licence
  10 /////////////////////////////////////////////////////////////////////////////
  11
  12 // ===========================================================================
  13 // headers, declarations, constants
  14 // ===========================================================================
  15
  16 // For compilers that support precompilation, includes "wx.h".
  17 #include "wx/wxprec.h"
  18
  19 #ifdef __BORLANDC__
  20     #pragma hdrstop
  21 #endif
  22
  23 #ifndef WX_PRECOMP
  24     #include "wx/string.h"
  25     #include "wx/wxcrtvararg.h"
  26     #include "wx/intl.h"
  27     #include "wx/log.h"
  28 #endif
  29
  30 #include <ctype.h>
  31
  32 #ifndef __WXWINCE__
  33     #include <errno.h>
  34 #endif
  35
  36 #include <string.h>
  37 #include <stdlib.h>
  38
  39 #include "wx/hashmap.h"
  40 #include "wx/vector.h"
  41 #include "wx/xlocale.h"
  42
  43 #ifdef __WINDOWS__
  44     #include "wx/msw/wrapwin.h"
  45 #endif // __WINDOWS__
  46
  47 #if wxUSE_STD_IOSTREAM
  48     #include <sstream>
  49 #endif
  50
  51 // string handling functions used by wxString:
  52 #if wxUSE_UNICODE_UTF8
  53     #define wxStringMemcpy   memcpy
  54     #define wxStringMemcmp   memcmp
  55     #define wxStringMemchr   memchr
  56     #define wxStringStrlen   strlen
  57 #else
  58     #define wxStringMemcpy   wxTmemcpy
  59     #define wxStringMemcmp   wxTmemcmp
  60     #define wxStringMemchr   wxTmemchr
  61     #define wxStringStrlen   wxStrlen
  62 #endif
  63
  64 // define a function declared in wx/buffer.h here as we don't have buffer.cpp
  65 // and don't want to add it just because of this simple function
  66 namespace wxPrivate
  67 {
  68
  69 // wxXXXBuffer classes can be (implicitly) used during global statics
  70 // initialization so wrap the status UntypedBufferData variable in a function
  71 // to make it safe to access it even before all global statics are initialized
  72 UntypedBufferData *GetUntypedNullData()
  73 {
  74     static UntypedBufferData s_untypedNullData(NULL, 0);
  75
  76     return &s_untypedNullData;
  77 }
  78
  79 } // namespace wxPrivate
  80
  81 // ---------------------------------------------------------------------------
  82 // static class variables definition
  83 // ---------------------------------------------------------------------------
  84
  85 //According to STL _must_ be a -1 size_t
  86 const size_t wxString::npos = (size_t) -1;
  87
  88 #if wxUSE_STRING_POS_CACHE
  89
  90 #ifdef wxHAS_COMPILER_TLS
  91
  92 wxTLS_TYPE(wxString::Cache) wxString::ms_cache;
  93
  94 #else // !wxHAS_COMPILER_TLS
  95
  96 struct wxStrCacheInitializer
  97 {
  98     wxStrCacheInitializer()
  99     {
 100         // calling this function triggers s_cache initialization in it, and
 101         // from now on it becomes safe to call from multiple threads
 102         wxString::GetCache();
 103     }
 104 };
 105
 106 /*
 107 wxString::Cache& wxString::GetCache()
 108 {
 109     static wxTLS_TYPE(Cache) s_cache;
 110
 111     return wxTLS_VALUE(s_cache);
 112 }
 113 */
 114
 115 static wxStrCacheInitializer gs_stringCacheInit;
 116
 117 #endif // wxHAS_COMPILER_TLS/!wxHAS_COMPILER_TLS
 118
 119 // gdb seems to be unable to display thread-local variables correctly, at least
 120 // not my 6.4.98 version under amd64, so provide this debugging helper to do it
 121 #if wxDEBUG_LEVEL >= 2
 122
 123 struct wxStrCacheDumper
 124 {
 125     static void ShowAll()
 126     {
 127         puts("*** wxString cache dump:");
 128         for ( unsigned n = 0; n < wxString::Cache::SIZE; n++ )
 129         {
 130             const wxString::Cache::Element&
 131                 c = wxString::GetCacheBegin()[n];
 132
 133             printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
 134                    n,
 135                    n == wxString::LastUsedCacheElement() ? " [*]" : "",
 136                    c.str,
 137                    (unsigned long)c.pos,
 138                    (unsigned long)c.impl,
 139                    (long)c.len);
 140         }
 141     }
 142 };
 143
 144 void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
 145
 146 #endif // wxDEBUG_LEVEL >= 2
 147
 148 #ifdef wxPROFILE_STRING_CACHE
 149
 150 wxString::CacheStats wxString::ms_cacheStats;
 151
 152 struct wxStrCacheStatsDumper
 153 {
 154     ~wxStrCacheStatsDumper()
 155     {
 156         const wxString::CacheStats& stats = wxString::ms_cacheStats;
 157
 158         if ( stats.postot )
 159         {
 160             puts("*** wxString cache statistics:");
 161             printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
 162                    stats.postot);
 163             printf("\tHits %u (of which %u not used) or %.2f%%\n",
 164                    stats.poshits,
 165                    stats.mishits,
 166                    100.*float(stats.poshits - stats.mishits)/stats.postot);
 167             printf("\tAverage position requested: %.2f\n",
 168                    float(stats.sumpos) / stats.postot);
 169             printf("\tAverage offset after cached hint: %.2f\n",
 170                    float(stats.sumofs) / stats.postot);
 171         }
 172
 173         if ( stats.lentot )
 174         {
 175             printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
 176                    stats.lentot, 100.*float(stats.lenhits)/stats.lentot);
 177         }
 178     }
 179 };
 180
 181 static wxStrCacheStatsDumper s_showCacheStats;
 182
 183 #endif // wxPROFILE_STRING_CACHE
 184
 185 #endif // wxUSE_STRING_POS_CACHE
 186
 187 // ----------------------------------------------------------------------------
 188 // global functions
 189 // ----------------------------------------------------------------------------
 190
 191 #if wxUSE_STD_IOSTREAM
 192
 193 #include <iostream>
 194
 195 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
 196 {
 197 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
 198     const wxScopedCharBuffer buf(str.AsCharBuf());
 199     if ( !buf )
 200         os.clear(wxSTD ios_base::failbit);
 201     else
 202         os << buf.data();
 203
 204     return os;
 205 #else
 206     return os << str.AsInternal();
 207 #endif
 208 }
 209
 210 wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
 211 {
 212     return os << str.c_str();
 213 }
 214
 215 wxSTD ostream& operator<<(wxSTD ostream& os, const wxScopedCharBuffer& str)
 216 {
 217     return os << str.data();
 218 }
 219
 220 #ifndef __BORLANDC__
 221 wxSTD ostream& operator<<(wxSTD ostream& os, const wxScopedWCharBuffer& str)
 222 {
 223     return os << str.data();
 224 }
 225 #endif
 226
 227 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
 228
 229 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxString& str)
 230 {
 231     return wos << str.wc_str();
 232 }
 233
 234 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxCStrData& str)
 235 {
 236     return wos << str.AsWChar();
 237 }
 238
 239 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxScopedWCharBuffer& str)
 240 {
 241     return wos << str.data();
 242 }
 243
 244 #endif  // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
 245
 246 #endif // wxUSE_STD_IOSTREAM
 247
 248 // ===========================================================================
 249 // wxString class core
 250 // ===========================================================================
 251
 252 #if wxUSE_UNICODE_UTF8
 253
 254 void wxString::PosLenToImpl(size_t pos, size_t len,
 255                             size_t *implPos, size_t *implLen) const
 256 {
 257     if ( pos == npos )
 258     {
 259         *implPos = npos;
 260     }
 261     else // have valid start position
 262     {
 263         const const_iterator b = GetIterForNthChar(pos);
 264         *implPos = wxStringImpl::const_iterator(b.impl()) - m_impl.begin();
 265         if ( len == npos )
 266         {
 267             *implLen = npos;
 268         }
 269         else // have valid length too
 270         {
 271             // we need to handle the case of length specifying a substring
 272             // going beyond the end of the string, just as std::string does
 273             const const_iterator e(end());
 274             const_iterator i(b);
 275             while ( len && i <= e )
 276             {
 277                 ++i;
 278                 --len;
 279             }
 280
 281             *implLen = i.impl() - b.impl();
 282         }
 283     }
 284 }
 285
 286 #endif // wxUSE_UNICODE_UTF8
 287
 288 // ----------------------------------------------------------------------------
 289 // wxCStrData converted strings caching
 290 // ----------------------------------------------------------------------------
 291
 292 // FIXME-UTF8: temporarily disabled because it doesn't work with global
 293 //             string objects; re-enable after fixing this bug and benchmarking
 294 //             performance to see if using a hash is a good idea at all
 295 #if 0
 296
 297 // For backward compatibility reasons, it must be possible to assign the value
 298 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
 299 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
 300 // because the memory would be freed immediately, but it has to be valid as long
 301 // as the string is not modified, so that code like this still works:
 302 //
 303 // const wxChar *s = str.c_str();
 304 // while ( s ) { ... }
 305
 306 // FIXME-UTF8: not thread safe!
 307 // FIXME-UTF8: we currently clear the cached conversion only when the string is
 308 //             destroyed, but we should do it when the string is modified, to
 309 //             keep memory usage down
 310 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
 311 //             invalidated the cache on every change, we could keep the previous
 312 //             conversion
 313 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
 314 //             to use mb_str() or wc_str() instead of (const [w]char*)c_str()
 315
 316 template<typename T>
 317 static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
 318 {
 319     typename T::iterator i = hash.find(wxConstCast(s, wxString));
 320     if ( i != hash.end() )
 321     {
 322         free(i->second);
 323         hash.erase(i);
 324     }
 325 }
 326
 327 #if wxUSE_UNICODE
 328 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
 329 //     so we have to use wxString* here and const-cast when used
 330 WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
 331                     wxStringCharConversionCache);
 332 static wxStringCharConversionCache gs_stringsCharCache;
 333
 334 const char* wxCStrData::AsChar() const
 335 {
    // remove the previously cached value, if any (see FIXMEs above):
 337     DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
 338
 339     // convert the string and keep it:
 340     const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
 341         m_str->mb_str().release();
 342
 343     return s + m_offset;
 344 }
 345 #endif // wxUSE_UNICODE
 346
 347 #if !wxUSE_UNICODE_WCHAR
 348 WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
 349                     wxStringWCharConversionCache);
 350 static wxStringWCharConversionCache gs_stringsWCharCache;
 351
 352 const wchar_t* wxCStrData::AsWChar() const
 353 {
    // remove the previously cached value, if any (see FIXMEs above):
 355     DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
 356
 357     // convert the string and keep it:
 358     const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
 359         m_str->wc_str().release();
 360
 361     return s + m_offset;
 362 }
 363 #endif // !wxUSE_UNICODE_WCHAR
 364
 365 wxString::~wxString()
 366 {
 367 #if wxUSE_UNICODE
 368     // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
 369     DeleteStringFromConversionCache(gs_stringsCharCache, this);
 370 #endif
 371 #if !wxUSE_UNICODE_WCHAR
 372     DeleteStringFromConversionCache(gs_stringsWCharCache, this);
 373 #endif
 374 }
 375 #endif
 376
 377 // ===========================================================================
 378 // wxString class core
 379 // ===========================================================================
 380
 381 // ---------------------------------------------------------------------------
 382 // construction and conversion
 383 // ---------------------------------------------------------------------------
 384
 385 #if wxUSE_UNICODE_WCHAR
 386 /* static */
 387 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 388                                                const wxMBConv& conv)
 389 {
 390     // anything to do?
 391     if ( !psz || nLength == 0 )
 392         return SubstrBufFromMB(wxWCharBuffer(L""), 0);
 393
 394     if ( nLength == npos )
 395         nLength = wxNO_LEN;
 396
 397     size_t wcLen;
 398     wxScopedWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 399     if ( !wcLen )
 400         return SubstrBufFromMB(wxWCharBuffer(L""), 0);
 401     else
 402         return SubstrBufFromMB(wcBuf, wcLen);
 403 }
 404 #endif // wxUSE_UNICODE_WCHAR
 405
 406 #if wxUSE_UNICODE_UTF8
 407 /* static */
 408 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 409                                                const wxMBConv& conv)
 410 {
 411     // anything to do?
 412     if ( !psz || nLength == 0 )
 413         return SubstrBufFromMB(wxCharBuffer(""), 0);
 414
 415     // if psz is already in UTF-8, we don't have to do the roundtrip to
 416     // wchar_t* and back:
 417     if ( conv.IsUTF8() )
 418     {
 419         // we need to validate the input because UTF8 iterators assume valid
 420         // UTF-8 sequence and psz may be invalid:
 421         if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
 422         {
 423             // we must pass the real string length to SubstrBufFromMB ctor
 424             if ( nLength == npos )
 425                 nLength = psz ? strlen(psz) : 0;
 426             return SubstrBufFromMB(wxScopedCharBuffer::CreateNonOwned(psz, nLength),
 427                                    nLength);
 428         }
 429         // else: do the roundtrip through wchar_t*
 430     }
 431
 432     if ( nLength == npos )
 433         nLength = wxNO_LEN;
 434
 435     // first convert to wide string:
 436     size_t wcLen;
 437     wxScopedWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 438     if ( !wcLen )
 439         return SubstrBufFromMB(wxCharBuffer(""), 0);
 440
 441     // and then to UTF-8:
 442     SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
 443     // widechar -> UTF-8 conversion isn't supposed to ever fail:
 444     wxASSERT_MSG( buf.data, wxT("conversion to UTF-8 failed") );
 445
 446     return buf;
 447 }
 448 #endif // wxUSE_UNICODE_UTF8
 449
 450 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 451 /* static */
 452 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
 453                                                const wxMBConv& conv)
 454 {
 455     // anything to do?
 456     if ( !pwz || nLength == 0 )
 457         return SubstrBufFromWC(wxCharBuffer(""), 0);
 458
 459     if ( nLength == npos )
 460         nLength = wxNO_LEN;
 461
 462     size_t mbLen;
 463     wxScopedCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
 464     if ( !mbLen )
 465         return SubstrBufFromWC(wxCharBuffer(""), 0);
 466     else
 467         return SubstrBufFromWC(mbBuf, mbLen);
 468 }
 469 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 470
 471 // This std::string::c_str()-like method returns a wide char pointer to string
 472 // contents. In wxUSE_UNICODE_WCHAR case it is trivial as it can simply return
 473 // a pointer to the internal representation. Otherwise a conversion is required
 474 // and it returns a temporary buffer.
 475 //
 476 // However for compatibility with c_str() and to avoid breaking existing code
 477 // doing
 478 //
 479 //      for ( const wchar_t *p = s.wc_str(); *p; p++ )
 480 //          ... use *p...
 481 //
 482 // we actually need to ensure that the returned buffer is _not_ temporary and
 483 // so we use wxString::m_convertedToWChar to store the returned data
 484 #if !wxUSE_UNICODE_WCHAR
 485
 486 const wchar_t *wxString::AsWChar(const wxMBConv& conv) const
 487 {
 488     const char * const strMB = m_impl.c_str();
 489     const size_t lenMB = m_impl.length();
 490
 491     // find out the size of the buffer needed
 492     const size_t lenWC = conv.ToWChar(NULL, 0, strMB, lenMB);
 493     if ( lenWC == wxCONV_FAILED )
 494         return NULL;
 495
 496     // keep the same buffer if the string size didn't change: this is not only
 497     // an optimization but also ensure that code which modifies string
 498     // character by character (without changing its length) can continue to use
 499     // the pointer returned by a previous wc_str() call even after changing the
 500     // string
 501
 502     // TODO-UTF8: we could check for ">" instead of "!=" here as this would
 503     //            allow to save on buffer reallocations but at the cost of
 504     //            consuming (even) more memory, we should benchmark this to
 505     //            determine if it's worth doing
 506     if ( !m_convertedToWChar.m_str || lenWC != m_convertedToWChar.m_len )
 507     {
 508         if ( !const_cast<wxString *>(this)->m_convertedToWChar.Extend(lenWC) )
 509             return NULL;
 510     }
 511
 512     // finally do convert
 513     m_convertedToWChar.m_str[lenWC] = L'\0';
 514     if ( conv.ToWChar(m_convertedToWChar.m_str, lenWC,
 515                       strMB, lenMB) == wxCONV_FAILED )
 516         return NULL;
 517
 518     return m_convertedToWChar.m_str;
 519 }
 520
 521 #endif // !wxUSE_UNICODE_WCHAR
 522
 523
 524 // Same thing for mb_str() which returns a normal char pointer to string
 525 // contents: this always requires converting it to the specified encoding in
 526 // non-ANSI build except if we need to convert to UTF-8 and this is what we
 527 // already use internally.
 528 #if wxUSE_UNICODE
 529
 530 const char *wxString::AsChar(const wxMBConv& conv) const
 531 {
 532 #if wxUSE_UNICODE_UTF8
 533     if ( conv.IsUTF8() )
 534         return m_impl.c_str();
 535
 536     const wchar_t * const strWC = AsWChar(wxMBConvStrictUTF8());
 537     const size_t lenWC = m_convertedToWChar.m_len;
 538 #else // wxUSE_UNICODE_WCHAR
 539     const wchar_t * const strWC = m_impl.c_str();
 540     const size_t lenWC = m_impl.length();
 541 #endif // wxUSE_UNICODE_UTF8/wxUSE_UNICODE_WCHAR
 542
 543     const size_t lenMB = conv.FromWChar(NULL, 0, strWC, lenWC);
 544     if ( lenMB == wxCONV_FAILED )
 545         return NULL;
 546
 547     if ( !m_convertedToChar.m_str || lenMB != m_convertedToChar.m_len )
 548     {
 549         if ( !const_cast<wxString *>(this)->m_convertedToChar.Extend(lenMB) )
 550             return NULL;
 551     }
 552
 553     m_convertedToChar.m_str[lenMB] = '\0';
 554     if ( conv.FromWChar(m_convertedToChar.m_str, lenMB,
 555                         strWC, lenWC) == wxCONV_FAILED )
 556         return NULL;
 557
 558     return m_convertedToChar.m_str;
 559 }
 560
 561 #endif // wxUSE_UNICODE
 562
 563 // shrink to minimal size (releasing extra memory)
 564 bool wxString::Shrink()
 565 {
 566   wxString tmp(begin(), end());
 567   swap(tmp);
 568   return tmp.length() == length();
 569 }
 570
 571 // deprecated compatibility code:
 572 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
 573 wxStringCharType *wxString::GetWriteBuf(size_t nLen)
 574 {
 575     return DoGetWriteBuf(nLen);
 576 }
 577
 578 void wxString::UngetWriteBuf()
 579 {
 580     DoUngetWriteBuf();
 581 }
 582
 583 void wxString::UngetWriteBuf(size_t nLen)
 584 {
 585     DoUngetWriteBuf(nLen);
 586 }
 587 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
 588
 589
 590 // ---------------------------------------------------------------------------
 591 // data access
 592 // ---------------------------------------------------------------------------
 593
 594 // all functions are inline in string.h
 595
 596 // ---------------------------------------------------------------------------
 597 // concatenation operators
 598 // ---------------------------------------------------------------------------
 599
 600 /*
 601  * concatenation functions come in 5 flavours:
 602  *  string + string
 603  *  char   + string      and      string + char
 604  *  C str  + string      and      string + C str
 605  */
 606
 607 wxString operator+(const wxString& str1, const wxString& str2)
 608 {
 609 #if !wxUSE_STL_BASED_WXSTRING
 610     wxASSERT( str1.IsValid() );
 611     wxASSERT( str2.IsValid() );
 612 #endif
 613
 614     wxString s = str1;
 615     s += str2;
 616
 617     return s;
 618 }
 619
 620 wxString operator+(const wxString& str, wxUniChar ch)
 621 {
 622 #if !wxUSE_STL_BASED_WXSTRING
 623     wxASSERT( str.IsValid() );
 624 #endif
 625
 626     wxString s = str;
 627     s += ch;
 628
 629     return s;
 630 }
 631
 632 wxString operator+(wxUniChar ch, const wxString& str)
 633 {
 634 #if !wxUSE_STL_BASED_WXSTRING
 635     wxASSERT( str.IsValid() );
 636 #endif
 637
 638     wxString s = ch;
 639     s += str;
 640
 641     return s;
 642 }
 643
 644 wxString operator+(const wxString& str, const char *psz)
 645 {
 646 #if !wxUSE_STL_BASED_WXSTRING
 647     wxASSERT( str.IsValid() );
 648 #endif
 649
 650     wxString s;
 651     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 652         wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
 653     }
 654     s += str;
 655     s += psz;
 656
 657     return s;
 658 }
 659
 660 wxString operator+(const wxString& str, const wchar_t *pwz)
 661 {
 662 #if !wxUSE_STL_BASED_WXSTRING
 663     wxASSERT( str.IsValid() );
 664 #endif
 665
 666     wxString s;
 667     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 668         wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
 669     }
 670     s += str;
 671     s += pwz;
 672
 673     return s;
 674 }
 675
 676 wxString operator+(const char *psz, const wxString& str)
 677 {
 678 #if !wxUSE_STL_BASED_WXSTRING
 679     wxASSERT( str.IsValid() );
 680 #endif
 681
 682     wxString s;
 683     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 684         wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
 685     }
 686     s = psz;
 687     s += str;
 688
 689     return s;
 690 }
 691
 692 wxString operator+(const wchar_t *pwz, const wxString& str)
 693 {
 694 #if !wxUSE_STL_BASED_WXSTRING
 695     wxASSERT( str.IsValid() );
 696 #endif
 697
 698     wxString s;
 699     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 700         wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
 701     }
 702     s = pwz;
 703     s += str;
 704
 705     return s;
 706 }
 707
 708 // ---------------------------------------------------------------------------
 709 // string comparison
 710 // ---------------------------------------------------------------------------
 711
 712 bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
 713 {
 714     return (length() == 1) && (compareWithCase ? GetChar(0u) == c
 715                                : wxToupper(GetChar(0u)) == wxToupper(c));
 716 }
 717
 718 #ifdef HAVE_STD_STRING_COMPARE
 719
 720 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
 721 //     UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
 722 //     sort strings in characters code point order by sorting the byte sequence
 723 //     in byte values order (i.e. what strcmp() and memcmp() do).
 724
 725 int wxString::compare(const wxString& str) const
 726 {
 727     return m_impl.compare(str.m_impl);
 728 }
 729
 730 int wxString::compare(size_t nStart, size_t nLen,
 731                       const wxString& str) const
 732 {
 733     size_t pos, len;
 734     PosLenToImpl(nStart, nLen, &pos, &len);
 735     return m_impl.compare(pos, len, str.m_impl);
 736 }
 737
 738 int wxString::compare(size_t nStart, size_t nLen,
 739                       const wxString& str,
 740                       size_t nStart2, size_t nLen2) const
 741 {
 742     size_t pos, len;
 743     PosLenToImpl(nStart, nLen, &pos, &len);
 744
 745     size_t pos2, len2;
 746     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 747
 748     return m_impl.compare(pos, len, str.m_impl, pos2, len2);
 749 }
 750
 751 int wxString::compare(const char* sz) const
 752 {
 753     return m_impl.compare(ImplStr(sz));
 754 }
 755
 756 int wxString::compare(const wchar_t* sz) const
 757 {
 758     return m_impl.compare(ImplStr(sz));
 759 }
 760
 761 int wxString::compare(size_t nStart, size_t nLen,
 762                       const char* sz, size_t nCount) const
 763 {
 764     size_t pos, len;
 765     PosLenToImpl(nStart, nLen, &pos, &len);
 766
 767     SubstrBufFromMB str(ImplStr(sz, nCount));
 768
 769     return m_impl.compare(pos, len, str.data, str.len);
 770 }
 771
 772 int wxString::compare(size_t nStart, size_t nLen,
 773                       const wchar_t* sz, size_t nCount) const
 774 {
 775     size_t pos, len;
 776     PosLenToImpl(nStart, nLen, &pos, &len);
 777
 778     SubstrBufFromWC str(ImplStr(sz, nCount));
 779
 780     return m_impl.compare(pos, len, str.data, str.len);
 781 }
 782
 783 #else // !HAVE_STD_STRING_COMPARE
 784
 785 static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
 786                           const wxStringCharType* s2, size_t l2)
 787 {
 788     if( l1 == l2 )
 789         return wxStringMemcmp(s1, s2, l1);
 790     else if( l1 < l2 )
 791     {
 792         int ret = wxStringMemcmp(s1, s2, l1);
 793         return ret == 0 ? -1 : ret;
 794     }
 795     else
 796     {
 797         int ret = wxStringMemcmp(s1, s2, l2);
 798         return ret == 0 ? +1 : ret;
 799     }
 800 }
 801
 802 int wxString::compare(const wxString& str) const
 803 {
 804     return ::wxDoCmp(m_impl.data(), m_impl.length(),
 805                      str.m_impl.data(), str.m_impl.length());
 806 }
 807
 808 int wxString::compare(size_t nStart, size_t nLen,
 809                       const wxString& str) const
 810 {
 811     wxASSERT(nStart <= length());
 812     size_type strLen = length() - nStart;
 813     nLen = strLen < nLen ? strLen : nLen;
 814
 815     size_t pos, len;
 816     PosLenToImpl(nStart, nLen, &pos, &len);
 817
 818     return ::wxDoCmp(m_impl.data() + pos,  len,
 819                      str.m_impl.data(), str.m_impl.length());
 820 }
 821
 822 int wxString::compare(size_t nStart, size_t nLen,
 823                       const wxString& str,
 824                       size_t nStart2, size_t nLen2) const
 825 {
 826     wxASSERT(nStart <= length());
 827     wxASSERT(nStart2 <= str.length());
 828     size_type strLen  =     length() - nStart,
 829               strLen2 = str.length() - nStart2;
 830     nLen  = strLen  < nLen  ? strLen  : nLen;
 831     nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
 832
 833     size_t pos, len;
 834     PosLenToImpl(nStart, nLen, &pos, &len);
 835     size_t pos2, len2;
 836     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 837
 838     return ::wxDoCmp(m_impl.data() + pos, len,
 839                      str.m_impl.data() + pos2, len2);
 840 }
 841
 842 int wxString::compare(const char* sz) const
 843 {
 844     SubstrBufFromMB str(ImplStr(sz, npos));
 845     if ( str.len == npos )
 846         str.len = wxStringStrlen(str.data);
 847     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 848 }
 849
 850 int wxString::compare(const wchar_t* sz) const
 851 {
 852     SubstrBufFromWC str(ImplStr(sz, npos));
 853     if ( str.len == npos )
 854         str.len = wxStringStrlen(str.data);
 855     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 856 }
 857
 858 int wxString::compare(size_t nStart, size_t nLen,
 859                       const char* sz, size_t nCount) const
 860 {
 861     wxASSERT(nStart <= length());
 862     size_type strLen = length() - nStart;
 863     nLen = strLen < nLen ? strLen : nLen;
 864
 865     size_t pos, len;
 866     PosLenToImpl(nStart, nLen, &pos, &len);
 867
 868     SubstrBufFromMB str(ImplStr(sz, nCount));
 869     if ( str.len == npos )
 870         str.len = wxStringStrlen(str.data);
 871
 872     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 873 }
 874
 875 int wxString::compare(size_t nStart, size_t nLen,
 876                       const wchar_t* sz, size_t nCount) const
 877 {
 878     wxASSERT(nStart <= length());
 879     size_type strLen = length() - nStart;
 880     nLen = strLen < nLen ? strLen : nLen;
 881
 882     size_t pos, len;
 883     PosLenToImpl(nStart, nLen, &pos, &len);
 884
 885     SubstrBufFromWC str(ImplStr(sz, nCount));
 886     if ( str.len == npos )
 887         str.len = wxStringStrlen(str.data);
 888
 889     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 890 }
 891
 892 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
 893
 894
 895 // ---------------------------------------------------------------------------
 896 // find_{first,last}_[not]_of functions
 897 // ---------------------------------------------------------------------------
 898
 899 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
 900
 901 // NB: All these functions are implemented  with the argument being wxChar*,
 902 //     i.e. widechar string in any Unicode build, even though native string
 903 //     representation is char* in the UTF-8 build. This is because we couldn't
 904 //     use memchr() to determine if a character is in a set encoded as UTF-8.
 905
 906 size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
 907 {
 908     return find_first_of(sz, nStart, wxStrlen(sz));
 909 }
 910
 911 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
 912 {
 913     return find_first_not_of(sz, nStart, wxStrlen(sz));
 914 }
 915
 916 size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
 917 {
 918     wxASSERT_MSG( nStart <= length(),  wxT("invalid index") );
 919
 920     size_t idx = nStart;
 921     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 922     {
 923         if ( wxTmemchr(sz, *i, n) )
 924             return idx;
 925     }
 926
 927     return npos;
 928 }
 929
 930 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
 931 {
 932     wxASSERT_MSG( nStart <= length(),  wxT("invalid index") );
 933
 934     size_t idx = nStart;
 935     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 936     {
 937         if ( !wxTmemchr(sz, *i, n) )
 938             return idx;
 939     }
 940
 941     return npos;
 942 }
 943
 944
 945 size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
 946 {
 947     return find_last_of(sz, nStart, wxStrlen(sz));
 948 }
 949
 950 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
 951 {
 952     return find_last_not_of(sz, nStart, wxStrlen(sz));
 953 }
 954
 955 size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
 956 {
 957     size_t len = length();
 958
 959     if ( nStart == npos )
 960     {
 961         nStart = len - 1;
 962     }
 963     else
 964     {
 965         wxASSERT_MSG( nStart <= len, wxT("invalid index") );
 966     }
 967
 968     size_t idx = nStart;
 969     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 970           i != rend(); --idx, ++i )
 971     {
 972         if ( wxTmemchr(sz, *i, n) )
 973             return idx;
 974     }
 975
 976     return npos;
 977 }
 978
 979 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
 980 {
 981     size_t len = length();
 982
 983     if ( nStart == npos )
 984     {
 985         nStart = len - 1;
 986     }
 987     else
 988     {
 989         wxASSERT_MSG( nStart <= len, wxT("invalid index") );
 990     }
 991
 992     size_t idx = nStart;
 993     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 994           i != rend(); --idx, ++i )
 995     {
 996         if ( !wxTmemchr(sz, *i, n) )
 997             return idx;
 998     }
 999
1000     return npos;
1001 }
1002
1003 size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
1004 {
1005     wxASSERT_MSG( nStart <= length(),  wxT("invalid index") );
1006
1007     size_t idx = nStart;
1008     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
1009     {
1010         if ( *i != ch )
1011             return idx;
1012     }
1013
1014     return npos;
1015 }
1016
1017 size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
1018 {
1019     size_t len = length();
1020
1021     if ( nStart == npos )
1022     {
1023         nStart = len - 1;
1024     }
1025     else
1026     {
1027         wxASSERT_MSG( nStart <= len, wxT("invalid index") );
1028     }
1029
1030     size_t idx = nStart;
1031     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1032           i != rend(); --idx, ++i )
1033     {
1034         if ( *i != ch )
1035             return idx;
1036     }
1037
1038     return npos;
1039 }
1040
1041 // the functions above were implemented for wchar_t* arguments in Unicode
1042 // build and char* in ANSI build; below are implementations for the other
1043 // version:
1044 #if wxUSE_UNICODE
1045     #define wxOtherCharType char
1046     #define STRCONV         (const wxChar*)wxConvLibc.cMB2WC
1047 #else
1048     #define wxOtherCharType wchar_t
1049     #define STRCONV         (const wxChar*)wxConvLibc.cWC2MB
1050 #endif
1051
1052 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
1053     { return find_first_of(STRCONV(sz), nStart); }
1054
1055 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
1056                                size_t n) const
1057     { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
1058 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
1059     { return find_last_of(STRCONV(sz), nStart); }
1060 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
1061                               size_t n) const
1062     { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
1063 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
1064     { return find_first_not_of(STRCONV(sz), nStart); }
1065 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
1066                                    size_t n) const
1067     { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
1068 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
1069     { return find_last_not_of(STRCONV(sz), nStart); }
1070 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
1071                                   size_t n) const
1072     { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
1073
1074 #undef wxOtherCharType
1075 #undef STRCONV
1076
1077 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1078
1079 // ===========================================================================
1080 // other common string functions
1081 // ===========================================================================
1082
1083 int wxString::CmpNoCase(const wxString& s) const
1084 {
1085 #if !wxUSE_UNICODE_UTF8
1086     // We compare NUL-delimited chunks of the strings inside the loop. We will
1087     // do as many iterations as there are embedded NULs in the string, i.e.
1088     // usually we will run it just once.
1089
1090     typedef const wxStringImpl::value_type *pchar_type;
1091     const pchar_type thisBegin = m_impl.c_str();
1092     const pchar_type thatBegin = s.m_impl.c_str();
1093
1094     const pchar_type thisEnd = thisBegin + m_impl.length();
1095     const pchar_type thatEnd = thatBegin + s.m_impl.length();
1096
1097     pchar_type thisCur = thisBegin;
1098     pchar_type thatCur = thatBegin;
1099
1100     int rc;
1101     for ( ;; )
1102     {
1103         // Compare until the next NUL, if the strings differ this is the final
1104         // result.
1105         rc = wxStricmp(thisCur, thatCur);
1106         if ( rc )
1107             break;
1108
1109         const size_t lenChunk = wxStrlen(thisCur);
1110         thisCur += lenChunk;
1111         thatCur += lenChunk;
1112
1113         // Skip all the NULs as wxStricmp() doesn't handle them.
1114         for ( ; !*thisCur; thisCur++, thatCur++ )
1115         {
1116             // Check if we exhausted either of the strings.
1117             if ( thisCur == thisEnd )
1118             {
1119                 // This one is exhausted, is the other one too?
1120                 return thatCur == thatEnd ? 0 : -1;
1121             }
1122
1123             if ( thatCur == thatEnd )
1124             {
1125                 // Because of the test above we know that this one is not
1126                 // exhausted yet so it's greater than the other one that is.
1127                 return 1;
1128             }
1129
1130             if ( *thatCur )
1131             {
1132                 // Anything non-NUL is greater than NUL.
1133                 return -1;
1134             }
1135         }
1136     }
1137
1138     return rc;
1139 #else // wxUSE_UNICODE_UTF8
1140     // CRT functions can't be used for case-insensitive comparison of UTF-8
1141     // strings so do it in the naive, simple and inefficient way.
1142
1143     // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
1144     const_iterator i1 = begin();
1145     const_iterator end1 = end();
1146     const_iterator i2 = s.begin();
1147     const_iterator end2 = s.end();
1148
1149     for ( ; i1 != end1 && i2 != end2; ++i1, ++i2 )
1150     {
1151         wxUniChar lower1 = (wxChar)wxTolower(*i1);
1152         wxUniChar lower2 = (wxChar)wxTolower(*i2);
1153         if ( lower1 != lower2 )
1154             return lower1 < lower2 ? -1 : 1;
1155     }
1156
1157     size_t len1 = length();
1158     size_t len2 = s.length();
1159
1160     if ( len1 < len2 )
1161         return -1;
1162     else if ( len1 > len2 )
1163         return 1;
1164     return 0;
1165 #endif // !wxUSE_UNICODE_UTF8/wxUSE_UNICODE_UTF8
1166 }
1167
1168
1169 #if wxUSE_UNICODE
1170
1171 wxString wxString::FromAscii(const char *ascii, size_t len)
1172 {
1173     if (!ascii || len == 0)
1174        return wxEmptyString;
1175
1176     wxString res;
1177
1178     {
1179         wxStringInternalBuffer buf(res, len);
1180         wxStringCharType *dest = buf;
1181
1182         for ( ; len > 0; --len )
1183         {
1184             unsigned char c = (unsigned char)*ascii++;
1185             wxASSERT_MSG( c < 0x80,
1186                           wxT("Non-ASCII value passed to FromAscii().") );
1187
1188             *dest++ = (wchar_t)c;
1189         }
1190     }
1191
1192     return res;
1193 }
1194
1195 wxString wxString::FromAscii(const char *ascii)
1196 {
1197     return FromAscii(ascii, wxStrlen(ascii));
1198 }
1199
1200 wxString wxString::FromAscii(char ascii)
1201 {
1202     // What do we do with '\0' ?
1203
1204     unsigned char c = (unsigned char)ascii;
1205
1206     wxASSERT_MSG( c < 0x80, wxT("Non-ASCII value passed to FromAscii().") );
1207
1208     // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1209     return wxString(wxUniChar((wchar_t)c));
1210 }
1211
1212 const wxScopedCharBuffer wxString::ToAscii() const
1213 {
1214     // this will allocate enough space for the terminating NUL too
1215     wxCharBuffer buffer(length());
1216     char *dest = buffer.data();
1217
1218     for ( const_iterator i = begin(); i != end(); ++i )
1219     {
1220         wxUniChar c(*i);
1221         // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1222         *dest++ = c.IsAscii() ? (char)c : '_';
1223
1224         // the output string can't have embedded NULs anyhow, so we can safely
1225         // stop at first of them even if we do have any
1226         if ( !c )
1227             break;
1228     }
1229
1230     return buffer;
1231 }
1232
1233 #endif // wxUSE_UNICODE
1234
1235 // extract string of length nCount starting at nFirst
1236 wxString wxString::Mid(size_t nFirst, size_t nCount) const
1237 {
1238     size_t nLen = length();
1239
1240     // default value of nCount is npos and means "till the end"
1241     if ( nCount == npos )
1242     {
1243         nCount = nLen - nFirst;
1244     }
1245
1246     // out-of-bounds requests return sensible things
1247     if ( nFirst + nCount > nLen )
1248     {
1249         nCount = nLen - nFirst;
1250     }
1251
1252     if ( nFirst > nLen )
1253     {
1254         // AllocCopy() will return empty string
1255         return wxEmptyString;
1256     }
1257
1258     wxString dest(*this, nFirst, nCount);
1259     if ( dest.length() != nCount )
1260     {
1261         wxFAIL_MSG( wxT("out of memory in wxString::Mid") );
1262     }
1263
1264     return dest;
1265 }
1266
1267 // check that the string starts with prefix and return the rest of the string
1268 // in the provided pointer if it is not NULL, otherwise return false
1269 bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
1270 {
1271     if ( compare(0, prefix.length(), prefix) != 0 )
1272         return false;
1273
1274     if ( rest )
1275     {
1276         // put the rest of the string into provided pointer
1277         rest->assign(*this, prefix.length(), npos);
1278     }
1279
1280     return true;
1281 }
1282
1283
1284 // check that the string ends with suffix and return the rest of it in the
1285 // provided pointer if it is not NULL, otherwise return false
1286 bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
1287 {
1288     int start = length() - suffix.length();
1289
1290     if ( start < 0 || compare(start, npos, suffix) != 0 )
1291         return false;
1292
1293     if ( rest )
1294     {
1295         // put the rest of the string into provided pointer
1296         rest->assign(*this, 0, start);
1297     }
1298
1299     return true;
1300 }
1301
1302
1303 // extract nCount last (rightmost) characters
1304 wxString wxString::Right(size_t nCount) const
1305 {
1306   if ( nCount > length() )
1307     nCount = length();
1308
1309   wxString dest(*this, length() - nCount, nCount);
1310   if ( dest.length() != nCount ) {
1311     wxFAIL_MSG( wxT("out of memory in wxString::Right") );
1312   }
1313   return dest;
1314 }
1315
1316 // get all characters after the last occurrence of ch
1317 // (returns the whole string if ch not found)
1318 wxString wxString::AfterLast(wxUniChar ch) const
1319 {
1320   wxString str;
1321   int iPos = Find(ch, true);
1322   if ( iPos == wxNOT_FOUND )
1323     str = *this;
1324   else
1325     str.assign(*this, iPos + 1, npos);
1326
1327   return str;
1328 }
1329
1330 // extract nCount first (leftmost) characters
1331 wxString wxString::Left(size_t nCount) const
1332 {
1333   if ( nCount > length() )
1334     nCount = length();
1335
1336   wxString dest(*this, 0, nCount);
1337   if ( dest.length() != nCount ) {
1338     wxFAIL_MSG( wxT("out of memory in wxString::Left") );
1339   }
1340   return dest;
1341 }
1342
1343 // get all characters before the first occurrence of ch
1344 // (returns the whole string if ch not found)
1345 wxString wxString::BeforeFirst(wxUniChar ch, wxString *rest) const
1346 {
1347   int iPos = Find(ch);
1348   if ( iPos == wxNOT_FOUND )
1349   {
1350     iPos = length();
1351     if ( rest )
1352       rest->clear();
1353   }
1354   else
1355   {
1356     if ( rest )
1357       rest->assign(*this, iPos + 1, npos);
1358   }
1359
1360   return wxString(*this, 0, iPos);
1361 }
1362
1363 /// get all characters before the last occurrence of ch
1364 /// (returns empty string if ch not found)
1365 wxString wxString::BeforeLast(wxUniChar ch, wxString *rest) const
1366 {
1367   wxString str;
1368   int iPos = Find(ch, true);
1369   if ( iPos != wxNOT_FOUND )
1370   {
1371     if ( iPos != 0 )
1372       str.assign(*this, 0, iPos);
1373
1374     if ( rest )
1375       rest->assign(*this, iPos + 1, npos);
1376   }
1377   else
1378   {
1379     if ( rest )
1380       *rest = *this;
1381   }
1382
1383   return str;
1384 }
1385
1386 /// get all characters after the first occurrence of ch
1387 /// (returns empty string if ch not found)
1388 wxString wxString::AfterFirst(wxUniChar ch) const
1389 {
1390   wxString str;
1391   int iPos = Find(ch);
1392   if ( iPos != wxNOT_FOUND )
1393       str.assign(*this, iPos + 1, npos);
1394
1395   return str;
1396 }
1397
1398 // replace first (or all) occurrences of some substring with another one
1399 size_t wxString::Replace(const wxString& strOld,
1400                          const wxString& strNew, bool bReplaceAll)
1401 {
1402     // if we tried to replace an empty string we'd enter an infinite loop below
1403     wxCHECK_MSG( !strOld.empty(), 0,
1404                  wxT("wxString::Replace(): invalid parameter") );
1405
1406     wxSTRING_INVALIDATE_CACHE();
1407
1408     size_t uiCount = 0;   // count of replacements made
1409
1410     // optimize the special common case: replacement of one character by
1411     // another one (in UTF-8 case we can only do this for ASCII characters)
1412     //
1413     // benchmarks show that this special version is around 3 times faster
1414     // (depending on the proportion of matching characters and UTF-8/wchar_t
1415     // build)
1416     if ( strOld.m_impl.length() == 1 && strNew.m_impl.length() == 1 )
1417     {
1418         const wxStringCharType chOld = strOld.m_impl[0],
1419                                chNew = strNew.m_impl[0];
1420
1421         // this loop is the simplified version of the one below
1422         for ( size_t pos = 0; ; )
1423         {
1424             pos = m_impl.find(chOld, pos);
1425             if ( pos == npos )
1426                 break;
1427
1428             m_impl[pos++] = chNew;
1429
1430             uiCount++;
1431
1432             if ( !bReplaceAll )
1433                 break;
1434         }
1435     }
1436     else if ( !bReplaceAll)
1437     {
1438         size_t pos = m_impl.find(strOld.m_impl, 0);
1439         if ( pos != npos )
1440         {
1441             m_impl.replace(pos, strOld.m_impl.length(), strNew.m_impl);
1442             uiCount = 1;
1443         }
1444     }
1445     else // replace all occurrences
1446     {
1447         const size_t uiOldLen = strOld.m_impl.length();
1448         const size_t uiNewLen = strNew.m_impl.length();
1449
1450         // first scan the string to find all positions at which the replacement
1451         // should be made
1452         wxVector<size_t> replacePositions;
1453
1454         size_t pos;
1455         for ( pos = m_impl.find(strOld.m_impl, 0);
1456               pos != npos;
1457               pos = m_impl.find(strOld.m_impl, pos + uiOldLen))
1458         {
1459             replacePositions.push_back(pos);
1460             ++uiCount;
1461         }
1462
1463         if ( !uiCount )
1464             return 0;
1465
1466         // allocate enough memory for the whole new string
1467         wxString tmp;
1468         tmp.m_impl.reserve(m_impl.length() + uiCount*(uiNewLen - uiOldLen));
1469
1470         // copy this string to tmp doing replacements on the fly
1471         size_t replNum = 0;
1472         for ( pos = 0; replNum < uiCount; replNum++ )
1473         {
1474             const size_t nextReplPos = replacePositions[replNum];
1475
1476             if ( pos != nextReplPos )
1477             {
1478                 tmp.m_impl.append(m_impl, pos, nextReplPos - pos);
1479             }
1480
1481             tmp.m_impl.append(strNew.m_impl);
1482             pos = nextReplPos + uiOldLen;
1483         }
1484
1485         if ( pos != m_impl.length() )
1486         {
1487             // append the rest of the string unchanged
1488             tmp.m_impl.append(m_impl, pos, m_impl.length() - pos);
1489         }
1490
1491         swap(tmp);
1492     }
1493
1494     return uiCount;
1495 }
1496
1497 bool wxString::IsAscii() const
1498 {
1499     for ( const_iterator i = begin(); i != end(); ++i )
1500     {
1501         if ( !(*i).IsAscii() )
1502             return false;
1503     }
1504
1505     return true;
1506 }
1507
1508 bool wxString::IsWord() const
1509 {
1510     for ( const_iterator i = begin(); i != end(); ++i )
1511     {
1512         if ( !wxIsalpha(*i) )
1513             return false;
1514     }
1515
1516     return true;
1517 }
1518
1519 bool wxString::IsNumber() const
1520 {
1521     if ( empty() )
1522         return true;
1523
1524     const_iterator i = begin();
1525
1526     if ( *i == wxT('-') || *i == wxT('+') )
1527         ++i;
1528
1529     for ( ; i != end(); ++i )
1530     {
1531         if ( !wxIsdigit(*i) )
1532             return false;
1533     }
1534
1535     return true;
1536 }
1537
1538 wxString wxString::Strip(stripType w) const
1539 {
1540     wxString s = *this;
1541     if ( w & leading ) s.Trim(false);
1542     if ( w & trailing ) s.Trim(true);
1543     return s;
1544 }
1545
1546 // ---------------------------------------------------------------------------
1547 // case conversion
1548 // ---------------------------------------------------------------------------
1549
1550 wxString& wxString::MakeUpper()
1551 {
1552   for ( iterator it = begin(), en = end(); it != en; ++it )
1553     *it = (wxChar)wxToupper(*it);
1554
1555   return *this;
1556 }
1557
1558 wxString& wxString::MakeLower()
1559 {
1560   for ( iterator it = begin(), en = end(); it != en; ++it )
1561     *it = (wxChar)wxTolower(*it);
1562
1563   return *this;
1564 }
1565
1566 wxString& wxString::MakeCapitalized()
1567 {
1568     const iterator en = end();
1569     iterator it = begin();
1570     if ( it != en )
1571     {
1572         *it = (wxChar)wxToupper(*it);
1573         for ( ++it; it != en; ++it )
1574             *it = (wxChar)wxTolower(*it);
1575     }
1576
1577     return *this;
1578 }
1579
1580 // ---------------------------------------------------------------------------
1581 // trimming and padding
1582 // ---------------------------------------------------------------------------
1583
1584 // some compilers (VC++ 6.0 not to name them) return true for a call to
1585 // isspace('\xEA') in the C locale which seems to be broken to me, but we have
1586 // to live with this by checking that the character is a 7 bit one - even if
1587 // this may fail to detect some spaces (I don't know if Unicode doesn't have
1588 // space-like symbols somewhere except in the first 128 chars), it is arguably
1589 // still better than trimming away accented letters
1590 inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1591
1592 // trims spaces (in the sense of isspace) from left or right side
1593 wxString& wxString::Trim(bool bFromRight)
1594 {
1595     // first check if we're going to modify the string at all
1596     if ( !empty() &&
1597          (
1598           (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1599           (!bFromRight && wxSafeIsspace(GetChar(0u)))
1600          )
1601        )
1602     {
1603         if ( bFromRight )
1604         {
1605             // find last non-space character
1606             reverse_iterator psz = rbegin();
1607             while ( (psz != rend()) && wxSafeIsspace(*psz) )
1608                 ++psz;
1609
1610             // truncate at trailing space start
1611             erase(psz.base(), end());
1612         }
1613         else
1614         {
1615             // find first non-space character
1616             iterator psz = begin();
1617             while ( (psz != end()) && wxSafeIsspace(*psz) )
1618                 ++psz;
1619
1620             // fix up data and length
1621             erase(begin(), psz);
1622         }
1623     }
1624
1625     return *this;
1626 }
1627
1628 // adds nCount characters chPad to the string from either side
1629 wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1630 {
1631     wxString s(chPad, nCount);
1632
1633     if ( bFromRight )
1634         *this += s;
1635     else
1636     {
1637         s += *this;
1638         swap(s);
1639     }
1640
1641     return *this;
1642 }
1643
1644 // truncate the string
1645 wxString& wxString::Truncate(size_t uiLen)
1646 {
1647     if ( uiLen < length() )
1648     {
1649         erase(begin() + uiLen, end());
1650     }
1651     //else: nothing to do, string is already short enough
1652
1653     return *this;
1654 }
1655
1656 // ---------------------------------------------------------------------------
1657 // finding (return wxNOT_FOUND if not found and index otherwise)
1658 // ---------------------------------------------------------------------------
1659
1660 // find a character
1661 int wxString::Find(wxUniChar ch, bool bFromEnd) const
1662 {
1663     size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1664
1665     return (idx == npos) ? wxNOT_FOUND : (int)idx;
1666 }
1667
1668 // ----------------------------------------------------------------------------
1669 // conversion to numbers
1670 // ----------------------------------------------------------------------------
1671
1672 // The implementation of all the functions below is exactly the same so factor
1673 // it out. Note that number extraction works correctly on UTF-8 strings, so
1674 // we can use wxStringCharType and wx_str() for maximum efficiency.
1675
// DO_IF_NOT_WINCE(x) expands to x unless __WXWINCE__ is defined, in which
// case it expands to nothing: it is used below to compile the errno handling
// out in the latter case.
#ifndef __WXWINCE__
    #define DO_IF_NOT_WINCE(x) x
#else
    #define DO_IF_NOT_WINCE(x)
#endif

// Common beginning of the conversion functions: returns false if the output
// pointer pVal is NULL, resets errno (unless compiled out, see above) and
// declares "start", pointing to the string data, and "end", which must be
// passed to the conversion function to be filled in by it.
#define WX_STRING_TO_X_TYPE_START                                           \
    wxCHECK_MSG( pVal, false, wxT("NULL output pointer") );                  \
    DO_IF_NOT_WINCE( errno = 0; )                                           \
    const wxStringCharType *start = wx_str();                               \
    wxStringCharType *end;

// Common end of the conversion functions, expects the conversion result to be
// in the variable called "val".
//
// notice that we return false without modifying the output parameter at all if
// nothing could be parsed but we do modify it and return false then if we did
// parse something successfully but not the entire string
#define WX_STRING_TO_X_TYPE_END                                             \
    if ( end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )                 \
        return false;                                                       \
    *pVal = val;                                                            \
    return !*end;
1696
// Converts the string to a long using wxStrtol() with the given base, which
// must be 0 or between 2 and 36 (this is asserted).
//
// Returns true only if the entire string was parsed. See the comment before
// WX_STRING_TO_X_TYPE_END for when *pVal is modified if false is returned.
bool wxString::ToLong(long *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );

    // the macros declare "start" and "end" and use "val" and "pVal"
    WX_STRING_TO_X_TYPE_START
    long val = wxStrtol(start, &end, base);
    WX_STRING_TO_X_TYPE_END
}
1705
// Converts the string to an unsigned long using wxStrtoul() with the given
// base, which must be 0 or between 2 and 36 (this is asserted).
//
// Returns true only if the entire string was parsed. See the comment before
// WX_STRING_TO_X_TYPE_END for when *pVal is modified if false is returned.
bool wxString::ToULong(unsigned long *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );

    // the macros declare "start" and "end" and use "val" and "pVal"
    WX_STRING_TO_X_TYPE_START
    unsigned long val = wxStrtoul(start, &end, base);
    WX_STRING_TO_X_TYPE_END
}
1714
// Converts the string to a wxLongLong_t using wxStrtoll() with the given
// base, which must be 0 or between 2 and 36 (this is asserted).
//
// Returns true only if the entire string was parsed. See the comment before
// WX_STRING_TO_X_TYPE_END for when *pVal is modified if false is returned.
bool wxString::ToLongLong(wxLongLong_t *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );

    // the macros declare "start" and "end" and use "val" and "pVal"
    WX_STRING_TO_X_TYPE_START
    wxLongLong_t val = wxStrtoll(start, &end, base);
    WX_STRING_TO_X_TYPE_END
}
1723
// Converts the string to a wxULongLong_t using wxStrtoull() with the given
// base, which must be 0 or between 2 and 36 (this is asserted).
//
// Returns true only if the entire string was parsed. See the comment before
// WX_STRING_TO_X_TYPE_END for when *pVal is modified if false is returned.
bool wxString::ToULongLong(wxULongLong_t *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );

    // the macros declare "start" and "end" and use "val" and "pVal"
    WX_STRING_TO_X_TYPE_START
    wxULongLong_t val = wxStrtoull(start, &end, base);
    WX_STRING_TO_X_TYPE_END
}
1732
// Converts the string to a double using wxStrtod().
//
// Returns true only if the entire string was parsed. See the comment before
// WX_STRING_TO_X_TYPE_END for when *pVal is modified if false is returned.
bool wxString::ToDouble(double *pVal) const
{
    // the macros declare "start" and "end" and use "val" and "pVal"
    WX_STRING_TO_X_TYPE_START
    double val = wxStrtod(start, &end);
    WX_STRING_TO_X_TYPE_END
}
1739
1740 #if wxUSE_XLOCALE
1741
// Same as ToLong() but the conversion function is explicitly passed wxCLocale
// as the locale to use.
bool wxString::ToCLong(long *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );

    // the macros declare "start" and "end" and use "val" and "pVal"
    WX_STRING_TO_X_TYPE_START
    // NOTE(review): the "A" variant is presumably the one taking narrow
    // strings, as it is selected in the builds not using wchar_t -- confirm
#if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT)
    long val = wxStrtol_lA(start, &end, base, wxCLocale);
#else
    long val = wxStrtol_l(start, &end, base, wxCLocale);
#endif
    WX_STRING_TO_X_TYPE_END
}
1754
// Same as ToULong() but the conversion function is explicitly passed
// wxCLocale as the locale to use.
bool wxString::ToCULong(unsigned long *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );

    // the macros declare "start" and "end" and use "val" and "pVal"
    WX_STRING_TO_X_TYPE_START
    // NOTE(review): the "A" variant is presumably the one taking narrow
    // strings, as it is selected in the builds not using wchar_t -- confirm
#if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT)
    unsigned long val = wxStrtoul_lA(start, &end, base, wxCLocale);
#else
    unsigned long val = wxStrtoul_l(start, &end, base, wxCLocale);
#endif
    WX_STRING_TO_X_TYPE_END
}
1767
// Same as ToDouble() but the conversion function is explicitly passed
// wxCLocale as the locale to use.
bool wxString::ToCDouble(double *pVal) const
{
    // the macros declare "start" and "end" and use "val" and "pVal"
    WX_STRING_TO_X_TYPE_START
    // NOTE(review): the "A" variant is presumably the one taking narrow
    // strings, as it is selected in the builds not using wchar_t -- confirm
#if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT)
    double val = wxStrtod_lA(start, &end, wxCLocale);
#else
    double val = wxStrtod_l(start, &end, wxCLocale);
#endif
    WX_STRING_TO_X_TYPE_END
}
1778
1779 #else // wxUSE_XLOCALE
1780
1781 // Provide implementation of these functions even when wxUSE_XLOCALE is
1782 // disabled, we still need them in wxWidgets internal code.
1783
// For integers we just assume the current locale uses the same number
// representation as the C one as there is nothing else we can do, so this
// version of ToCLong(), used when wxUSE_XLOCALE is disabled, simply forwards
// to ToLong() and returns its result.
bool wxString::ToCLong(long *pVal, int base) const
{
    return ToLong(pVal, base);
}
1790
// Version of ToCULong() used when wxUSE_XLOCALE is disabled: simply forwards
// to ToULong() and returns its result.
bool wxString::ToCULong(unsigned long *pVal, int base) const
{
    return ToULong(pVal, base);
}
1795
1796 // For floating point numbers we have to handle the problem of the decimal
1797 // point which is different in different locales.
1798 bool wxString::ToCDouble(double *pVal) const
1799 {
1800     // Create a copy of this string using the decimal point instead of whatever
1801     // separator the current locale uses.
1802 #if wxUSE_INTL
1803     wxString sep = wxLocale::GetInfo(wxLOCALE_DECIMAL_POINT,
1804                                      wxLOCALE_CAT_NUMBER);
1805     if ( sep == "." )
1806     {
1807         // We can avoid an unnecessary string copy in this case.
1808         return ToDouble(pVal);
1809     }
1810 #else // !wxUSE_INTL
1811     // We don't know what the current separator is so it might even be a point
1812     // already, try to parse the string as a double:
1813     if ( ToDouble(pVal) )
1814     {
1815         // It must have been the point, nothing else to do.
1816         return true;
1817     }
1818
1819     // Try to guess the separator, using the most common alternative value.
1820     wxString sep(",");
1821 #endif // wxUSE_INTL/!wxUSE_INTL
1822     wxString cstr(*this);
1823     cstr.Replace(".", sep);
1824
1825     return cstr.ToDouble(pVal);
1826 }
1827
1828 #endif  // wxUSE_XLOCALE/!wxUSE_XLOCALE
1829
1830 // ----------------------------------------------------------------------------
1831 // number to string conversion
1832 // ----------------------------------------------------------------------------
1833
1834 /* static */
1835 wxString wxString::FromDouble(double val, int precision)
1836 {
1837     wxCHECK_MSG( precision >= -1, wxString(), "Invalid negative precision" );
1838
1839     wxString format;
1840     if ( precision == -1 )
1841     {
1842         format = "%g";
1843     }
1844     else // Use fixed precision.
1845     {
1846         format.Printf("%%.%df", precision);
1847     }
1848
1849     return wxString::Format(format, val);
1850 }
1851
1852 /* static */
1853 wxString wxString::FromCDouble(double val, int precision)
1854 {
1855     wxCHECK_MSG( precision >= -1, wxString(), "Invalid negative precision" );
1856
1857 #if wxUSE_STD_IOSTREAM && wxUSE_STD_STRING
1858     // We assume that we can use the ostream and not wstream for numbers.
1859     wxSTD ostringstream os;
1860     if ( precision != -1 )
1861     {
1862         os.precision(precision);
1863         os.setf(std::ios::fixed, std::ios::floatfield);
1864     }
1865
1866     os << val;
1867     return os.str();
1868 #else // !wxUSE_STD_IOSTREAM
1869     // Can't use iostream locale support, fall back to the manual method
1870     // instead.
1871     wxString s = FromDouble(val, precision);
1872 #if wxUSE_INTL
1873     wxString sep = wxLocale::GetInfo(wxLOCALE_DECIMAL_POINT,
1874                                      wxLOCALE_CAT_NUMBER);
1875 #else // !wxUSE_INTL
1876     // As above, this is the most common alternative value. Notice that here it
1877     // doesn't matter if we guess wrongly and the current separator is already
1878     // ".": we'll just waste a call to Replace() in this case.
1879     wxString sep(",");
1880 #endif // wxUSE_INTL/!wxUSE_INTL
1881
1882     s.Replace(sep, ".");
1883     return s;
1884 #endif // wxUSE_STD_IOSTREAM/!wxUSE_STD_IOSTREAM
1885 }
1886
1887 // ---------------------------------------------------------------------------
1888 // formatted output
1889 // ---------------------------------------------------------------------------
1890
1891 #if !wxUSE_UTF8_LOCALE_ONLY
1892 /* static */
1893 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1894 wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
1895 #else
1896 wxString wxString::DoFormatWchar(const wxChar *format, ...)
1897 #endif
1898 {
1899     va_list argptr;
1900     va_start(argptr, format);
1901
1902     wxString s;
1903     s.PrintfV(format, argptr);
1904
1905     va_end(argptr);
1906
1907     return s;
1908 }
1909 #endif // !wxUSE_UTF8_LOCALE_ONLY
1910
// Returns a new string with the result of formatting the variable arguments
// according to the given char format string using PrintfV().
#if wxUSE_UNICODE_UTF8
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
1926
1927 /* static */
1928 wxString wxString::FormatV(const wxString& format, va_list argptr)
1929 {
1930     wxString s;
1931     s.PrintfV(format, argptr);
1932     return s;
1933 }
1934
1935 #if !wxUSE_UTF8_LOCALE_ONLY
1936 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1937 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
1938 #else
1939 int wxString::DoPrintfWchar(const wxChar *format, ...)
1940 #endif
1941 {
1942     va_list argptr;
1943     va_start(argptr, format);
1944
1945 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1946     // get a pointer to the wxString instance; we have to use dynamic_cast<>
1947     // because it's the only cast that works safely for downcasting when
1948     // multiple inheritance is used:
1949     wxString *str = static_cast<wxString*>(this);
1950 #else
1951     wxString *str = this;
1952 #endif
1953
1954     int iLen = str->PrintfV(format, argptr);
1955
1956     va_end(argptr);
1957
1958     return iLen;
1959 }
1960 #endif // !wxUSE_UTF8_LOCALE_ONLY
1961
#if wxUSE_UNICODE_UTF8
// printf()-like formatting of the variable arguments into this string using
// a narrow (char) format; returns the value returned by PrintfV().
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);
    const int rc = PrintfV(format, args);
    va_end(args);

    return rc;
}
#endif // wxUSE_UNICODE_UTF8
1975
1976 /*
    Uses wxVsnprintf and places the result into this string.
1978
1979     In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1980     it is vswprintf.  Due to a discrepancy between vsnprintf and vswprintf in
1981     the ISO C99 (and thus SUSv3) standard the return value for the case of
1982     an undersized buffer is inconsistent.  For conforming vsnprintf
1983     implementations the function must return the number of characters that
1984     would have been printed had the buffer been large enough.  For conforming
1985     vswprintf implementations the function must return a negative number
1986     and set errno.
1987
1988     What vswprintf sets errno to is undefined but Darwin seems to set it to
1989     EOVERFLOW.  The only expected errno are EILSEQ and EINVAL.  Both of
1990     those are defined in the standard and backed up by several conformance
1991     statements.  Note that ENOMEM mentioned in the manual page does not
1992     apply to swprintf, only wprintf and fwprintf.
1993
1994     Official manual page:
1995     http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1996
1997     Some conformance statements (AIX, Solaris):
1998     http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1999     http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
2000
2001     Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
2002     EILSEQ and EINVAL are specifically defined to mean the error is other than
2003     an undersized buffer and no other errno are defined we treat those two
2004     as meaning hard errors and everything else gets the old behaviour which
2005     is to keep looping and increasing buffer size until the function succeeds.
2006
2007     In practice it's impossible to determine before compilation which behaviour
2008     may be used.  The vswprintf function may have vsnprintf-like behaviour or
2009     vice-versa.  Behaviour detected on one release can theoretically change
2010     with an updated release.  Not to mention that configure testing for it
2011     would require the test to be run on the host system, not the build system
2012     which makes cross compilation difficult. Therefore, we make no assumptions
2013     about behaviour and try our best to handle every known case, including the
2014     case where wxVsnprintf returns a negative number and fails to set errno.
2015
2016     There is yet one more non-standard implementation and that is our own.
2017     Fortunately, that can be detected at compile-time.
2018
    On top of all that, ISO C99 explicitly defines snprintf to write a null
    character to the last position of the specified buffer, i.e. at the index
    equal to the given buffer size minus 1.  It is supposed to do this even if
    it turns out that the buffer is sized too small.
2023
2024     Darwin (tested on 10.5) follows the C99 behaviour exactly.
2025
2026     Glibc 2.6 almost follows the C99 behaviour except vswprintf never sets
2027     errno even when it fails.  However, it only seems to ever fail due
2028     to an undersized buffer.
2029 */
2030 #if wxUSE_UNICODE_UTF8
2031 template<typename BufferType>
2032 #else
2033 // we only need one version in non-UTF8 builds and at least two Windows
2034 // compilers have problems with this function template, so use just one
2035 // normal function here
2036 #endif
2037 static int DoStringPrintfV(wxString& str,
2038                            const wxString& format, va_list argptr)
2039 {
2040     int size = 1024;
2041
2042     for ( ;; )
2043     {
2044 #if wxUSE_UNICODE_UTF8
2045         BufferType tmp(str, size + 1);
2046         typename BufferType::CharType *buf = tmp;
2047 #else
2048         wxStringBuffer tmp(str, size + 1);
2049         wxChar *buf = tmp;
2050 #endif
2051
2052         if ( !buf )
2053         {
2054             // out of memory
2055             return -1;
2056         }
2057
2058         // wxVsnprintf() may modify the original arg pointer, so pass it
2059         // only a copy
2060         va_list argptrcopy;
2061         wxVaCopy(argptrcopy, argptr);
2062
2063 #ifndef __WXWINCE__
2064         // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
2065         errno = 0;
2066 #endif
2067         int len = wxVsnprintf(buf, size, format, argptrcopy);
2068         va_end(argptrcopy);
2069
2070         // some implementations of vsnprintf() don't NUL terminate
2071         // the string if there is not enough space for it so
2072         // always do it manually
2073         // FIXME: This really seems to be the wrong and would be an off-by-one
2074         // bug except the code above allocates an extra character.
2075         buf[size] = wxT('\0');
2076
2077         // vsnprintf() may return either -1 (traditional Unix behaviour) or the
2078         // total number of characters which would have been written if the
2079         // buffer were large enough (newer standards such as Unix98)
2080         if ( len < 0 )
2081         {
2082             // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
2083             //     wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
2084             //     is true if *both* of them use our own implementation,
2085             //     otherwise we can't be sure
2086 #if wxUSE_WXVSNPRINTF
2087             // we know that our own implementation of wxVsnprintf() returns -1
2088             // only for a format error - thus there's something wrong with
2089             // the user's format string
2090             buf[0] = '\0';
2091             return -1;
2092 #else // possibly using system version
2093             // assume it only returns error if there is not enough space, but
2094             // as we don't know how much we need, double the current size of
2095             // the buffer
2096 #ifndef __WXWINCE__
2097             if( (errno == EILSEQ) || (errno == EINVAL) )
2098             // If errno was set to one of the two well-known hard errors
2099             // then fail immediately to avoid an infinite loop.
2100                 return -1;
2101             else
2102 #endif // __WXWINCE__
2103             // still not enough, as we don't know how much we need, double the
2104             // current size of the buffer
2105                 size *= 2;
2106 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
2107         }
2108         else if ( len >= size )
2109         {
2110 #if wxUSE_WXVSNPRINTF
2111             // we know that our own implementation of wxVsnprintf() returns
2112             // size+1 when there's not enough space but that's not the size
2113             // of the required buffer!
2114             size *= 2;      // so we just double the current size of the buffer
2115 #else
2116             // some vsnprintf() implementations NUL-terminate the buffer and
2117             // some don't in len == size case, to be safe always add 1
2118             // FIXME: I don't quite understand this comment.  The vsnprintf
2119             // function is specifically defined to return the number of
2120             // characters printed not including the null terminator.
2121             // So OF COURSE you need to add 1 to get the right buffer size.
2122             // The following line is definitely correct, no question.
2123             size = len + 1;
2124 #endif
2125         }
2126         else // ok, there was enough space
2127         {
2128             break;
2129         }
2130     }
2131
2132     // we could have overshot
2133     str.Shrink();
2134
2135     return str.length();
2136 }
2137
2138 int wxString::PrintfV(const wxString& format, va_list argptr)
2139 {
2140 #if wxUSE_UNICODE_UTF8
2141     #if wxUSE_STL_BASED_WXSTRING
2142         typedef wxStringTypeBuffer<char> Utf8Buffer;
2143     #else
2144         typedef wxStringInternalBuffer Utf8Buffer;
2145     #endif
2146 #endif
2147
2148 #if wxUSE_UTF8_LOCALE_ONLY
2149     return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
2150 #else
2151     #if wxUSE_UNICODE_UTF8
2152     if ( wxLocaleIsUtf8 )
2153         return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
2154     else
2155         // wxChar* version
2156         return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
2157     #else
2158         return DoStringPrintfV(*this, format, argptr);
2159     #endif // UTF8/WCHAR
2160 #endif
2161 }
2162
2163 // ----------------------------------------------------------------------------
2164 // misc other operations
2165 // ----------------------------------------------------------------------------
2166
2167 // returns true if the string matches the pattern which may contain '*' and
2168 // '?' metacharacters (as usual, '?' matches any character and '*' any number
2169 // of them)
2170 bool wxString::Matches(const wxString& mask) const
2171 {
2172     // I disable this code as it doesn't seem to be faster (in fact, it seems
2173     // to be much slower) than the old, hand-written code below and using it
2174     // here requires always linking with libregex even if the user code doesn't
2175     // use it
2176 #if 0 // wxUSE_REGEX
2177     // first translate the shell-like mask into a regex
2178     wxString pattern;
2179     pattern.reserve(wxStrlen(pszMask));
2180
2181     pattern += wxT('^');
2182     while ( *pszMask )
2183     {
2184         switch ( *pszMask )
2185         {
2186             case wxT('?'):
2187                 pattern += wxT('.');
2188                 break;
2189
2190             case wxT('*'):
2191                 pattern += wxT(".*");
2192                 break;
2193
2194             case wxT('^'):
2195             case wxT('.'):
2196             case wxT('$'):
2197             case wxT('('):
2198             case wxT(')'):
2199             case wxT('|'):
2200             case wxT('+'):
2201             case wxT('\\'):
2202                 // these characters are special in a RE, quote them
2203                 // (however note that we don't quote '[' and ']' to allow
2204                 // using them for Unix shell like matching)
2205                 pattern += wxT('\\');
2206                 // fall through
2207
2208             default:
2209                 pattern += *pszMask;
2210         }
2211
2212         pszMask++;
2213     }
2214     pattern += wxT('$');
2215
2216     // and now use it
2217     return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
2218 #else // !wxUSE_REGEX
2219   // TODO: this is, of course, awfully inefficient...
2220
2221   // FIXME-UTF8: implement using iterators, remove #if
2222 #if wxUSE_UNICODE_UTF8
2223   const wxScopedWCharBuffer maskBuf = mask.wc_str();
2224   const wxScopedWCharBuffer txtBuf = wc_str();
2225   const wxChar *pszMask = maskBuf.data();
2226   const wxChar *pszTxt = txtBuf.data();
2227 #else
2228   const wxChar *pszMask = mask.wx_str();
2229   // the char currently being checked
2230   const wxChar *pszTxt = wx_str();
2231 #endif
2232
2233   // the last location where '*' matched
2234   const wxChar *pszLastStarInText = NULL;
2235   const wxChar *pszLastStarInMask = NULL;
2236
2237 match:
2238   for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
2239     switch ( *pszMask ) {
2240       case wxT('?'):
2241         if ( *pszTxt == wxT('\0') )
2242           return false;
2243
2244         // pszTxt and pszMask will be incremented in the loop statement
2245
2246         break;
2247
2248       case wxT('*'):
2249         {
2250           // remember where we started to be able to backtrack later
2251           pszLastStarInText = pszTxt;
2252           pszLastStarInMask = pszMask;
2253
2254           // ignore special chars immediately following this one
2255           // (should this be an error?)
2256           while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
2257             pszMask++;
2258
2259           // if there is nothing more, match
2260           if ( *pszMask == wxT('\0') )
2261             return true;
2262
2263           // are there any other metacharacters in the mask?
2264           size_t uiLenMask;
2265           const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));
2266
2267           if ( pEndMask != NULL ) {
2268             // we have to match the string between two metachars
2269             uiLenMask = pEndMask - pszMask;
2270           }
2271           else {
2272             // we have to match the remainder of the string
2273             uiLenMask = wxStrlen(pszMask);
2274           }
2275
2276           wxString strToMatch(pszMask, uiLenMask);
2277           const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
2278           if ( pMatch == NULL )
2279             return false;
2280
2281           // -1 to compensate "++" in the loop
2282           pszTxt = pMatch + uiLenMask - 1;
2283           pszMask += uiLenMask - 1;
2284         }
2285         break;
2286
2287       default:
2288         if ( *pszMask != *pszTxt )
2289           return false;
2290         break;
2291     }
2292   }
2293
2294   // match only if nothing left
2295   if ( *pszTxt == wxT('\0') )
2296     return true;
2297
2298   // if we failed to match, backtrack if we can
2299   if ( pszLastStarInText ) {
2300     pszTxt = pszLastStarInText + 1;
2301     pszMask = pszLastStarInMask;
2302
2303     pszLastStarInText = NULL;
2304
2305     // don't bother resetting pszLastStarInMask, it's unnecessary
2306
2307     goto match;
2308   }
2309
2310   return false;
2311 #endif // wxUSE_REGEX/!wxUSE_REGEX
2312 }
2313
2314 // Count the number of chars
2315 int wxString::Freq(wxUniChar ch) const
2316 {
2317     int count = 0;
2318     for ( const_iterator i = begin(); i != end(); ++i )
2319     {
2320         if ( *i == ch )
2321             count ++;
2322     }
2323     return count;
2324 }
2325