src/common/string.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/string.cpp
   3 // Purpose:     wxString class
   4 // Author:      Vadim Zeitlin, Ryan Norton
   5 // Modified by:
   6 // Created:     29/01/98
   7 // RCS-ID:      $Id$
   8 // Copyright:   (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
   9 //              (c) 2004 Ryan Norton <wxprojects@comcast.net>
  10 // Licence:     wxWindows licence
  11 /////////////////////////////////////////////////////////////////////////////
  12
  13 // ===========================================================================
  14 // headers, declarations, constants
  15 // ===========================================================================
  16
  17 // For compilers that support precompilation, includes "wx.h".
  18 #include "wx/wxprec.h"
  19
  20 #ifdef __BORLANDC__
  21     #pragma hdrstop
  22 #endif
  23
  24 #ifndef WX_PRECOMP
  25     #include "wx/string.h"
  26     #include "wx/wxcrtvararg.h"
  27 #endif
  28
  29 #include <ctype.h>
  30
  31 #ifndef __WXWINCE__
  32     #include <errno.h>
  33 #endif
  34
  35 #include <string.h>
  36 #include <stdlib.h>
  37
  38 #include "wx/hashmap.h"
  39
  40 // string handling functions used by wxString:
  41 #if wxUSE_UNICODE_UTF8
  42     #define wxStringMemcpy   memcpy
  43     #define wxStringMemcmp   memcmp
  44     #define wxStringMemchr   memchr
  45     #define wxStringStrlen   strlen
  46 #else
  47     #define wxStringMemcpy   wxTmemcpy
  48     #define wxStringMemcmp   wxTmemcmp
  49     #define wxStringMemchr   wxTmemchr
  50     #define wxStringStrlen   wxStrlen
  51 #endif
  52
  53
  54 // ---------------------------------------------------------------------------
  55 // static class variables definition
  56 // ---------------------------------------------------------------------------
  57
  58 //According to STL _must_ be a -1 size_t
  59 const size_t wxString::npos = (size_t) -1;
  60
  61 #if wxUSE_STRING_POS_CACHE
  62
  63 #ifdef wxHAS_COMPILER_TLS
  64
  65 wxTLS_TYPE(wxString::Cache) wxString::ms_cache;
  66
  67 #else // !wxHAS_COMPILER_TLS
  68
  69 struct wxStrCacheInitializer
  70 {
  71     wxStrCacheInitializer()
  72     {
  73         // calling this function triggers s_cache initialization in it, and
  74         // from now on it becomes safe to call from multiple threads
  75         wxString::GetCache();
  76     }
  77 };
  78
  79 /*
  80 wxString::Cache& wxString::GetCache()
  81 {
  82     static wxTLS_TYPE(Cache) s_cache;
  83
  84     return wxTLS_VALUE(s_cache);
  85 }
  86 */
  87
  88 static wxStrCacheInitializer gs_stringCacheInit;
  89
  90 #endif // wxHAS_COMPILER_TLS/!wxHAS_COMPILER_TLS
  91
  92 // gdb seems to be unable to display thread-local variables correctly, at least
  93 // not my 6.4.98 version under amd64, so provide this debugging helper to do it
  94 #ifdef __WXDEBUG__
  95
  96 struct wxStrCacheDumper
  97 {
  98     static void ShowAll()
  99     {
 100         puts("*** wxString cache dump:");
 101         for ( unsigned n = 0; n < wxString::Cache::SIZE; n++ )
 102         {
 103             const wxString::Cache::Element&
 104                 c = wxString::GetCacheBegin()[n];
 105
 106             printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
 107                    n,
 108                    n == wxString::LastUsedCacheElement() ? " [*]" : "",
 109                    c.str,
 110                    (unsigned long)c.pos,
 111                    (unsigned long)c.impl,
 112                    (long)c.len);
 113         }
 114     }
 115 };
 116
 117 void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
 118
 119 #endif // __WXDEBUG__
 120
 121 #ifdef wxPROFILE_STRING_CACHE
 122
 123 wxString::CacheStats wxString::ms_cacheStats;
 124
 125 struct wxStrCacheStatsDumper
 126 {
 127     ~wxStrCacheStatsDumper()
 128     {
 129         const wxString::CacheStats& stats = wxString::ms_cacheStats;
 130
 131         if ( stats.postot )
 132         {
 133             puts("*** wxString cache statistics:");
 134             printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
 135                    stats.postot);
 136             printf("\tHits %u (of which %u not used) or %.2f%%\n",
 137                    stats.poshits,
 138                    stats.mishits,
 139                    100.*float(stats.poshits - stats.mishits)/stats.postot);
 140             printf("\tAverage position requested: %.2f\n",
 141                    float(stats.sumpos) / stats.postot);
 142             printf("\tAverage offset after cached hint: %.2f\n",
 143                    float(stats.sumofs) / stats.postot);
 144         }
 145
 146         if ( stats.lentot )
 147         {
 148             printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
 149                    stats.lentot, 100.*float(stats.lenhits)/stats.lentot);
 150         }
 151     }
 152 };
 153
 154 static wxStrCacheStatsDumper s_showCacheStats;
 155
 156 #endif // wxPROFILE_STRING_CACHE
 157
 158 #endif // wxUSE_STRING_POS_CACHE
 159
 160 // ----------------------------------------------------------------------------
 161 // global functions
 162 // ----------------------------------------------------------------------------
 163
 164 #if wxUSE_STD_IOSTREAM
 165
 166 #include <iostream>
 167
 168 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
 169 {
 170 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
 171     const wxCharBuffer buf(str.AsCharBuf());
 172     if ( !buf )
 173         os.clear(wxSTD ios_base::failbit);
 174     else
 175         os << buf.data();
 176
 177     return os;
 178 #else
 179     return os << str.AsInternal();
 180 #endif
 181 }
 182
 183 wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
 184 {
 185     return os << str.c_str();
 186 }
 187
 188 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str)
 189 {
 190     return os << str.data();
 191 }
 192
 193 #ifndef __BORLANDC__
 194 wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)
 195 {
 196     return os << str.data();
 197 }
 198 #endif
 199
 200 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
 201
 202 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxString& str)
 203 {
 204     return wos << str.wc_str();
 205 }
 206
 207 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxCStrData& str)
 208 {
 209     return wos << str.AsWChar();
 210 }
 211
 212 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxWCharBuffer& str)
 213 {
 214     return wos << str.data();
 215 }
 216
 217 #endif  // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
 218
 219 #endif // wxUSE_STD_IOSTREAM
 220
 221 // ===========================================================================
 222 // wxString class core
 223 // ===========================================================================
 224
 225 #if wxUSE_UNICODE_UTF8
 226
 227 void wxString::PosLenToImpl(size_t pos, size_t len,
 228                             size_t *implPos, size_t *implLen) const
 229 {
 230     if ( pos == npos )
 231     {
 232         *implPos = npos;
 233     }
 234     else // have valid start position
 235     {
 236         const const_iterator b = GetIterForNthChar(pos);
 237         *implPos = wxStringImpl::const_iterator(b.impl()) - m_impl.begin();
 238         if ( len == npos )
 239         {
 240             *implLen = npos;
 241         }
 242         else // have valid length too
 243         {
 244             // we need to handle the case of length specifying a substring
 245             // going beyond the end of the string, just as std::string does
 246             const const_iterator e(end());
 247             const_iterator i(b);
 248             while ( len && i <= e )
 249             {
 250                 ++i;
 251                 --len;
 252             }
 253
 254             *implLen = i.impl() - b.impl();
 255         }
 256     }
 257 }
 258
 259 #endif // wxUSE_UNICODE_UTF8
 260
 261 // ----------------------------------------------------------------------------
 262 // wxCStrData converted strings caching
 263 // ----------------------------------------------------------------------------
 264
 265 // FIXME-UTF8: temporarily disabled because it doesn't work with global
 266 //             string objects; re-enable after fixing this bug and benchmarking
 267 //             performance to see if using a hash is a good idea at all
 268 #if 0
 269
 270 // For backward compatibility reasons, it must be possible to assign the value
 271 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
 272 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
 273 // because the memory would be freed immediately, but it has to be valid as long
 274 // as the string is not modified, so that code like this still works:
 275 //
 276 // const wxChar *s = str.c_str();
 277 // while ( s ) { ... }
 278
 279 // FIXME-UTF8: not thread safe!
 280 // FIXME-UTF8: we currently clear the cached conversion only when the string is
 281 //             destroyed, but we should do it when the string is modified, to
 282 //             keep memory usage down
 283 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
 284 //             invalidated the cache on every change, we could keep the previous
 285 //             conversion
 286 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
 287 //             to use mb_str() or wc_str() instead of (const [w]char*)c_str()
 288
 289 template<typename T>
 290 static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
 291 {
 292     typename T::iterator i = hash.find(wxConstCast(s, wxString));
 293     if ( i != hash.end() )
 294     {
 295         free(i->second);
 296         hash.erase(i);
 297     }
 298 }
 299
 300 #if wxUSE_UNICODE
 301 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
 302 //     so we have to use wxString* here and const-cast when used
 303 WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
 304                     wxStringCharConversionCache);
 305 static wxStringCharConversionCache gs_stringsCharCache;
 306
 307 const char* wxCStrData::AsChar() const
 308 {
 309     // remove previously cache value, if any (see FIXMEs above):
 310     DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
 311
 312     // convert the string and keep it:
 313     const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
 314         m_str->mb_str().release();
 315
 316     return s + m_offset;
 317 }
 318 #endif // wxUSE_UNICODE
 319
 320 #if !wxUSE_UNICODE_WCHAR
 321 WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
 322                     wxStringWCharConversionCache);
 323 static wxStringWCharConversionCache gs_stringsWCharCache;
 324
 325 const wchar_t* wxCStrData::AsWChar() const
 326 {
 327     // remove previously cache value, if any (see FIXMEs above):
 328     DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
 329
 330     // convert the string and keep it:
 331     const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
 332         m_str->wc_str().release();
 333
 334     return s + m_offset;
 335 }
 336 #endif // !wxUSE_UNICODE_WCHAR
 337
 338 wxString::~wxString()
 339 {
 340 #if wxUSE_UNICODE
 341     // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
 342     DeleteStringFromConversionCache(gs_stringsCharCache, this);
 343 #endif
 344 #if !wxUSE_UNICODE_WCHAR
 345     DeleteStringFromConversionCache(gs_stringsWCharCache, this);
 346 #endif
 347 }
 348 #endif
 349
 350 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 351 const char* wxCStrData::AsChar() const
 352 {
 353 #if wxUSE_UNICODE_UTF8
 354     if ( wxLocaleIsUtf8 )
 355         return AsInternal();
 356 #endif
 357     // under non-UTF8 locales, we have to convert the internal UTF-8
 358     // representation using wxConvLibc and cache the result
 359
 360     wxString *str = wxConstCast(m_str, wxString);
 361
 362     // convert the string:
 363     //
 364     // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
 365     //             have it) but it's unfortunately not obvious to implement
 366     //             because we don't know how big buffer do we need for the
 367     //             given string length (in case of multibyte encodings, e.g.
 368     //             ISO-2022-JP or UTF-8 when internal representation is wchar_t)
 369     //
 370     //             One idea would be to store more than just m_convertedToChar
 371     //             in wxString: then we could record the length of the string
 372     //             which was converted the last time and try to reuse the same
 373     //             buffer if the current length is not greater than it (this
 374     //             could still fail because string could have been modified in
 375     //             place but it would work most of the time, so we'd do it and
 376     //             only allocate the new buffer if in-place conversion returned
 377     //             an error). We could also store a bit saying if the string
 378     //             was modified since the last conversion (and update it in all
 379     //             operation modifying the string, of course) to avoid unneeded
 380     //             consequential conversions. But both of these ideas require
 381     //             adding more fields to wxString and require profiling results
 382     //             to be sure that we really gain enough from them to justify
 383     //             doing it.
 384     wxCharBuffer buf(str->mb_str());
 385
 386     // if it failed, return empty string and not NULL to avoid crashes in code
 387     // written with either wxWidgets 2 wxString or std::string behaviour in
 388     // mind: neither of them ever returns NULL and so we shouldn't neither
 389     if ( !buf )
 390         return "";
 391
 392     if ( str->m_convertedToChar &&
 393          strlen(buf) == strlen(str->m_convertedToChar) )
 394     {
 395         // keep the same buffer for as long as possible, so that several calls
 396         // to c_str() in a row still work:
 397         strcpy(str->m_convertedToChar, buf);
 398     }
 399     else
 400     {
 401         str->m_convertedToChar = buf.release();
 402     }
 403
 404     // and keep it:
 405     return str->m_convertedToChar + m_offset;
 406 }
 407 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 408
 409 #if !wxUSE_UNICODE_WCHAR
 410 const wchar_t* wxCStrData::AsWChar() const
 411 {
 412     wxString *str = wxConstCast(m_str, wxString);
 413
 414     // convert the string:
 415     wxWCharBuffer buf(str->wc_str());
 416
 417     // notice that here, unlike above in AsChar(), conversion can't fail as our
 418     // internal UTF-8 is always well-formed -- or the string was corrupted and
 419     // all bets are off anyhow
 420
 421     // FIXME-UTF8: do the conversion in-place in the existing buffer
 422     if ( str->m_convertedToWChar &&
 423          wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
 424     {
 425         // keep the same buffer for as long as possible, so that several calls
 426         // to c_str() in a row still work:
 427         memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
 428     }
 429     else
 430     {
 431         str->m_convertedToWChar = buf.release();
 432     }
 433
 434     // and keep it:
 435     return str->m_convertedToWChar + m_offset;
 436 }
 437 #endif // !wxUSE_UNICODE_WCHAR
 438
 439 // ===========================================================================
 440 // wxString class core
 441 // ===========================================================================
 442
 443 // ---------------------------------------------------------------------------
 444 // construction and conversion
 445 // ---------------------------------------------------------------------------
 446
 447 #if wxUSE_UNICODE_WCHAR
 448 /* static */
 449 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 450                                                const wxMBConv& conv)
 451 {
 452     // anything to do?
 453     if ( !psz || nLength == 0 )
 454         return SubstrBufFromMB(L"", 0);
 455
 456     if ( nLength == npos )
 457         nLength = wxNO_LEN;
 458
 459     size_t wcLen;
 460     wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 461     if ( !wcLen )
 462         return SubstrBufFromMB(_T(""), 0);
 463     else
 464         return SubstrBufFromMB(wcBuf, wcLen);
 465 }
 466 #endif // wxUSE_UNICODE_WCHAR
 467
 468 #if wxUSE_UNICODE_UTF8
 469 /* static */
 470 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 471                                                const wxMBConv& conv)
 472 {
 473     // anything to do?
 474     if ( !psz || nLength == 0 )
 475         return SubstrBufFromMB("", 0);
 476
 477     // if psz is already in UTF-8, we don't have to do the roundtrip to
 478     // wchar_t* and back:
 479     if ( conv.IsUTF8() )
 480     {
 481         // we need to validate the input because UTF8 iterators assume valid
 482         // UTF-8 sequence and psz may be invalid:
 483         if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
 484         {
 485             // we must pass the real string length to SubstrBufFromMB ctor
 486             if ( nLength == npos )
 487                 nLength = psz ? strlen(psz) : 0;
 488             return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz), nLength);
 489         }
 490         // else: do the roundtrip through wchar_t*
 491     }
 492
 493     if ( nLength == npos )
 494         nLength = wxNO_LEN;
 495
 496     // first convert to wide string:
 497     size_t wcLen;
 498     wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 499     if ( !wcLen )
 500         return SubstrBufFromMB("", 0);
 501
 502     // and then to UTF-8:
 503     SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
 504     // widechar -> UTF-8 conversion isn't supposed to ever fail:
 505     wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
 506
 507     return buf;
 508 }
 509 #endif // wxUSE_UNICODE_UTF8
 510
 511 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 512 /* static */
 513 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
 514                                                const wxMBConv& conv)
 515 {
 516     // anything to do?
 517     if ( !pwz || nLength == 0 )
 518         return SubstrBufFromWC("", 0);
 519
 520     if ( nLength == npos )
 521         nLength = wxNO_LEN;
 522
 523     size_t mbLen;
 524     wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
 525     if ( !mbLen )
 526         return SubstrBufFromWC("", 0);
 527     else
 528         return SubstrBufFromWC(mbBuf, mbLen);
 529 }
 530 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 531
 532
 533 #if wxUSE_UNICODE_WCHAR
 534
 535 //Convert wxString in Unicode mode to a multi-byte string
 536 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
 537 {
 538     return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL);
 539 }
 540
 541 #elif wxUSE_UNICODE_UTF8
 542
 543 const wxWCharBuffer wxString::wc_str() const
 544 {
 545     return wxMBConvStrictUTF8().cMB2WC
 546                                 (
 547                                     m_impl.c_str(),
 548                                     m_impl.length() + 1, // size, not length
 549                                     NULL
 550                                 );
 551 }
 552
 553 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
 554 {
 555     if ( conv.IsUTF8() )
 556         return wxCharBuffer::CreateNonOwned(m_impl.c_str());
 557
 558     // FIXME-UTF8: use wc_str() here once we have buffers with length
 559
 560     size_t wcLen;
 561     wxWCharBuffer wcBuf(wxMBConvStrictUTF8().cMB2WC
 562                                              (
 563                                                 m_impl.c_str(),
 564                                                 m_impl.length() + 1, // size
 565                                                 &wcLen
 566                                              ));
 567     if ( !wcLen )
 568         return wxCharBuffer("");
 569
 570     return conv.cWC2MB(wcBuf, wcLen+1, NULL);
 571 }
 572
 573 #else // ANSI
 574
 575 //Converts this string to a wide character string if unicode
 576 //mode is not enabled and wxUSE_WCHAR_T is enabled
 577 const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
 578 {
 579     return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL);
 580 }
 581
 582 #endif // Unicode/ANSI
 583
 584 // shrink to minimal size (releasing extra memory)
 585 bool wxString::Shrink()
 586 {
 587   wxString tmp(begin(), end());
 588   swap(tmp);
 589   return tmp.length() == length();
 590 }
 591
 592 // deprecated compatibility code:
 593 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
 594 wxStringCharType *wxString::GetWriteBuf(size_t nLen)
 595 {
 596     return DoGetWriteBuf(nLen);
 597 }
 598
 599 void wxString::UngetWriteBuf()
 600 {
 601     DoUngetWriteBuf();
 602 }
 603
 604 void wxString::UngetWriteBuf(size_t nLen)
 605 {
 606     DoUngetWriteBuf(nLen);
 607 }
 608 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
 609
 610
 611 // ---------------------------------------------------------------------------
 612 // data access
 613 // ---------------------------------------------------------------------------
 614
 615 // all functions are inline in string.h
 616
 617 // ---------------------------------------------------------------------------
 618 // concatenation operators
 619 // ---------------------------------------------------------------------------
 620
 621 /*
 622  * concatenation functions come in 5 flavours:
 623  *  string + string
 624  *  char   + string      and      string + char
 625  *  C str  + string      and      string + C str
 626  */
 627
 628 wxString operator+(const wxString& str1, const wxString& str2)
 629 {
 630 #if !wxUSE_STL_BASED_WXSTRING
 631     wxASSERT( str1.IsValid() );
 632     wxASSERT( str2.IsValid() );
 633 #endif
 634
 635     wxString s = str1;
 636     s += str2;
 637
 638     return s;
 639 }
 640
 641 wxString operator+(const wxString& str, wxUniChar ch)
 642 {
 643 #if !wxUSE_STL_BASED_WXSTRING
 644     wxASSERT( str.IsValid() );
 645 #endif
 646
 647     wxString s = str;
 648     s += ch;
 649
 650     return s;
 651 }
 652
 653 wxString operator+(wxUniChar ch, const wxString& str)
 654 {
 655 #if !wxUSE_STL_BASED_WXSTRING
 656     wxASSERT( str.IsValid() );
 657 #endif
 658
 659     wxString s = ch;
 660     s += str;
 661
 662     return s;
 663 }
 664
 665 wxString operator+(const wxString& str, const char *psz)
 666 {
 667 #if !wxUSE_STL_BASED_WXSTRING
 668     wxASSERT( str.IsValid() );
 669 #endif
 670
 671     wxString s;
 672     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 673         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 674     }
 675     s += str;
 676     s += psz;
 677
 678     return s;
 679 }
 680
 681 wxString operator+(const wxString& str, const wchar_t *pwz)
 682 {
 683 #if !wxUSE_STL_BASED_WXSTRING
 684     wxASSERT( str.IsValid() );
 685 #endif
 686
 687     wxString s;
 688     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 689         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 690     }
 691     s += str;
 692     s += pwz;
 693
 694     return s;
 695 }
 696
 697 wxString operator+(const char *psz, const wxString& str)
 698 {
 699 #if !wxUSE_STL_BASED_WXSTRING
 700     wxASSERT( str.IsValid() );
 701 #endif
 702
 703     wxString s;
 704     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 705         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 706     }
 707     s = psz;
 708     s += str;
 709
 710     return s;
 711 }
 712
 713 wxString operator+(const wchar_t *pwz, const wxString& str)
 714 {
 715 #if !wxUSE_STL_BASED_WXSTRING
 716     wxASSERT( str.IsValid() );
 717 #endif
 718
 719     wxString s;
 720     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 721         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 722     }
 723     s = pwz;
 724     s += str;
 725
 726     return s;
 727 }
 728
 729 // ---------------------------------------------------------------------------
 730 // string comparison
 731 // ---------------------------------------------------------------------------
 732
 733 bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
 734 {
 735     return (length() == 1) && (compareWithCase ? GetChar(0u) == c
 736                                : wxToupper(GetChar(0u)) == wxToupper(c));
 737 }
 738
 739 #ifdef HAVE_STD_STRING_COMPARE
 740
 741 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
 742 //     UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
 743 //     sort strings in characters code point order by sorting the byte sequence
 744 //     in byte values order (i.e. what strcmp() and memcmp() do).
 745
 746 int wxString::compare(const wxString& str) const
 747 {
 748     return m_impl.compare(str.m_impl);
 749 }
 750
 751 int wxString::compare(size_t nStart, size_t nLen,
 752                       const wxString& str) const
 753 {
 754     size_t pos, len;
 755     PosLenToImpl(nStart, nLen, &pos, &len);
 756     return m_impl.compare(pos, len, str.m_impl);
 757 }
 758
 759 int wxString::compare(size_t nStart, size_t nLen,
 760                       const wxString& str,
 761                       size_t nStart2, size_t nLen2) const
 762 {
 763     size_t pos, len;
 764     PosLenToImpl(nStart, nLen, &pos, &len);
 765
 766     size_t pos2, len2;
 767     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 768
 769     return m_impl.compare(pos, len, str.m_impl, pos2, len2);
 770 }
 771
 772 int wxString::compare(const char* sz) const
 773 {
 774     return m_impl.compare(ImplStr(sz));
 775 }
 776
 777 int wxString::compare(const wchar_t* sz) const
 778 {
 779     return m_impl.compare(ImplStr(sz));
 780 }
 781
 782 int wxString::compare(size_t nStart, size_t nLen,
 783                       const char* sz, size_t nCount) const
 784 {
 785     size_t pos, len;
 786     PosLenToImpl(nStart, nLen, &pos, &len);
 787
 788     SubstrBufFromMB str(ImplStr(sz, nCount));
 789
 790     return m_impl.compare(pos, len, str.data, str.len);
 791 }
 792
 793 int wxString::compare(size_t nStart, size_t nLen,
 794                       const wchar_t* sz, size_t nCount) const
 795 {
 796     size_t pos, len;
 797     PosLenToImpl(nStart, nLen, &pos, &len);
 798
 799     SubstrBufFromWC str(ImplStr(sz, nCount));
 800
 801     return m_impl.compare(pos, len, str.data, str.len);
 802 }
 803
 804 #else // !HAVE_STD_STRING_COMPARE
 805
 806 static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
 807                           const wxStringCharType* s2, size_t l2)
 808 {
 809     if( l1 == l2 )
 810         return wxStringMemcmp(s1, s2, l1);
 811     else if( l1 < l2 )
 812     {
 813         int ret = wxStringMemcmp(s1, s2, l1);
 814         return ret == 0 ? -1 : ret;
 815     }
 816     else
 817     {
 818         int ret = wxStringMemcmp(s1, s2, l2);
 819         return ret == 0 ? +1 : ret;
 820     }
 821 }
 822
 823 int wxString::compare(const wxString& str) const
 824 {
 825     return ::wxDoCmp(m_impl.data(), m_impl.length(),
 826                      str.m_impl.data(), str.m_impl.length());
 827 }
 828
 829 int wxString::compare(size_t nStart, size_t nLen,
 830                       const wxString& str) const
 831 {
 832     wxASSERT(nStart <= length());
 833     size_type strLen = length() - nStart;
 834     nLen = strLen < nLen ? strLen : nLen;
 835
 836     size_t pos, len;
 837     PosLenToImpl(nStart, nLen, &pos, &len);
 838
 839     return ::wxDoCmp(m_impl.data() + pos,  len,
 840                      str.m_impl.data(), str.m_impl.length());
 841 }
 842
 843 int wxString::compare(size_t nStart, size_t nLen,
 844                       const wxString& str,
 845                       size_t nStart2, size_t nLen2) const
 846 {
 847     wxASSERT(nStart <= length());
 848     wxASSERT(nStart2 <= str.length());
 849     size_type strLen  =     length() - nStart,
 850               strLen2 = str.length() - nStart2;
 851     nLen  = strLen  < nLen  ? strLen  : nLen;
 852     nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
 853
 854     size_t pos, len;
 855     PosLenToImpl(nStart, nLen, &pos, &len);
 856     size_t pos2, len2;
 857     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 858
 859     return ::wxDoCmp(m_impl.data() + pos, len,
 860                      str.m_impl.data() + pos2, len2);
 861 }
 862
 863 int wxString::compare(const char* sz) const
 864 {
 865     SubstrBufFromMB str(ImplStr(sz, npos));
 866     if ( str.len == npos )
 867         str.len = wxStringStrlen(str.data);
 868     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 869 }
 870
 871 int wxString::compare(const wchar_t* sz) const
 872 {
 873     SubstrBufFromWC str(ImplStr(sz, npos));
 874     if ( str.len == npos )
 875         str.len = wxStringStrlen(str.data);
 876     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 877 }
 878
 879 int wxString::compare(size_t nStart, size_t nLen,
 880                       const char* sz, size_t nCount) const
 881 {
 882     wxASSERT(nStart <= length());
 883     size_type strLen = length() - nStart;
 884     nLen = strLen < nLen ? strLen : nLen;
 885
 886     size_t pos, len;
 887     PosLenToImpl(nStart, nLen, &pos, &len);
 888
 889     SubstrBufFromMB str(ImplStr(sz, nCount));
 890     if ( str.len == npos )
 891         str.len = wxStringStrlen(str.data);
 892
 893     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 894 }
 895
 896 int wxString::compare(size_t nStart, size_t nLen,
 897                       const wchar_t* sz, size_t nCount) const
 898 {
 899     wxASSERT(nStart <= length());
 900     size_type strLen = length() - nStart;
 901     nLen = strLen < nLen ? strLen : nLen;
 902
 903     size_t pos, len;
 904     PosLenToImpl(nStart, nLen, &pos, &len);
 905
 906     SubstrBufFromWC str(ImplStr(sz, nCount));
 907     if ( str.len == npos )
 908         str.len = wxStringStrlen(str.data);
 909
 910     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 911 }
 912
 913 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
 914
 915
 916 // ---------------------------------------------------------------------------
 917 // find_{first,last}_[not]_of functions
 918 // ---------------------------------------------------------------------------
 919
 920 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
 921
 922 // NB: All these functions are implemented  with the argument being wxChar*,
 923 //     i.e. widechar string in any Unicode build, even though native string
 924 //     representation is char* in the UTF-8 build. This is because we couldn't
 925 //     use memchr() to determine if a character is in a set encoded as UTF-8.
 926
 927 size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
 928 {
 929     return find_first_of(sz, nStart, wxStrlen(sz));
 930 }
 931
 932 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
 933 {
 934     return find_first_not_of(sz, nStart, wxStrlen(sz));
 935 }
 936
 937 size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
 938 {
 939     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 940
 941     size_t idx = nStart;
 942     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 943     {
 944         if ( wxTmemchr(sz, *i, n) )
 945             return idx;
 946     }
 947
 948     return npos;
 949 }
 950
 951 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
 952 {
 953     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 954
 955     size_t idx = nStart;
 956     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 957     {
 958         if ( !wxTmemchr(sz, *i, n) )
 959             return idx;
 960     }
 961
 962     return npos;
 963 }
 964
 965
 966 size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
 967 {
 968     return find_last_of(sz, nStart, wxStrlen(sz));
 969 }
 970
 971 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
 972 {
 973     return find_last_not_of(sz, nStart, wxStrlen(sz));
 974 }
 975
 976 size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
 977 {
 978     size_t len = length();
 979
 980     if ( nStart == npos )
 981     {
 982         nStart = len - 1;
 983     }
 984     else
 985     {
 986         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 987     }
 988
 989     size_t idx = nStart;
 990     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 991           i != rend(); --idx, ++i )
 992     {
 993         if ( wxTmemchr(sz, *i, n) )
 994             return idx;
 995     }
 996
 997     return npos;
 998 }
 999
1000 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
1001 {
1002     size_t len = length();
1003
1004     if ( nStart == npos )
1005     {
1006         nStart = len - 1;
1007     }
1008     else
1009     {
1010         wxASSERT_MSG( nStart <= len, _T("invalid index") );
1011     }
1012
1013     size_t idx = nStart;
1014     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1015           i != rend(); --idx, ++i )
1016     {
1017         if ( !wxTmemchr(sz, *i, n) )
1018             return idx;
1019     }
1020
1021     return npos;
1022 }
1023
1024 size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
1025 {
1026     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
1027
1028     size_t idx = nStart;
1029     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
1030     {
1031         if ( *i != ch )
1032             return idx;
1033     }
1034
1035     return npos;
1036 }
1037
1038 size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
1039 {
1040     size_t len = length();
1041
1042     if ( nStart == npos )
1043     {
1044         nStart = len - 1;
1045     }
1046     else
1047     {
1048         wxASSERT_MSG( nStart <= len, _T("invalid index") );
1049     }
1050
1051     size_t idx = nStart;
1052     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1053           i != rend(); --idx, ++i )
1054     {
1055         if ( *i != ch )
1056             return idx;
1057     }
1058
1059     return npos;
1060 }
1061
// The functions above were implemented for wchar_t* arguments in Unicode
// build and char* in ANSI build; below are the overloads for the other
// character type. Each of them converts its argument using wxConvLibc and
// forwards to the corresponding wxChar* overload.
//
// wxOtherCharType is the character type not covered above and STRCONV(...)
// expands to a wxConvLibc conversion call whose result is cast to
// const wxChar*.
//
// NOTE(review): presumably the conversion returns a temporary buffer, in
// which case the pointer must not be kept beyond the full expression it is
// used in -- confirm before refactoring these one-liners.
//
// NOTE(review): the overloads taking n pass the same n both as the input
// length of the conversion and as the size of the converted set; this looks
// like it assumes that the conversion preserves the number of characters --
// confirm for multibyte encodings.
#if wxUSE_UNICODE
    #define wxOtherCharType char
    #define STRCONV         (const wxChar*)wxConvLibc.cMB2WC
#else
    #define wxOtherCharType wchar_t
    #define STRCONV         (const wxChar*)wxConvLibc.cWC2MB
#endif

size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_first_of(STRCONV(sz), nStart); }

size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
                               size_t n) const
    { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_last_of(STRCONV(sz), nStart); }
size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
                              size_t n) const
    { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_first_not_of(STRCONV(sz), nStart); }
size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
                                   size_t n) const
    { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_last_not_of(STRCONV(sz), nStart); }
size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
                                  size_t n) const
    { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }

// the helper macros are only needed for the definitions above
#undef wxOtherCharType
#undef STRCONV
1097
1098 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1099
1100 // ===========================================================================
1101 // other common string functions
1102 // ===========================================================================
1103
1104 int wxString::CmpNoCase(const wxString& s) const
1105 {
1106     // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
1107
1108     const_iterator i1 = begin();
1109     const_iterator end1 = end();
1110     const_iterator i2 = s.begin();
1111     const_iterator end2 = s.end();
1112
1113     for ( ; i1 != end1 && i2 != end2; ++i1, ++i2 )
1114     {
1115         wxUniChar lower1 = (wxChar)wxTolower(*i1);
1116         wxUniChar lower2 = (wxChar)wxTolower(*i2);
1117         if ( lower1 != lower2 )
1118             return lower1 < lower2 ? -1 : 1;
1119     }
1120
1121     size_t len1 = length();
1122     size_t len2 = s.length();
1123
1124     if ( len1 < len2 )
1125         return -1;
1126     else if ( len1 > len2 )
1127         return 1;
1128     return 0;
1129 }
1130
1131
1132 #if wxUSE_UNICODE
1133
1134 #ifdef __MWERKS__
1135 #ifndef __SCHAR_MAX__
1136 #define __SCHAR_MAX__ 127
1137 #endif
1138 #endif
1139
1140 wxString wxString::FromAscii(const char *ascii, size_t len)
1141 {
1142     if (!ascii || len == 0)
1143        return wxEmptyString;
1144
1145     wxString res;
1146
1147     {
1148         wxStringInternalBuffer buf(res, len);
1149         wxStringCharType *dest = buf;
1150
1151         for ( ; len > 0; --len )
1152         {
1153             unsigned char c = (unsigned char)*ascii++;
1154             wxASSERT_MSG( c < 0x80,
1155                           _T("Non-ASCII value passed to FromAscii().") );
1156
1157             *dest++ = (wchar_t)c;
1158         }
1159     }
1160
1161     return res;
1162 }
1163
1164 wxString wxString::FromAscii(const char *ascii)
1165 {
1166     return FromAscii(ascii, wxStrlen(ascii));
1167 }
1168
1169 wxString wxString::FromAscii(char ascii)
1170 {
1171     // What do we do with '\0' ?
1172
1173     unsigned char c = (unsigned char)ascii;
1174
1175     wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") );
1176
1177     // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1178     return wxString(wxUniChar((wchar_t)c));
1179 }
1180
1181 const wxCharBuffer wxString::ToAscii() const
1182 {
1183     // this will allocate enough space for the terminating NUL too
1184     wxCharBuffer buffer(length());
1185     char *dest = buffer.data();
1186
1187     for ( const_iterator i = begin(); i != end(); ++i )
1188     {
1189         wxUniChar c(*i);
1190         // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1191         *dest++ = c.IsAscii() ? (char)c : '_';
1192
1193         // the output string can't have embedded NULs anyhow, so we can safely
1194         // stop at first of them even if we do have any
1195         if ( !c )
1196             break;
1197     }
1198
1199     return buffer;
1200 }
1201
1202 #endif // wxUSE_UNICODE
1203
1204 // extract string of length nCount starting at nFirst
1205 wxString wxString::Mid(size_t nFirst, size_t nCount) const
1206 {
1207     size_t nLen = length();
1208
1209     // default value of nCount is npos and means "till the end"
1210     if ( nCount == npos )
1211     {
1212         nCount = nLen - nFirst;
1213     }
1214
1215     // out-of-bounds requests return sensible things
1216     if ( nFirst + nCount > nLen )
1217     {
1218         nCount = nLen - nFirst;
1219     }
1220
1221     if ( nFirst > nLen )
1222     {
1223         // AllocCopy() will return empty string
1224         return wxEmptyString;
1225     }
1226
1227     wxString dest(*this, nFirst, nCount);
1228     if ( dest.length() != nCount )
1229     {
1230         wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1231     }
1232
1233     return dest;
1234 }
1235
1236 // check that the string starts with prefix and return the rest of the string
1237 // in the provided pointer if it is not NULL, otherwise return false
1238 bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
1239 {
1240     if ( compare(0, prefix.length(), prefix) != 0 )
1241         return false;
1242
1243     if ( rest )
1244     {
1245         // put the rest of the string into provided pointer
1246         rest->assign(*this, prefix.length(), npos);
1247     }
1248
1249     return true;
1250 }
1251
1252
1253 // check that the string ends with suffix and return the rest of it in the
1254 // provided pointer if it is not NULL, otherwise return false
1255 bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
1256 {
1257     int start = length() - suffix.length();
1258
1259     if ( start < 0 || compare(start, npos, suffix) != 0 )
1260         return false;
1261
1262     if ( rest )
1263     {
1264         // put the rest of the string into provided pointer
1265         rest->assign(*this, 0, start);
1266     }
1267
1268     return true;
1269 }
1270
1271
1272 // extract nCount last (rightmost) characters
1273 wxString wxString::Right(size_t nCount) const
1274 {
1275   if ( nCount > length() )
1276     nCount = length();
1277
1278   wxString dest(*this, length() - nCount, nCount);
1279   if ( dest.length() != nCount ) {
1280     wxFAIL_MSG( _T("out of memory in wxString::Right") );
1281   }
1282   return dest;
1283 }
1284
1285 // get all characters after the last occurrence of ch
1286 // (returns the whole string if ch not found)
1287 wxString wxString::AfterLast(wxUniChar ch) const
1288 {
1289   wxString str;
1290   int iPos = Find(ch, true);
1291   if ( iPos == wxNOT_FOUND )
1292     str = *this;
1293   else
1294     str.assign(*this, iPos + 1, npos);
1295
1296   return str;
1297 }
1298
1299 // extract nCount first (leftmost) characters
1300 wxString wxString::Left(size_t nCount) const
1301 {
1302   if ( nCount > length() )
1303     nCount = length();
1304
1305   wxString dest(*this, 0, nCount);
1306   if ( dest.length() != nCount ) {
1307     wxFAIL_MSG( _T("out of memory in wxString::Left") );
1308   }
1309   return dest;
1310 }
1311
1312 // get all characters before the first occurrence of ch
1313 // (returns the whole string if ch not found)
1314 wxString wxString::BeforeFirst(wxUniChar ch) const
1315 {
1316   int iPos = Find(ch);
1317   if ( iPos == wxNOT_FOUND )
1318       iPos = length();
1319   return wxString(*this, 0, iPos);
1320 }
1321
1322 /// get all characters before the last occurrence of ch
1323 /// (returns empty string if ch not found)
1324 wxString wxString::BeforeLast(wxUniChar ch) const
1325 {
1326   wxString str;
1327   int iPos = Find(ch, true);
1328   if ( iPos != wxNOT_FOUND && iPos != 0 )
1329     str = wxString(c_str(), iPos);
1330
1331   return str;
1332 }
1333
1334 /// get all characters after the first occurrence of ch
1335 /// (returns empty string if ch not found)
1336 wxString wxString::AfterFirst(wxUniChar ch) const
1337 {
1338   wxString str;
1339   int iPos = Find(ch);
1340   if ( iPos != wxNOT_FOUND )
1341       str.assign(*this, iPos + 1, npos);
1342
1343   return str;
1344 }
1345
1346 // replace first (or all) occurrences of some substring with another one
1347 size_t wxString::Replace(const wxString& strOld,
1348                          const wxString& strNew, bool bReplaceAll)
1349 {
1350     // if we tried to replace an empty string we'd enter an infinite loop below
1351     wxCHECK_MSG( !strOld.empty(), 0,
1352                  _T("wxString::Replace(): invalid parameter") );
1353
1354     wxSTRING_INVALIDATE_CACHE();
1355
1356     size_t uiCount = 0;   // count of replacements made
1357
1358     // optimize the special common case: replacement of one character by
1359     // another one (in UTF-8 case we can only do this for ASCII characters)
1360     //
1361     // benchmarks show that this special version is around 3 times faster
1362     // (depending on the proportion of matching characters and UTF-8/wchar_t
1363     // build)
1364     if ( strOld.m_impl.length() == 1 && strNew.m_impl.length() == 1 )
1365     {
1366         const wxStringCharType chOld = strOld.m_impl[0],
1367                                chNew = strNew.m_impl[0];
1368
1369         // this loop is the simplified version of the one below
1370         for ( size_t pos = 0; ; )
1371         {
1372             pos = m_impl.find(chOld, pos);
1373             if ( pos == npos )
1374                 break;
1375
1376             m_impl[pos++] = chNew;
1377
1378             uiCount++;
1379
1380             if ( !bReplaceAll )
1381                 break;
1382         }
1383     }
1384     else // general case
1385     {
1386         const size_t uiOldLen = strOld.m_impl.length();
1387         const size_t uiNewLen = strNew.m_impl.length();
1388
1389         for ( size_t pos = 0; ; )
1390         {
1391             pos = m_impl.find(strOld.m_impl, pos);
1392             if ( pos == npos )
1393                 break;
1394
1395             // replace this occurrence of the old string with the new one
1396             m_impl.replace(pos, uiOldLen, strNew.m_impl);
1397
1398             // move up pos past the string that was replaced
1399             pos += uiNewLen;
1400
1401             // increase replace count
1402             uiCount++;
1403
1404             // stop after the first one?
1405             if ( !bReplaceAll )
1406                 break;
1407         }
1408     }
1409
1410     return uiCount;
1411 }
1412
1413 bool wxString::IsAscii() const
1414 {
1415     for ( const_iterator i = begin(); i != end(); ++i )
1416     {
1417         if ( !(*i).IsAscii() )
1418             return false;
1419     }
1420
1421     return true;
1422 }
1423
1424 bool wxString::IsWord() const
1425 {
1426     for ( const_iterator i = begin(); i != end(); ++i )
1427     {
1428         if ( !wxIsalpha(*i) )
1429             return false;
1430     }
1431
1432     return true;
1433 }
1434
1435 bool wxString::IsNumber() const
1436 {
1437     if ( empty() )
1438         return true;
1439
1440     const_iterator i = begin();
1441
1442     if ( *i == _T('-') || *i == _T('+') )
1443         ++i;
1444
1445     for ( ; i != end(); ++i )
1446     {
1447         if ( !wxIsdigit(*i) )
1448             return false;
1449     }
1450
1451     return true;
1452 }
1453
1454 wxString wxString::Strip(stripType w) const
1455 {
1456     wxString s = *this;
1457     if ( w & leading ) s.Trim(false);
1458     if ( w & trailing ) s.Trim(true);
1459     return s;
1460 }
1461
1462 // ---------------------------------------------------------------------------
1463 // case conversion
1464 // ---------------------------------------------------------------------------
1465
1466 wxString& wxString::MakeUpper()
1467 {
1468   for ( iterator it = begin(), en = end(); it != en; ++it )
1469     *it = (wxChar)wxToupper(*it);
1470
1471   return *this;
1472 }
1473
1474 wxString& wxString::MakeLower()
1475 {
1476   for ( iterator it = begin(), en = end(); it != en; ++it )
1477     *it = (wxChar)wxTolower(*it);
1478
1479   return *this;
1480 }
1481
1482 wxString& wxString::MakeCapitalized()
1483 {
1484     const iterator en = end();
1485     iterator it = begin();
1486     if ( it != en )
1487     {
1488         *it = (wxChar)wxToupper(*it);
1489         for ( ++it; it != en; ++it )
1490             *it = (wxChar)wxTolower(*it);
1491     }
1492
1493     return *this;
1494 }
1495
1496 // ---------------------------------------------------------------------------
1497 // trimming and padding
1498 // ---------------------------------------------------------------------------
1499
1500 // some compilers (VC++ 6.0 not to name them) return true for a call to
1501 // isspace('\xEA') in the C locale which seems to be broken to me, but we have
1502 // to live with this by checking that the character is a 7 bit one - even if
1503 // this may fail to detect some spaces (I don't know if Unicode doesn't have
1504 // space-like symbols somewhere except in the first 128 chars), it is arguably
1505 // still better than trimming away accented letters
1506 inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1507
1508 // trims spaces (in the sense of isspace) from left or right side
1509 wxString& wxString::Trim(bool bFromRight)
1510 {
1511     // first check if we're going to modify the string at all
1512     if ( !empty() &&
1513          (
1514           (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1515           (!bFromRight && wxSafeIsspace(GetChar(0u)))
1516          )
1517        )
1518     {
1519         if ( bFromRight )
1520         {
1521             // find last non-space character
1522             reverse_iterator psz = rbegin();
1523             while ( (psz != rend()) && wxSafeIsspace(*psz) )
1524                 ++psz;
1525
1526             // truncate at trailing space start
1527             erase(psz.base(), end());
1528         }
1529         else
1530         {
1531             // find first non-space character
1532             iterator psz = begin();
1533             while ( (psz != end()) && wxSafeIsspace(*psz) )
1534                 ++psz;
1535
1536             // fix up data and length
1537             erase(begin(), psz);
1538         }
1539     }
1540
1541     return *this;
1542 }
1543
1544 // adds nCount characters chPad to the string from either side
1545 wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1546 {
1547     wxString s(chPad, nCount);
1548
1549     if ( bFromRight )
1550         *this += s;
1551     else
1552     {
1553         s += *this;
1554         swap(s);
1555     }
1556
1557     return *this;
1558 }
1559
1560 // truncate the string
1561 wxString& wxString::Truncate(size_t uiLen)
1562 {
1563     if ( uiLen < length() )
1564     {
1565         erase(begin() + uiLen, end());
1566     }
1567     //else: nothing to do, string is already short enough
1568
1569     return *this;
1570 }
1571
1572 // ---------------------------------------------------------------------------
1573 // finding (return wxNOT_FOUND if not found and index otherwise)
1574 // ---------------------------------------------------------------------------
1575
1576 // find a character
1577 int wxString::Find(wxUniChar ch, bool bFromEnd) const
1578 {
1579     size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1580
1581     return (idx == npos) ? wxNOT_FOUND : (int)idx;
1582 }
1583
1584 // ----------------------------------------------------------------------------
1585 // conversion to numbers
1586 // ----------------------------------------------------------------------------
1587
// The implementation of all the integer conversion functions below is
// exactly the same, so it is factored out into a macro. Note that number
// extraction works correctly on UTF-8 strings, so we can use
// wxStringCharType and wx_str() for maximum efficiency.

// DO_IF_NOT_WINCE(x) expands to x unless __WXWINCE__ is defined, in which
// case it expands to nothing; it wraps the uses of errno, which is not
// included for that platform at the top of this file.
#ifndef __WXWINCE__
    #define DO_IF_NOT_WINCE(x) x
#else
    #define DO_IF_NOT_WINCE(x)
#endif

// Expands to the complete body of a "bool ToXXX(T *out, int base) const"
// member function:
//  - out:  the pointer receiving the result, checked to be non-NULL
//  - base: the numeric base, asserted to be 0 or in the 2..36 range
//  - func: the strtol()-like function used for the conversion
//  - T:    the type of the value returned by func
//
// The expansion returns from the enclosing function: false if out is NULL or
// the conversion failed (in which case *out is not modified) and true after
// storing the value in *out. It does not end with a semicolon, this must be
// provided at the point of use.
#define WX_STRING_TO_INT_TYPE(out, base, func, T)                           \
    wxCHECK_MSG( out, false, _T("NULL output pointer") );                   \
    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );  \
                                                                            \
    DO_IF_NOT_WINCE( errno = 0; )                                           \
                                                                            \
    const wxStringCharType *start = wx_str();                               \
    wxStringCharType *end;                                                  \
    T val = func(start, &end, base);                                        \
                                                                            \
    /* return true only if scan was stopped by the terminating NUL and */   \
    /* if the string was not empty to start with and no under/overflow */   \
    /* occurred: */                                                         \
    if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )         \
        return false;                                                       \
    *out = val;                                                             \
    return true
1615
// Convert the string to a long in the given base using wxStrtol().
//
// Returns true and stores the value in *pVal only if the entire non-empty
// string was consumed without a range error; otherwise returns false and
// leaves *pVal unchanged.
bool wxString::ToLong(long *pVal, int base) const
{
    // the macro expands to the whole function body, including the returns
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtol, long);
}
1620
// Convert the string to an unsigned long in the given base using
// wxStrtoul().
//
// Returns true and stores the value in *pVal only if the entire non-empty
// string was consumed without a range error; otherwise returns false and
// leaves *pVal unchanged.
bool wxString::ToULong(unsigned long *pVal, int base) const
{
    // the macro expands to the whole function body, including the returns
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoul, unsigned long);
}
1625
// Convert the string to a wxLongLong_t in the given base using wxStrtoll().
//
// Returns true and stores the value in *pVal only if the entire non-empty
// string was consumed without a range error; otherwise returns false and
// leaves *pVal unchanged.
bool wxString::ToLongLong(wxLongLong_t *pVal, int base) const
{
    // the macro expands to the whole function body, including the returns
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoll, wxLongLong_t);
}
1630
// Convert the string to a wxULongLong_t in the given base using
// wxStrtoull().
//
// Returns true and stores the value in *pVal only if the entire non-empty
// string was consumed without a range error; otherwise returns false and
// leaves *pVal unchanged.
bool wxString::ToULongLong(wxULongLong_t *pVal, int base) const
{
    // the macro expands to the whole function body, including the returns
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoull, wxULongLong_t);
}
1635
1636 bool wxString::ToDouble(double *pVal) const
1637 {
1638     wxCHECK_MSG( pVal, false, _T("NULL output pointer") );
1639
1640     DO_IF_NOT_WINCE( errno = 0; )
1641
1642     const wxChar *start = c_str();
1643     wxChar *end;
1644     double val = wxStrtod(start, &end);
1645
1646     // return true only if scan was stopped by the terminating NUL and if the
1647     // string was not empty to start with and no under/overflow occurred
1648     if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )
1649         return false;
1650
1651     *pVal = val;
1652
1653     return true;
1654 }
1655
1656 // ---------------------------------------------------------------------------
1657 // formatted output
1658 // ---------------------------------------------------------------------------
1659
1660 #if !wxUSE_UTF8_LOCALE_ONLY
1661 /* static */
1662 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1663 wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
1664 #else
1665 wxString wxString::DoFormatWchar(const wxChar *format, ...)
1666 #endif
1667 {
1668     va_list argptr;
1669     va_start(argptr, format);
1670
1671     wxString s;
1672     s.PrintfV(format, argptr);
1673
1674     va_end(argptr);
1675
1676     return s;
1677 }
1678 #endif // !wxUSE_UTF8_LOCALE_ONLY
1679
#if wxUSE_UNICODE_UTF8
// Return a new string obtained by formatting the variable arguments
// according to the char format string.
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
1695
1696 /* static */
1697 wxString wxString::FormatV(const wxString& format, va_list argptr)
1698 {
1699     wxString s;
1700     s.PrintfV(format, argptr);
1701     return s;
1702 }
1703
1704 #if !wxUSE_UTF8_LOCALE_ONLY
1705 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1706 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
1707 #else
1708 int wxString::DoPrintfWchar(const wxChar *format, ...)
1709 #endif
1710 {
1711     va_list argptr;
1712     va_start(argptr, format);
1713
1714 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1715     // get a pointer to the wxString instance; we have to use dynamic_cast<>
1716     // because it's the only cast that works safely for downcasting when
1717     // multiple inheritance is used:
1718     wxString *str = static_cast<wxString*>(this);
1719 #else
1720     wxString *str = this;
1721 #endif
1722
1723     int iLen = str->PrintfV(format, argptr);
1724
1725     va_end(argptr);
1726
1727     return iLen;
1728 }
1729 #endif // !wxUSE_UTF8_LOCALE_ONLY
1730
#if wxUSE_UNICODE_UTF8
// Format the variable arguments according to the char format string into
// this string and return whatever PrintfV() returns.
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);
    const int written = PrintfV(format, args);
    va_end(args);

    return written;
}
#endif // wxUSE_UNICODE_UTF8
1744
1745 /*
    Uses wxVsnprintf and places the result into this string.
1747
1748     In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1749     it is vswprintf.  Due to a discrepancy between vsnprintf and vswprintf in
1750     the ISO C99 (and thus SUSv3) standard the return value for the case of
1751     an undersized buffer is inconsistent.  For conforming vsnprintf
1752     implementations the function must return the number of characters that
1753     would have been printed had the buffer been large enough.  For conforming
1754     vswprintf implementations the function must return a negative number
1755     and set errno.
1756
1757     What vswprintf sets errno to is undefined but Darwin seems to set it to
1758     EOVERFLOW.  The only expected errno are EILSEQ and EINVAL.  Both of
1759     those are defined in the standard and backed up by several conformance
1760     statements.  Note that ENOMEM mentioned in the manual page does not
1761     apply to swprintf, only wprintf and fwprintf.
1762
1763     Official manual page:
1764     http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1765
1766     Some conformance statements (AIX, Solaris):
1767     http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1768     http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1769
1770     Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1771     EILSEQ and EINVAL are specifically defined to mean the error is other than
1772     an undersized buffer and no other errno are defined we treat those two
1773     as meaning hard errors and everything else gets the old behavior which
1774     is to keep looping and increasing buffer size until the function succeeds.
1775
1776     In practice it's impossible to determine before compilation which behavior
1777     may be used.  The vswprintf function may have vsnprintf-like behavior or
1778     vice-versa.  Behavior detected on one release can theoretically change
1779     with an updated release.  Not to mention that configure testing for it
1780     would require the test to be run on the host system, not the build system
1781     which makes cross compilation difficult. Therefore, we make no assumptions
1782     about behavior and try our best to handle every known case, including the
1783     case where wxVsnprintf returns a negative number and fails to set errno.
1784
1785     There is yet one more non-standard implementation and that is our own.
1786     Fortunately, that can be detected at compile-time.
1787
1788     On top of all that, ISO C99 explicitly defines snprintf to write a null
    character to the last position of the specified buffer.  That would be at
    the given buffer size minus 1.  It is supposed to do this even if it
1791     turns out that the buffer is sized too small.
1792
1793     Darwin (tested on 10.5) follows the C99 behavior exactly.
1794
1795     Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1796     errno even when it fails.  However, it only seems to ever fail due
1797     to an undersized buffer.
1798 */
1799 #if wxUSE_UNICODE_UTF8
1800 template<typename BufferType>
1801 #else
1802 // we only need one version in non-UTF8 builds and at least two Windows
1803 // compilers have problems with this function template, so use just one
1804 // normal function here
1805 #endif
1806 static int DoStringPrintfV(wxString& str,
1807                            const wxString& format, va_list argptr)
1808 {
1809     int size = 1024;
1810
1811     for ( ;; )
1812     {
1813 #if wxUSE_UNICODE_UTF8
1814         BufferType tmp(str, size + 1);
1815         typename BufferType::CharType *buf = tmp;
1816 #else
1817         wxStringBuffer tmp(str, size + 1);
1818         wxChar *buf = tmp;
1819 #endif
1820
1821         if ( !buf )
1822         {
1823             // out of memory
1824
1825             // in UTF-8 build, leaving uninitialized junk in the buffer
1826             // could result in invalid non-empty UTF-8 string, so just
1827             // reset the string to empty on failure:
1828             buf[0] = '\0';
1829             return -1;
1830         }
1831
1832         // wxVsnprintf() may modify the original arg pointer, so pass it
1833         // only a copy
1834         va_list argptrcopy;
1835         wxVaCopy(argptrcopy, argptr);
1836
1837 #ifndef __WXWINCE__
1838         // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1839         errno = 0;
1840 #endif
1841         int len = wxVsnprintf(buf, size, format, argptrcopy);
1842         va_end(argptrcopy);
1843
1844         // some implementations of vsnprintf() don't NUL terminate
1845         // the string if there is not enough space for it so
1846         // always do it manually
1847         // FIXME: This really seems to be the wrong and would be an off-by-one
1848         // bug except the code above allocates an extra character.
1849         buf[size] = _T('\0');
1850
1851         // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1852         // total number of characters which would have been written if the
1853         // buffer were large enough (newer standards such as Unix98)
1854         if ( len < 0 )
1855         {
1856             // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1857             //     wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1858             //     is true if *both* of them use our own implementation,
1859             //     otherwise we can't be sure
1860 #if wxUSE_WXVSNPRINTF
1861             // we know that our own implementation of wxVsnprintf() returns -1
1862             // only for a format error - thus there's something wrong with
1863             // the user's format string
1864             buf[0] = '\0';
1865             return -1;
1866 #else // possibly using system version
1867             // assume it only returns error if there is not enough space, but
1868             // as we don't know how much we need, double the current size of
1869             // the buffer
1870 #ifndef __WXWINCE__
1871             if( (errno == EILSEQ) || (errno == EINVAL) )
1872             // If errno was set to one of the two well-known hard errors
1873             // then fail immediately to avoid an infinite loop.
1874                 return -1;
1875             else
1876 #endif // __WXWINCE__
1877             // still not enough, as we don't know how much we need, double the
1878             // current size of the buffer
1879                 size *= 2;
1880 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1881         }
1882         else if ( len >= size )
1883         {
1884 #if wxUSE_WXVSNPRINTF
1885             // we know that our own implementation of wxVsnprintf() returns
1886             // size+1 when there's not enough space but that's not the size
1887             // of the required buffer!
1888             size *= 2;      // so we just double the current size of the buffer
1889 #else
1890             // some vsnprintf() implementations NUL-terminate the buffer and
1891             // some don't in len == size case, to be safe always add 1
1892             // FIXME: I don't quite understand this comment.  The vsnprintf
1893             // function is specifically defined to return the number of
1894             // characters printed not including the null terminator.
1895             // So OF COURSE you need to add 1 to get the right buffer size.
1896             // The following line is definitely correct, no question.
1897             size = len + 1;
1898 #endif
1899         }
1900         else // ok, there was enough space
1901         {
1902             break;
1903         }
1904     }
1905
1906     // we could have overshot
1907     str.Shrink();
1908
1909     return str.length();
1910 }
1911
1912 int wxString::PrintfV(const wxString& format, va_list argptr)
1913 {
1914 #if wxUSE_UNICODE_UTF8
1915     #if wxUSE_STL_BASED_WXSTRING
1916         typedef wxStringTypeBuffer<char> Utf8Buffer;
1917     #else
1918         typedef wxStringInternalBuffer Utf8Buffer;
1919     #endif
1920 #endif
1921
1922 #if wxUSE_UTF8_LOCALE_ONLY
1923     return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
1924 #else
1925     #if wxUSE_UNICODE_UTF8
1926     if ( wxLocaleIsUtf8 )
1927         return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
1928     else
1929         // wxChar* version
1930         return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
1931     #else
1932         return DoStringPrintfV(*this, format, argptr);
1933     #endif // UTF8/WCHAR
1934 #endif
1935 }
1936
1937 // ----------------------------------------------------------------------------
1938 // misc other operations
1939 // ----------------------------------------------------------------------------
1940
1941 // returns true if the string matches the pattern which may contain '*' and
1942 // '?' metacharacters (as usual, '?' matches any character and '*' any number
1943 // of them)
1944 bool wxString::Matches(const wxString& mask) const
1945 {
1946     // I disable this code as it doesn't seem to be faster (in fact, it seems
1947     // to be much slower) than the old, hand-written code below and using it
1948     // here requires always linking with libregex even if the user code doesn't
1949     // use it
1950 #if 0 // wxUSE_REGEX
1951     // first translate the shell-like mask into a regex
1952     wxString pattern;
1953     pattern.reserve(wxStrlen(pszMask));
1954
1955     pattern += _T('^');
1956     while ( *pszMask )
1957     {
1958         switch ( *pszMask )
1959         {
1960             case _T('?'):
1961                 pattern += _T('.');
1962                 break;
1963
1964             case _T('*'):
1965                 pattern += _T(".*");
1966                 break;
1967
1968             case _T('^'):
1969             case _T('.'):
1970             case _T('$'):
1971             case _T('('):
1972             case _T(')'):
1973             case _T('|'):
1974             case _T('+'):
1975             case _T('\\'):
1976                 // these characters are special in a RE, quote them
1977                 // (however note that we don't quote '[' and ']' to allow
1978                 // using them for Unix shell like matching)
1979                 pattern += _T('\\');
1980                 // fall through
1981
1982             default:
1983                 pattern += *pszMask;
1984         }
1985
1986         pszMask++;
1987     }
1988     pattern += _T('$');
1989
1990     // and now use it
1991     return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
1992 #else // !wxUSE_REGEX
1993   // TODO: this is, of course, awfully inefficient...
1994
1995   // FIXME-UTF8: implement using iterators, remove #if
1996 #if wxUSE_UNICODE_UTF8
1997   wxWCharBuffer maskBuf = mask.wc_str();
1998   wxWCharBuffer txtBuf = wc_str();
1999   const wxChar *pszMask = maskBuf.data();
2000   const wxChar *pszTxt = txtBuf.data();
2001 #else
2002   const wxChar *pszMask = mask.wx_str();
2003   // the char currently being checked
2004   const wxChar *pszTxt = wx_str();
2005 #endif
2006
2007   // the last location where '*' matched
2008   const wxChar *pszLastStarInText = NULL;
2009   const wxChar *pszLastStarInMask = NULL;
2010
2011 match:
2012   for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
2013     switch ( *pszMask ) {
2014       case wxT('?'):
2015         if ( *pszTxt == wxT('\0') )
2016           return false;
2017
2018         // pszTxt and pszMask will be incremented in the loop statement
2019
2020         break;
2021
2022       case wxT('*'):
2023         {
2024           // remember where we started to be able to backtrack later
2025           pszLastStarInText = pszTxt;
2026           pszLastStarInMask = pszMask;
2027
2028           // ignore special chars immediately following this one
2029           // (should this be an error?)
2030           while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
2031             pszMask++;
2032
2033           // if there is nothing more, match
2034           if ( *pszMask == wxT('\0') )
2035             return true;
2036
2037           // are there any other metacharacters in the mask?
2038           size_t uiLenMask;
2039           const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));
2040
2041           if ( pEndMask != NULL ) {
2042             // we have to match the string between two metachars
2043             uiLenMask = pEndMask - pszMask;
2044           }
2045           else {
2046             // we have to match the remainder of the string
2047             uiLenMask = wxStrlen(pszMask);
2048           }
2049
2050           wxString strToMatch(pszMask, uiLenMask);
2051           const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
2052           if ( pMatch == NULL )
2053             return false;
2054
2055           // -1 to compensate "++" in the loop
2056           pszTxt = pMatch + uiLenMask - 1;
2057           pszMask += uiLenMask - 1;
2058         }
2059         break;
2060
2061       default:
2062         if ( *pszMask != *pszTxt )
2063           return false;
2064         break;
2065     }
2066   }
2067
2068   // match only if nothing left
2069   if ( *pszTxt == wxT('\0') )
2070     return true;
2071
2072   // if we failed to match, backtrack if we can
2073   if ( pszLastStarInText ) {
2074     pszTxt = pszLastStarInText + 1;
2075     pszMask = pszLastStarInMask;
2076
2077     pszLastStarInText = NULL;
2078
2079     // don't bother resetting pszLastStarInMask, it's unnecessary
2080
2081     goto match;
2082   }
2083
2084   return false;
2085 #endif // wxUSE_REGEX/!wxUSE_REGEX
2086 }
2087
2088 // Count the number of chars
2089 int wxString::Freq(wxUniChar ch) const
2090 {
2091     int count = 0;
2092     for ( const_iterator i = begin(); i != end(); ++i )
2093     {
2094         if ( *i == ch )
2095             count ++;
2096     }
2097     return count;
2098 }