src/common/string.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/string.cpp
   3 // Purpose:     wxString class
   4 // Author:      Vadim Zeitlin, Ryan Norton
   5 // Modified by:
   6 // Created:     29/01/98
   7 // RCS-ID:      $Id$
   8 // Copyright:   (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
   9 //              (c) 2004 Ryan Norton <wxprojects@comcast.net>
  10 // Licence:     wxWindows licence
  11 /////////////////////////////////////////////////////////////////////////////
  12
  13 // ===========================================================================
  14 // headers, declarations, constants
  15 // ===========================================================================
  16
  17 // For compilers that support precompilation, includes "wx.h".
  18 #include "wx/wxprec.h"
  19
  20 #ifdef __BORLANDC__
  21     #pragma hdrstop
  22 #endif
  23
  24 #ifndef WX_PRECOMP
  25     #include "wx/string.h"
  26     #include "wx/wxcrtvararg.h"
  27 #endif
  28
  29 #include <ctype.h>
  30
  31 #ifndef __WXWINCE__
  32     #include <errno.h>
  33 #endif
  34
  35 #include <string.h>
  36 #include <stdlib.h>
  37
  38 #include "wx/hashmap.h"
  39
  // Low-level string primitives used by wxString: the wxChar-based helpers are
  // used by default and the plain C library functions when wxUSE_UNICODE_UTF8
  // is set.
  #if !wxUSE_UNICODE_UTF8
      #define wxStringStrlen   wxStrlen
      #define wxStringMemchr   wxTmemchr
      #define wxStringMemcmp   wxTmemcmp
      #define wxStringMemcpy   wxTmemcpy
  #else // wxUSE_UNICODE_UTF8
      #define wxStringStrlen   strlen
      #define wxStringMemchr   memchr
      #define wxStringMemcmp   memcmp
      #define wxStringMemcpy   memcpy
  #endif // !wxUSE_UNICODE_UTF8/wxUSE_UNICODE_UTF8
  52
  53
  54 // ---------------------------------------------------------------------------
  55 // static class variables definition
  56 // ---------------------------------------------------------------------------
  57
  58 //According to STL _must_ be a -1 size_t
  59 const size_t wxString::npos = (size_t) -1;
  60
  61 #if wxUSE_STRING_POS_CACHE
  62
  63 // gdb seems to be unable to display thread-local variables correctly, at least
  64 // not my 6.4.98 version under amd64, so provide this debugging helper to do it
  65 #ifdef __WXDEBUG__
  66
  67 struct wxStrCacheDumper
  68 {
  69     static void ShowAll()
  70     {
  71         puts("*** wxString cache dump:");
  72         for ( unsigned n = 0; n < wxString::Cache::SIZE; n++ )
  73         {
  74             const wxString::Cache::Element&
  75                 c = wxString::GetCacheBegin()[n];
  76
  77             printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
  78                    n,
  79                    n == wxString::LastUsedCacheElement() ? " [*]" : "",
  80                    c.str,
  81                    (unsigned long)c.pos,
  82                    (unsigned long)c.impl,
  83                    (long)c.len);
  84         }
  85     }
  86 };
  87
  88 void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
  89
  90 #endif // __WXDEBUG__
  91
  92 #ifdef wxPROFILE_STRING_CACHE
  93
  94 wxString::CacheStats wxString::ms_cacheStats;
  95
  96 namespace
  97 {
  98
  99 struct ShowCacheStats
 100 {
 101     ~ShowCacheStats()
 102     {
 103         const wxString::CacheStats& stats = wxString::ms_cacheStats;
 104
 105         if ( stats.postot )
 106         {
 107             puts("*** wxString cache statistics:");
 108             printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
 109                    stats.postot);
 110             printf("\tHits %u (of which %u not used) or %.2f%%\n",
 111                    stats.poshits,
 112                    stats.mishits,
 113                    100.*float(stats.poshits - stats.mishits)/stats.postot);
 114             printf("\tAverage position requested: %.2f\n",
 115                    float(stats.sumpos) / stats.postot);
 116             printf("\tAverage offset after cached hint: %.2f\n",
 117                    float(stats.sumofs) / stats.postot);
 118         }
 119
 120         if ( stats.lentot )
 121         {
 122             printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
 123                    stats.lentot, 100.*float(stats.lenhits)/stats.lentot);
 124         }
 125     }
 126 } s_showCacheStats;
 127
 128 } // anonymous namespace
 129
 130 #endif // wxPROFILE_STRING_CACHE
 131
 132 #endif // wxUSE_STRING_POS_CACHE
 133
 134 // ----------------------------------------------------------------------------
 135 // global functions
 136 // ----------------------------------------------------------------------------
 137
 138 #if wxUSE_STD_IOSTREAM
 139
 140 #include <iostream>
 141
 142 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
 143 {
 144 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
 145     return os << (const char *)str.AsCharBuf();
 146 #else
 147     return os << str.AsInternal();
 148 #endif
 149 }
 150
 151 wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
 152 {
 153     return os << str.c_str();
 154 }
 155
 156 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str)
 157 {
 158     return os << str.data();
 159 }
 160
 161 #ifndef __BORLANDC__
 162 wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)
 163 {
 164     return os << str.data();
 165 }
 166 #endif
 167
 168 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
 169
 170 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxString& str)
 171 {
 172     return wos << str.wc_str();
 173 }
 174
 175 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxCStrData& str)
 176 {
 177     return wos << str.AsWChar();
 178 }
 179
 180 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxWCharBuffer& str)
 181 {
 182     return wos << str.data();
 183 }
 184
 185 #endif  // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
 186
 187 #endif // wxUSE_STD_IOSTREAM
 188
 189 // ===========================================================================
 190 // wxString class core
 191 // ===========================================================================
 192
 193 #if wxUSE_UNICODE_UTF8
 194
 195 void wxString::PosLenToImpl(size_t pos, size_t len,
 196                             size_t *implPos, size_t *implLen) const
 197 {
 198     if ( pos == npos )
 199     {
 200         *implPos = npos;
 201     }
 202     else // have valid start position
 203     {
 204         const const_iterator b = GetIterForNthChar(pos);
 205         *implPos = wxStringImpl::const_iterator(b.impl()) - m_impl.begin();
 206         if ( len == npos )
 207         {
 208             *implLen = npos;
 209         }
 210         else // have valid length too
 211         {
 212             // we need to handle the case of length specifying a substring
 213             // going beyond the end of the string, just as std::string does
 214             const const_iterator e(end());
 215             const_iterator i(b);
 216             while ( len && i <= e )
 217             {
 218                 ++i;
 219                 --len;
 220             }
 221
 222             *implLen = i.impl() - b.impl();
 223         }
 224     }
 225 }
 226
 227 #endif // wxUSE_UNICODE_UTF8
 228
 229 // ----------------------------------------------------------------------------
 230 // wxCStrData converted strings caching
 231 // ----------------------------------------------------------------------------
 232
 233 // FIXME-UTF8: temporarily disabled because it doesn't work with global
 234 //             string objects; re-enable after fixing this bug and benchmarking
 235 //             performance to see if using a hash is a good idea at all
 236 #if 0
 237
 238 // For backward compatibility reasons, it must be possible to assign the value
 239 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
 240 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
 241 // because the memory would be freed immediately, but it has to be valid as long
 242 // as the string is not modified, so that code like this still works:
 243 //
 244 // const wxChar *s = str.c_str();
 245 // while ( s ) { ... }
 246
 247 // FIXME-UTF8: not thread safe!
 248 // FIXME-UTF8: we currently clear the cached conversion only when the string is
 249 //             destroyed, but we should do it when the string is modified, to
 250 //             keep memory usage down
 251 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
 252 //             invalidated the cache on every change, we could keep the previous
 253 //             conversion
 254 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
 255 //             to use mb_str() or wc_str() instead of (const [w]char*)c_str()
 256
 257 template<typename T>
 258 static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
 259 {
 260     typename T::iterator i = hash.find(wxConstCast(s, wxString));
 261     if ( i != hash.end() )
 262     {
 263         free(i->second);
 264         hash.erase(i);
 265     }
 266 }
 267
 268 #if wxUSE_UNICODE
 269 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
 270 //     so we have to use wxString* here and const-cast when used
 271 WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
 272                     wxStringCharConversionCache);
 273 static wxStringCharConversionCache gs_stringsCharCache;
 274
 275 const char* wxCStrData::AsChar() const
 276 {
 277     // remove previously cache value, if any (see FIXMEs above):
 278     DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
 279
 280     // convert the string and keep it:
 281     const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
 282         m_str->mb_str().release();
 283
 284     return s + m_offset;
 285 }
 286 #endif // wxUSE_UNICODE
 287
 288 #if !wxUSE_UNICODE_WCHAR
 289 WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
 290                     wxStringWCharConversionCache);
 291 static wxStringWCharConversionCache gs_stringsWCharCache;
 292
 293 const wchar_t* wxCStrData::AsWChar() const
 294 {
 295     // remove previously cache value, if any (see FIXMEs above):
 296     DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
 297
 298     // convert the string and keep it:
 299     const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
 300         m_str->wc_str().release();
 301
 302     return s + m_offset;
 303 }
 304 #endif // !wxUSE_UNICODE_WCHAR
 305
 306 wxString::~wxString()
 307 {
 308 #if wxUSE_UNICODE
 309     // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
 310     DeleteStringFromConversionCache(gs_stringsCharCache, this);
 311 #endif
 312 #if !wxUSE_UNICODE_WCHAR
 313     DeleteStringFromConversionCache(gs_stringsWCharCache, this);
 314 #endif
 315 }
 316 #endif
 317
 318 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 319 const char* wxCStrData::AsChar() const
 320 {
 321 #if wxUSE_UNICODE_UTF8
 322     if ( wxLocaleIsUtf8 )
 323         return AsInternal();
 324 #endif
 325     // under non-UTF8 locales, we have to convert the internal UTF-8
 326     // representation using wxConvLibc and cache the result
 327
 328     wxString *str = wxConstCast(m_str, wxString);
 329
 330     // convert the string:
 331     //
 332     // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
 333     //             have it) but it's unfortunately not obvious to implement
 334     //             because we don't know how big buffer do we need for the
 335     //             given string length (in case of multibyte encodings, e.g.
 336     //             ISO-2022-JP or UTF-8 when internal representation is wchar_t)
 337     //
 338     //             One idea would be to store more than just m_convertedToChar
 339     //             in wxString: then we could record the length of the string
 340     //             which was converted the last time and try to reuse the same
 341     //             buffer if the current length is not greater than it (this
 342     //             could still fail because string could have been modified in
 343     //             place but it would work most of the time, so we'd do it and
 344     //             only allocate the new buffer if in-place conversion returned
 345     //             an error). We could also store a bit saying if the string
 346     //             was modified since the last conversion (and update it in all
 347     //             operation modifying the string, of course) to avoid unneeded
 348     //             consequential conversions. But both of these ideas require
 349     //             adding more fields to wxString and require profiling results
 350     //             to be sure that we really gain enough from them to justify
 351     //             doing it.
 352     wxCharBuffer buf(str->mb_str());
 353
 354     // if it failed, return empty string and not NULL to avoid crashes in code
 355     // written with either wxWidgets 2 wxString or std::string behaviour in
 356     // mind: neither of them ever returns NULL and so we shouldn't neither
 357     if ( !buf )
 358         return "";
 359
 360     if ( str->m_convertedToChar &&
 361          strlen(buf) == strlen(str->m_convertedToChar) )
 362     {
 363         // keep the same buffer for as long as possible, so that several calls
 364         // to c_str() in a row still work:
 365         strcpy(str->m_convertedToChar, buf);
 366     }
 367     else
 368     {
 369         str->m_convertedToChar = buf.release();
 370     }
 371
 372     // and keep it:
 373     return str->m_convertedToChar + m_offset;
 374 }
 375 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
 376
 377 #if !wxUSE_UNICODE_WCHAR
 378 const wchar_t* wxCStrData::AsWChar() const
 379 {
 380     wxString *str = wxConstCast(m_str, wxString);
 381
 382     // convert the string:
 383     wxWCharBuffer buf(str->wc_str());
 384
 385     // notice that here, unlike above in AsChar(), conversion can't fail as our
 386     // internal UTF-8 is always well-formed -- or the string was corrupted and
 387     // all bets are off anyhow
 388
 389     // FIXME-UTF8: do the conversion in-place in the existing buffer
 390     if ( str->m_convertedToWChar &&
 391          wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
 392     {
 393         // keep the same buffer for as long as possible, so that several calls
 394         // to c_str() in a row still work:
 395         memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
 396     }
 397     else
 398     {
 399         str->m_convertedToWChar = buf.release();
 400     }
 401
 402     // and keep it:
 403     return str->m_convertedToWChar + m_offset;
 404 }
 405 #endif // !wxUSE_UNICODE_WCHAR
 406
 407 // ===========================================================================
 408 // wxString class core
 409 // ===========================================================================
 410
 411 // ---------------------------------------------------------------------------
 412 // construction and conversion
 413 // ---------------------------------------------------------------------------
 414
 415 #if wxUSE_UNICODE_WCHAR
 416 /* static */
 417 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 418                                                const wxMBConv& conv)
 419 {
 420     // anything to do?
 421     if ( !psz || nLength == 0 )
 422         return SubstrBufFromMB(L"", 0);
 423
 424     if ( nLength == npos )
 425         nLength = wxNO_LEN;
 426
 427     size_t wcLen;
 428     wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 429     if ( !wcLen )
 430         return SubstrBufFromMB(_T(""), 0);
 431     else
 432         return SubstrBufFromMB(wcBuf, wcLen);
 433 }
 434 #endif // wxUSE_UNICODE_WCHAR
 435
 436 #if wxUSE_UNICODE_UTF8
 437 /* static */
 438 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
 439                                                const wxMBConv& conv)
 440 {
 441     // anything to do?
 442     if ( !psz || nLength == 0 )
 443         return SubstrBufFromMB("", 0);
 444
 445     // if psz is already in UTF-8, we don't have to do the roundtrip to
 446     // wchar_t* and back:
 447     if ( conv.IsUTF8() )
 448     {
 449         // we need to validate the input because UTF8 iterators assume valid
 450         // UTF-8 sequence and psz may be invalid:
 451         if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
 452         {
 453             // we must pass the real string length to SubstrBufFromMB ctor
 454             if ( nLength == npos )
 455                 nLength = psz ? strlen(psz) : 0;
 456             return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz), nLength);
 457         }
 458         // else: do the roundtrip through wchar_t*
 459     }
 460
 461     if ( nLength == npos )
 462         nLength = wxNO_LEN;
 463
 464     // first convert to wide string:
 465     size_t wcLen;
 466     wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
 467     if ( !wcLen )
 468         return SubstrBufFromMB("", 0);
 469
 470     // and then to UTF-8:
 471     SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
 472     // widechar -> UTF-8 conversion isn't supposed to ever fail:
 473     wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
 474
 475     return buf;
 476 }
 477 #endif // wxUSE_UNICODE_UTF8
 478
 479 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 480 /* static */
 481 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
 482                                                const wxMBConv& conv)
 483 {
 484     // anything to do?
 485     if ( !pwz || nLength == 0 )
 486         return SubstrBufFromWC("", 0);
 487
 488     if ( nLength == npos )
 489         nLength = wxNO_LEN;
 490
 491     size_t mbLen;
 492     wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
 493     if ( !mbLen )
 494         return SubstrBufFromWC("", 0);
 495     else
 496         return SubstrBufFromWC(mbBuf, mbLen);
 497 }
 498 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
 499
 500
 501 #if wxUSE_UNICODE_WCHAR
 502
 503 //Convert wxString in Unicode mode to a multi-byte string
 504 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
 505 {
 506     return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL);
 507 }
 508
 509 #elif wxUSE_UNICODE_UTF8
 510
 511 const wxWCharBuffer wxString::wc_str() const
 512 {
 513     return wxMBConvStrictUTF8().cMB2WC
 514                                 (
 515                                     m_impl.c_str(),
 516                                     m_impl.length() + 1, // size, not length
 517                                     NULL
 518                                 );
 519 }
 520
 521 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
 522 {
 523     if ( conv.IsUTF8() )
 524         return wxCharBuffer::CreateNonOwned(m_impl.c_str());
 525
 526     // FIXME-UTF8: use wc_str() here once we have buffers with length
 527
 528     size_t wcLen;
 529     wxWCharBuffer wcBuf(wxMBConvStrictUTF8().cMB2WC
 530                                              (
 531                                                 m_impl.c_str(),
 532                                                 m_impl.length() + 1, // size
 533                                                 &wcLen
 534                                              ));
 535     if ( !wcLen )
 536         return wxCharBuffer("");
 537
 538     return conv.cWC2MB(wcBuf, wcLen+1, NULL);
 539 }
 540
 541 #else // ANSI
 542
 543 //Converts this string to a wide character string if unicode
 544 //mode is not enabled and wxUSE_WCHAR_T is enabled
 545 const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
 546 {
 547     return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL);
 548 }
 549
 550 #endif // Unicode/ANSI
 551
 552 // shrink to minimal size (releasing extra memory)
 553 bool wxString::Shrink()
 554 {
 555   wxString tmp(begin(), end());
 556   swap(tmp);
 557   return tmp.length() == length();
 558 }
 559
 560 // deprecated compatibility code:
 561 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
 562 wxStringCharType *wxString::GetWriteBuf(size_t nLen)
 563 {
 564     return DoGetWriteBuf(nLen);
 565 }
 566
 567 void wxString::UngetWriteBuf()
 568 {
 569     DoUngetWriteBuf();
 570 }
 571
 572 void wxString::UngetWriteBuf(size_t nLen)
 573 {
 574     DoUngetWriteBuf(nLen);
 575 }
 576 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
 577
 578
 579 // ---------------------------------------------------------------------------
 580 // data access
 581 // ---------------------------------------------------------------------------
 582
 583 // all functions are inline in string.h
 584
 585 // ---------------------------------------------------------------------------
 586 // concatenation operators
 587 // ---------------------------------------------------------------------------
 588
 589 /*
 590  * concatenation functions come in 5 flavours:
 591  *  string + string
 592  *  char   + string      and      string + char
 593  *  C str  + string      and      string + C str
 594  */
 595
 596 wxString operator+(const wxString& str1, const wxString& str2)
 597 {
 598 #if !wxUSE_STL_BASED_WXSTRING
 599     wxASSERT( str1.IsValid() );
 600     wxASSERT( str2.IsValid() );
 601 #endif
 602
 603     wxString s = str1;
 604     s += str2;
 605
 606     return s;
 607 }
 608
 609 wxString operator+(const wxString& str, wxUniChar ch)
 610 {
 611 #if !wxUSE_STL_BASED_WXSTRING
 612     wxASSERT( str.IsValid() );
 613 #endif
 614
 615     wxString s = str;
 616     s += ch;
 617
 618     return s;
 619 }
 620
 621 wxString operator+(wxUniChar ch, const wxString& str)
 622 {
 623 #if !wxUSE_STL_BASED_WXSTRING
 624     wxASSERT( str.IsValid() );
 625 #endif
 626
 627     wxString s = ch;
 628     s += str;
 629
 630     return s;
 631 }
 632
 633 wxString operator+(const wxString& str, const char *psz)
 634 {
 635 #if !wxUSE_STL_BASED_WXSTRING
 636     wxASSERT( str.IsValid() );
 637 #endif
 638
 639     wxString s;
 640     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 641         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 642     }
 643     s += str;
 644     s += psz;
 645
 646     return s;
 647 }
 648
 649 wxString operator+(const wxString& str, const wchar_t *pwz)
 650 {
 651 #if !wxUSE_STL_BASED_WXSTRING
 652     wxASSERT( str.IsValid() );
 653 #endif
 654
 655     wxString s;
 656     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 657         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 658     }
 659     s += str;
 660     s += pwz;
 661
 662     return s;
 663 }
 664
 665 wxString operator+(const char *psz, const wxString& str)
 666 {
 667 #if !wxUSE_STL_BASED_WXSTRING
 668     wxASSERT( str.IsValid() );
 669 #endif
 670
 671     wxString s;
 672     if ( !s.Alloc(strlen(psz) + str.length()) ) {
 673         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 674     }
 675     s = psz;
 676     s += str;
 677
 678     return s;
 679 }
 680
 681 wxString operator+(const wchar_t *pwz, const wxString& str)
 682 {
 683 #if !wxUSE_STL_BASED_WXSTRING
 684     wxASSERT( str.IsValid() );
 685 #endif
 686
 687     wxString s;
 688     if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
 689         wxFAIL_MSG( _T("out of memory in wxString::operator+") );
 690     }
 691     s = pwz;
 692     s += str;
 693
 694     return s;
 695 }
 696
 697 // ---------------------------------------------------------------------------
 698 // string comparison
 699 // ---------------------------------------------------------------------------
 700
 701 bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
 702 {
 703     return (length() == 1) && (compareWithCase ? GetChar(0u) == c
 704                                : wxToupper(GetChar(0u)) == wxToupper(c));
 705 }
 706
 707 #ifdef HAVE_STD_STRING_COMPARE
 708
 709 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
 710 //     UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
 711 //     sort strings in characters code point order by sorting the byte sequence
 712 //     in byte values order (i.e. what strcmp() and memcmp() do).
 713
 714 int wxString::compare(const wxString& str) const
 715 {
 716     return m_impl.compare(str.m_impl);
 717 }
 718
 719 int wxString::compare(size_t nStart, size_t nLen,
 720                       const wxString& str) const
 721 {
 722     size_t pos, len;
 723     PosLenToImpl(nStart, nLen, &pos, &len);
 724     return m_impl.compare(pos, len, str.m_impl);
 725 }
 726
 727 int wxString::compare(size_t nStart, size_t nLen,
 728                       const wxString& str,
 729                       size_t nStart2, size_t nLen2) const
 730 {
 731     size_t pos, len;
 732     PosLenToImpl(nStart, nLen, &pos, &len);
 733
 734     size_t pos2, len2;
 735     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 736
 737     return m_impl.compare(pos, len, str.m_impl, pos2, len2);
 738 }
 739
 740 int wxString::compare(const char* sz) const
 741 {
 742     return m_impl.compare(ImplStr(sz));
 743 }
 744
 745 int wxString::compare(const wchar_t* sz) const
 746 {
 747     return m_impl.compare(ImplStr(sz));
 748 }
 749
 750 int wxString::compare(size_t nStart, size_t nLen,
 751                       const char* sz, size_t nCount) const
 752 {
 753     size_t pos, len;
 754     PosLenToImpl(nStart, nLen, &pos, &len);
 755
 756     SubstrBufFromMB str(ImplStr(sz, nCount));
 757
 758     return m_impl.compare(pos, len, str.data, str.len);
 759 }
 760
 761 int wxString::compare(size_t nStart, size_t nLen,
 762                       const wchar_t* sz, size_t nCount) const
 763 {
 764     size_t pos, len;
 765     PosLenToImpl(nStart, nLen, &pos, &len);
 766
 767     SubstrBufFromWC str(ImplStr(sz, nCount));
 768
 769     return m_impl.compare(pos, len, str.data, str.len);
 770 }
 771
 772 #else // !HAVE_STD_STRING_COMPARE
 773
 774 static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
 775                           const wxStringCharType* s2, size_t l2)
 776 {
 777     if( l1 == l2 )
 778         return wxStringMemcmp(s1, s2, l1);
 779     else if( l1 < l2 )
 780     {
 781         int ret = wxStringMemcmp(s1, s2, l1);
 782         return ret == 0 ? -1 : ret;
 783     }
 784     else
 785     {
 786         int ret = wxStringMemcmp(s1, s2, l2);
 787         return ret == 0 ? +1 : ret;
 788     }
 789 }
 790
 791 int wxString::compare(const wxString& str) const
 792 {
 793     return ::wxDoCmp(m_impl.data(), m_impl.length(),
 794                      str.m_impl.data(), str.m_impl.length());
 795 }
 796
 797 int wxString::compare(size_t nStart, size_t nLen,
 798                       const wxString& str) const
 799 {
 800     wxASSERT(nStart <= length());
 801     size_type strLen = length() - nStart;
 802     nLen = strLen < nLen ? strLen : nLen;
 803
 804     size_t pos, len;
 805     PosLenToImpl(nStart, nLen, &pos, &len);
 806
 807     return ::wxDoCmp(m_impl.data() + pos,  len,
 808                      str.m_impl.data(), str.m_impl.length());
 809 }
 810
 811 int wxString::compare(size_t nStart, size_t nLen,
 812                       const wxString& str,
 813                       size_t nStart2, size_t nLen2) const
 814 {
 815     wxASSERT(nStart <= length());
 816     wxASSERT(nStart2 <= str.length());
 817     size_type strLen  =     length() - nStart,
 818               strLen2 = str.length() - nStart2;
 819     nLen  = strLen  < nLen  ? strLen  : nLen;
 820     nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
 821
 822     size_t pos, len;
 823     PosLenToImpl(nStart, nLen, &pos, &len);
 824     size_t pos2, len2;
 825     str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
 826
 827     return ::wxDoCmp(m_impl.data() + pos, len,
 828                      str.m_impl.data() + pos2, len2);
 829 }
 830
 831 int wxString::compare(const char* sz) const
 832 {
 833     SubstrBufFromMB str(ImplStr(sz, npos));
 834     if ( str.len == npos )
 835         str.len = wxStringStrlen(str.data);
 836     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 837 }
 838
 839 int wxString::compare(const wchar_t* sz) const
 840 {
 841     SubstrBufFromWC str(ImplStr(sz, npos));
 842     if ( str.len == npos )
 843         str.len = wxStringStrlen(str.data);
 844     return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
 845 }
 846
 847 int wxString::compare(size_t nStart, size_t nLen,
 848                       const char* sz, size_t nCount) const
 849 {
 850     wxASSERT(nStart <= length());
 851     size_type strLen = length() - nStart;
 852     nLen = strLen < nLen ? strLen : nLen;
 853
 854     size_t pos, len;
 855     PosLenToImpl(nStart, nLen, &pos, &len);
 856
 857     SubstrBufFromMB str(ImplStr(sz, nCount));
 858     if ( str.len == npos )
 859         str.len = wxStringStrlen(str.data);
 860
 861     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 862 }
 863
 864 int wxString::compare(size_t nStart, size_t nLen,
 865                       const wchar_t* sz, size_t nCount) const
 866 {
 867     wxASSERT(nStart <= length());
 868     size_type strLen = length() - nStart;
 869     nLen = strLen < nLen ? strLen : nLen;
 870
 871     size_t pos, len;
 872     PosLenToImpl(nStart, nLen, &pos, &len);
 873
 874     SubstrBufFromWC str(ImplStr(sz, nCount));
 875     if ( str.len == npos )
 876         str.len = wxStringStrlen(str.data);
 877
 878     return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
 879 }
 880
 881 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
 882
 883
 884 // ---------------------------------------------------------------------------
 885 // find_{first,last}_[not]_of functions
 886 // ---------------------------------------------------------------------------
 887
 888 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
 889
 // NB: All these functions are implemented with the argument being wxChar*,
 //     i.e. a wide character string in any Unicode build, even though the
 //     native string representation is char* in the UTF-8 build. This is
 //     because we can't use memchr() to determine if a character is in a set
 //     encoded as UTF-8.
 894
 895 size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
 896 {
 897     return find_first_of(sz, nStart, wxStrlen(sz));
 898 }
 899
 900 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
 901 {
 902     return find_first_not_of(sz, nStart, wxStrlen(sz));
 903 }
 904
 905 size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
 906 {
 907     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 908
 909     size_t idx = nStart;
 910     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 911     {
 912         if ( wxTmemchr(sz, *i, n) )
 913             return idx;
 914     }
 915
 916     return npos;
 917 }
 918
 919 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
 920 {
 921     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 922
 923     size_t idx = nStart;
 924     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 925     {
 926         if ( !wxTmemchr(sz, *i, n) )
 927             return idx;
 928     }
 929
 930     return npos;
 931 }
 932
 933
 934 size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
 935 {
 936     return find_last_of(sz, nStart, wxStrlen(sz));
 937 }
 938
 939 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
 940 {
 941     return find_last_not_of(sz, nStart, wxStrlen(sz));
 942 }
 943
 944 size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
 945 {
 946     size_t len = length();
 947
 948     if ( nStart == npos )
 949     {
 950         nStart = len - 1;
 951     }
 952     else
 953     {
 954         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 955     }
 956
 957     size_t idx = nStart;
 958     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 959           i != rend(); --idx, ++i )
 960     {
 961         if ( wxTmemchr(sz, *i, n) )
 962             return idx;
 963     }
 964
 965     return npos;
 966 }
 967
 968 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
 969 {
 970     size_t len = length();
 971
 972     if ( nStart == npos )
 973     {
 974         nStart = len - 1;
 975     }
 976     else
 977     {
 978         wxASSERT_MSG( nStart <= len, _T("invalid index") );
 979     }
 980
 981     size_t idx = nStart;
 982     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
 983           i != rend(); --idx, ++i )
 984     {
 985         if ( !wxTmemchr(sz, *i, n) )
 986             return idx;
 987     }
 988
 989     return npos;
 990 }
 991
 992 size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
 993 {
 994     wxASSERT_MSG( nStart <= length(),  _T("invalid index") );
 995
 996     size_t idx = nStart;
 997     for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
 998     {
 999         if ( *i != ch )
1000             return idx;
1001     }
1002
1003     return npos;
1004 }
1005
1006 size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
1007 {
1008     size_t len = length();
1009
1010     if ( nStart == npos )
1011     {
1012         nStart = len - 1;
1013     }
1014     else
1015     {
1016         wxASSERT_MSG( nStart <= len, _T("invalid index") );
1017     }
1018
1019     size_t idx = nStart;
1020     for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1021           i != rend(); --idx, ++i )
1022     {
1023         if ( *i != ch )
1024             return idx;
1025     }
1026
1027     return npos;
1028 }
1029
// The functions above were implemented for wchar_t* arguments in Unicode
// build and char* in ANSI build; below are implementations for the other
// character type.
//
// wxOtherCharType is that other type and STRCONV converts a string of it to
// wxChar using wxConvLibc (cMB2WC in Unicode build, cWC2MB otherwise), casting
// the result to const wxChar*. Both macros are local to this section and
// are #undef'd at its end.
#if wxUSE_UNICODE
    #define wxOtherCharType char
    #define STRCONV         (const wxChar*)wxConvLibc.cMB2WC
#else
    #define wxOtherCharType wchar_t
    #define STRCONV         (const wxChar*)wxConvLibc.cWC2MB
#endif

// Each overload below converts its argument with STRCONV and forwards to the
// wxChar* overload with the same name.
//
// NOTE(review): the overloads taking n pass the same n both as the length of
// the input to convert and as the length of the converted set; presumably
// these can differ if the conversion changes the number of characters --
// confirm against the wxMBConv documentation.
size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_first_of(STRCONV(sz), nStart); }

size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
                               size_t n) const
    { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_last_of(STRCONV(sz), nStart); }
size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
                              size_t n) const
    { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_first_not_of(STRCONV(sz), nStart); }
size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
                                   size_t n) const
    { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_last_not_of(STRCONV(sz), nStart); }
size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
                                  size_t n) const
    { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }

#undef wxOtherCharType
#undef STRCONV
1065
1066 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1067
1068 // ===========================================================================
1069 // other common string functions
1070 // ===========================================================================
1071
1072 int wxString::CmpNoCase(const wxString& s) const
1073 {
1074     // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
1075
1076     const_iterator i1 = begin();
1077     const_iterator end1 = end();
1078     const_iterator i2 = s.begin();
1079     const_iterator end2 = s.end();
1080
1081     for ( ; i1 != end1 && i2 != end2; ++i1, ++i2 )
1082     {
1083         wxUniChar lower1 = (wxChar)wxTolower(*i1);
1084         wxUniChar lower2 = (wxChar)wxTolower(*i2);
1085         if ( lower1 != lower2 )
1086             return lower1 < lower2 ? -1 : 1;
1087     }
1088
1089     size_t len1 = length();
1090     size_t len2 = s.length();
1091
1092     if ( len1 < len2 )
1093         return -1;
1094     else if ( len1 > len2 )
1095         return 1;
1096     return 0;
1097 }
1098
1099
1100 #if wxUSE_UNICODE
1101
1102 #ifdef __MWERKS__
1103 #ifndef __SCHAR_MAX__
1104 #define __SCHAR_MAX__ 127
1105 #endif
1106 #endif
1107
1108 wxString wxString::FromAscii(const char *ascii, size_t len)
1109 {
1110     if (!ascii || len == 0)
1111        return wxEmptyString;
1112
1113     wxString res;
1114
1115     {
1116         wxStringInternalBuffer buf(res, len);
1117         wxStringCharType *dest = buf;
1118
1119         for ( ; len > 0; --len )
1120         {
1121             unsigned char c = (unsigned char)*ascii++;
1122             wxASSERT_MSG( c < 0x80,
1123                           _T("Non-ASCII value passed to FromAscii().") );
1124
1125             *dest++ = (wchar_t)c;
1126         }
1127     }
1128
1129     return res;
1130 }
1131
1132 wxString wxString::FromAscii(const char *ascii)
1133 {
1134     return FromAscii(ascii, wxStrlen(ascii));
1135 }
1136
1137 wxString wxString::FromAscii(char ascii)
1138 {
1139     // What do we do with '\0' ?
1140
1141     unsigned char c = (unsigned char)ascii;
1142
1143     wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") );
1144
1145     // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1146     return wxString(wxUniChar((wchar_t)c));
1147 }
1148
1149 const wxCharBuffer wxString::ToAscii() const
1150 {
1151     // this will allocate enough space for the terminating NUL too
1152     wxCharBuffer buffer(length());
1153     char *dest = buffer.data();
1154
1155     for ( const_iterator i = begin(); i != end(); ++i )
1156     {
1157         wxUniChar c(*i);
1158         // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1159         *dest++ = c.IsAscii() ? (char)c : '_';
1160
1161         // the output string can't have embedded NULs anyhow, so we can safely
1162         // stop at first of them even if we do have any
1163         if ( !c )
1164             break;
1165     }
1166
1167     return buffer;
1168 }
1169
1170 #endif // wxUSE_UNICODE
1171
1172 // extract string of length nCount starting at nFirst
1173 wxString wxString::Mid(size_t nFirst, size_t nCount) const
1174 {
1175     size_t nLen = length();
1176
1177     // default value of nCount is npos and means "till the end"
1178     if ( nCount == npos )
1179     {
1180         nCount = nLen - nFirst;
1181     }
1182
1183     // out-of-bounds requests return sensible things
1184     if ( nFirst + nCount > nLen )
1185     {
1186         nCount = nLen - nFirst;
1187     }
1188
1189     if ( nFirst > nLen )
1190     {
1191         // AllocCopy() will return empty string
1192         return wxEmptyString;
1193     }
1194
1195     wxString dest(*this, nFirst, nCount);
1196     if ( dest.length() != nCount )
1197     {
1198         wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1199     }
1200
1201     return dest;
1202 }
1203
1204 // check that the string starts with prefix and return the rest of the string
1205 // in the provided pointer if it is not NULL, otherwise return false
1206 bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
1207 {
1208     if ( compare(0, prefix.length(), prefix) != 0 )
1209         return false;
1210
1211     if ( rest )
1212     {
1213         // put the rest of the string into provided pointer
1214         rest->assign(*this, prefix.length(), npos);
1215     }
1216
1217     return true;
1218 }
1219
1220
1221 // check that the string ends with suffix and return the rest of it in the
1222 // provided pointer if it is not NULL, otherwise return false
1223 bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
1224 {
1225     int start = length() - suffix.length();
1226
1227     if ( start < 0 || compare(start, npos, suffix) != 0 )
1228         return false;
1229
1230     if ( rest )
1231     {
1232         // put the rest of the string into provided pointer
1233         rest->assign(*this, 0, start);
1234     }
1235
1236     return true;
1237 }
1238
1239
1240 // extract nCount last (rightmost) characters
1241 wxString wxString::Right(size_t nCount) const
1242 {
1243   if ( nCount > length() )
1244     nCount = length();
1245
1246   wxString dest(*this, length() - nCount, nCount);
1247   if ( dest.length() != nCount ) {
1248     wxFAIL_MSG( _T("out of memory in wxString::Right") );
1249   }
1250   return dest;
1251 }
1252
1253 // get all characters after the last occurence of ch
1254 // (returns the whole string if ch not found)
1255 wxString wxString::AfterLast(wxUniChar ch) const
1256 {
1257   wxString str;
1258   int iPos = Find(ch, true);
1259   if ( iPos == wxNOT_FOUND )
1260     str = *this;
1261   else
1262     str = wx_str() + iPos + 1;
1263
1264   return str;
1265 }
1266
1267 // extract nCount first (leftmost) characters
1268 wxString wxString::Left(size_t nCount) const
1269 {
1270   if ( nCount > length() )
1271     nCount = length();
1272
1273   wxString dest(*this, 0, nCount);
1274   if ( dest.length() != nCount ) {
1275     wxFAIL_MSG( _T("out of memory in wxString::Left") );
1276   }
1277   return dest;
1278 }
1279
1280 // get all characters before the first occurence of ch
1281 // (returns the whole string if ch not found)
1282 wxString wxString::BeforeFirst(wxUniChar ch) const
1283 {
1284   int iPos = Find(ch);
1285   if ( iPos == wxNOT_FOUND ) iPos = length();
1286   return wxString(*this, 0, iPos);
1287 }
1288
1289 /// get all characters before the last occurence of ch
1290 /// (returns empty string if ch not found)
1291 wxString wxString::BeforeLast(wxUniChar ch) const
1292 {
1293   wxString str;
1294   int iPos = Find(ch, true);
1295   if ( iPos != wxNOT_FOUND && iPos != 0 )
1296     str = wxString(c_str(), iPos);
1297
1298   return str;
1299 }
1300
1301 /// get all characters after the first occurence of ch
1302 /// (returns empty string if ch not found)
1303 wxString wxString::AfterFirst(wxUniChar ch) const
1304 {
1305   wxString str;
1306   int iPos = Find(ch);
1307   if ( iPos != wxNOT_FOUND )
1308     str = wx_str() + iPos + 1;
1309
1310   return str;
1311 }
1312
1313 // replace first (or all) occurences of some substring with another one
1314 size_t wxString::Replace(const wxString& strOld,
1315                          const wxString& strNew, bool bReplaceAll)
1316 {
1317     // if we tried to replace an empty string we'd enter an infinite loop below
1318     wxCHECK_MSG( !strOld.empty(), 0,
1319                  _T("wxString::Replace(): invalid parameter") );
1320
1321     wxSTRING_INVALIDATE_CACHE();
1322
1323     size_t uiCount = 0;   // count of replacements made
1324
1325     // optimize the special common case: replacement of one character by
1326     // another one (in UTF-8 case we can only do this for ASCII characters)
1327     //
1328     // benchmarks show that this special version is around 3 times faster
1329     // (depending on the proportion of matching characters and UTF-8/wchar_t
1330     // build)
1331     if ( strOld.m_impl.length() == 1 && strNew.m_impl.length() == 1 )
1332     {
1333         const wxStringCharType chOld = strOld.m_impl[0],
1334                                chNew = strNew.m_impl[0];
1335
1336         // this loop is the simplified version of the one below
1337         for ( size_t pos = 0; ; )
1338         {
1339             pos = m_impl.find(chOld, pos);
1340             if ( pos == npos )
1341                 break;
1342
1343             m_impl[pos++] = chNew;
1344
1345             uiCount++;
1346
1347             if ( !bReplaceAll )
1348                 break;
1349         }
1350     }
1351     else // general case
1352     {
1353         const size_t uiOldLen = strOld.m_impl.length();
1354         const size_t uiNewLen = strNew.m_impl.length();
1355
1356         for ( size_t pos = 0; ; )
1357         {
1358             pos = m_impl.find(strOld.m_impl, pos);
1359             if ( pos == npos )
1360                 break;
1361
1362             // replace this occurrence of the old string with the new one
1363             m_impl.replace(pos, uiOldLen, strNew.m_impl);
1364
1365             // move up pos past the string that was replaced
1366             pos += uiNewLen;
1367
1368             // increase replace count
1369             uiCount++;
1370
1371             // stop after the first one?
1372             if ( !bReplaceAll )
1373                 break;
1374         }
1375     }
1376
1377     return uiCount;
1378 }
1379
1380 bool wxString::IsAscii() const
1381 {
1382     for ( const_iterator i = begin(); i != end(); ++i )
1383     {
1384         if ( !(*i).IsAscii() )
1385             return false;
1386     }
1387
1388     return true;
1389 }
1390
1391 bool wxString::IsWord() const
1392 {
1393     for ( const_iterator i = begin(); i != end(); ++i )
1394     {
1395         if ( !wxIsalpha(*i) )
1396             return false;
1397     }
1398
1399     return true;
1400 }
1401
1402 bool wxString::IsNumber() const
1403 {
1404     if ( empty() )
1405         return true;
1406
1407     const_iterator i = begin();
1408
1409     if ( *i == _T('-') || *i == _T('+') )
1410         ++i;
1411
1412     for ( ; i != end(); ++i )
1413     {
1414         if ( !wxIsdigit(*i) )
1415             return false;
1416     }
1417
1418     return true;
1419 }
1420
1421 wxString wxString::Strip(stripType w) const
1422 {
1423     wxString s = *this;
1424     if ( w & leading ) s.Trim(false);
1425     if ( w & trailing ) s.Trim(true);
1426     return s;
1427 }
1428
1429 // ---------------------------------------------------------------------------
1430 // case conversion
1431 // ---------------------------------------------------------------------------
1432
1433 wxString& wxString::MakeUpper()
1434 {
1435   for ( iterator it = begin(), en = end(); it != en; ++it )
1436     *it = (wxChar)wxToupper(*it);
1437
1438   return *this;
1439 }
1440
1441 wxString& wxString::MakeLower()
1442 {
1443   for ( iterator it = begin(), en = end(); it != en; ++it )
1444     *it = (wxChar)wxTolower(*it);
1445
1446   return *this;
1447 }
1448
1449 wxString& wxString::MakeCapitalized()
1450 {
1451     const iterator en = end();
1452     iterator it = begin();
1453     if ( it != en )
1454     {
1455         *it = (wxChar)wxToupper(*it);
1456         for ( ++it; it != en; ++it )
1457             *it = (wxChar)wxTolower(*it);
1458     }
1459
1460     return *this;
1461 }
1462
1463 // ---------------------------------------------------------------------------
1464 // trimming and padding
1465 // ---------------------------------------------------------------------------
1466
1467 // some compilers (VC++ 6.0 not to name them) return true for a call to
1468 // isspace('\xEA') in the C locale which seems to be broken to me, but we have
1469 // to live with this by checking that the character is a 7 bit one - even if
1470 // this may fail to detect some spaces (I don't know if Unicode doesn't have
1471 // space-like symbols somewhere except in the first 128 chars), it is arguably
1472 // still better than trimming away accented letters
1473 inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1474
1475 // trims spaces (in the sense of isspace) from left or right side
1476 wxString& wxString::Trim(bool bFromRight)
1477 {
1478     // first check if we're going to modify the string at all
1479     if ( !empty() &&
1480          (
1481           (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1482           (!bFromRight && wxSafeIsspace(GetChar(0u)))
1483          )
1484        )
1485     {
1486         if ( bFromRight )
1487         {
1488             // find last non-space character
1489             reverse_iterator psz = rbegin();
1490             while ( (psz != rend()) && wxSafeIsspace(*psz) )
1491                 ++psz;
1492
1493             // truncate at trailing space start
1494             erase(psz.base(), end());
1495         }
1496         else
1497         {
1498             // find first non-space character
1499             iterator psz = begin();
1500             while ( (psz != end()) && wxSafeIsspace(*psz) )
1501                 ++psz;
1502
1503             // fix up data and length
1504             erase(begin(), psz);
1505         }
1506     }
1507
1508     return *this;
1509 }
1510
1511 // adds nCount characters chPad to the string from either side
1512 wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1513 {
1514     wxString s(chPad, nCount);
1515
1516     if ( bFromRight )
1517         *this += s;
1518     else
1519     {
1520         s += *this;
1521         swap(s);
1522     }
1523
1524     return *this;
1525 }
1526
1527 // truncate the string
1528 wxString& wxString::Truncate(size_t uiLen)
1529 {
1530     if ( uiLen < length() )
1531     {
1532         erase(begin() + uiLen, end());
1533     }
1534     //else: nothing to do, string is already short enough
1535
1536     return *this;
1537 }
1538
1539 // ---------------------------------------------------------------------------
1540 // finding (return wxNOT_FOUND if not found and index otherwise)
1541 // ---------------------------------------------------------------------------
1542
1543 // find a character
1544 int wxString::Find(wxUniChar ch, bool bFromEnd) const
1545 {
1546     size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1547
1548     return (idx == npos) ? wxNOT_FOUND : (int)idx;
1549 }
1550
1551 // ----------------------------------------------------------------------------
1552 // conversion to numbers
1553 // ----------------------------------------------------------------------------
1554
// The implementation of all the functions below is exactly the same so factor
// it out. Note that number extraction works correctly on UTF-8 strings, so
// we can use wxStringCharType and wx_str() for maximum efficiency.

// DO_IF_NOT_WINCE(x) expands to x everywhere except when __WXWINCE__ is
// defined, where it expands to nothing; it is used to wrap the accesses to
// errno (<errno.h> is not included by this file under __WXWINCE__).
#ifndef __WXWINCE__
    #define DO_IF_NOT_WINCE(x) x
#else
    #define DO_IF_NOT_WINCE(x)
#endif

// WX_STRING_TO_INT_TYPE(out, base, func, T) expands to the complete body of
// a conversion method returning bool:
//  - out:  pointer receiving the result, the method returns false if it is
//          NULL (checked with wxCHECK_MSG)
//  - base: numeric base, asserted to be either 0 or in 2..36 range
//  - func: the strtol()-like function, called as func(start, &end, base)
//  - T:    the type of the value returned by func and stored in *out
//
// *out is only modified if the conversion succeeds. The expansion ends with
// "return true" without a semicolon, so the macro invocation must be followed
// by one.
#define WX_STRING_TO_INT_TYPE(out, base, func, T)                           \
    wxCHECK_MSG( out, false, _T("NULL output pointer") );                   \
    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );  \
                                                                            \
    DO_IF_NOT_WINCE( errno = 0; )                                           \
                                                                            \
    const wxStringCharType *start = wx_str();                               \
    wxStringCharType *end;                                                  \
    T val = func(start, &end, base);                                        \
                                                                            \
    /* return true only if scan was stopped by the terminating NUL and */   \
    /* if the string was not empty to start with and no under/overflow */   \
    /* occurred: */                                                         \
    if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )         \
        return false;                                                       \
    *out = val;                                                             \
    return true
1582
// Converts the string to a long in the given base using wxStrtol(), via the
// WX_STRING_TO_INT_TYPE macro: returns true and stores the result in *pVal
// if the entire non-empty string could be converted without overflow (the
// overflow check is skipped under __WXWINCE__) and returns false, leaving
// *pVal unchanged, otherwise.
bool wxString::ToLong(long *pVal, int base) const
{
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtol, long);
}
1587
// Converts the string to an unsigned long in the given base using
// wxStrtoul(), via the WX_STRING_TO_INT_TYPE macro: returns true and stores
// the result in *pVal if the entire non-empty string could be converted
// without overflow (the overflow check is skipped under __WXWINCE__) and
// returns false, leaving *pVal unchanged, otherwise.
bool wxString::ToULong(unsigned long *pVal, int base) const
{
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoul, unsigned long);
}
1592
// Converts the string to a wxLongLong_t in the given base using wxStrtoll(),
// via the WX_STRING_TO_INT_TYPE macro: returns true and stores the result in
// *pVal if the entire non-empty string could be converted without overflow
// (the overflow check is skipped under __WXWINCE__) and returns false,
// leaving *pVal unchanged, otherwise.
bool wxString::ToLongLong(wxLongLong_t *pVal, int base) const
{
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoll, wxLongLong_t);
}
1597
// Converts the string to a wxULongLong_t in the given base using
// wxStrtoull(), via the WX_STRING_TO_INT_TYPE macro: returns true and stores
// the result in *pVal if the entire non-empty string could be converted
// without overflow (the overflow check is skipped under __WXWINCE__) and
// returns false, leaving *pVal unchanged, otherwise.
bool wxString::ToULongLong(wxULongLong_t *pVal, int base) const
{
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoull, wxULongLong_t);
}
1602
1603 bool wxString::ToDouble(double *pVal) const
1604 {
1605     wxCHECK_MSG( pVal, false, _T("NULL output pointer") );
1606
1607     DO_IF_NOT_WINCE( errno = 0; )
1608
1609     const wxChar *start = c_str();
1610     wxChar *end;
1611     double val = wxStrtod(start, &end);
1612
1613     // return true only if scan was stopped by the terminating NUL and if the
1614     // string was not empty to start with and no under/overflow occurred
1615     if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )
1616         return false;
1617
1618     *pVal = val;
1619
1620     return true;
1621 }
1622
1623 // ---------------------------------------------------------------------------
1624 // formatted output
1625 // ---------------------------------------------------------------------------
1626
1627 #if !wxUSE_UTF8_LOCALE_ONLY
1628 /* static */
1629 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1630 wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
1631 #else
1632 wxString wxString::DoFormatWchar(const wxChar *format, ...)
1633 #endif
1634 {
1635     va_list argptr;
1636     va_start(argptr, format);
1637
1638     wxString s;
1639     s.PrintfV(format, argptr);
1640
1641     va_end(argptr);
1642
1643     return s;
1644 }
1645 #endif // !wxUSE_UTF8_LOCALE_ONLY
1646
#if wxUSE_UNICODE_UTF8
// Creates and returns a new string formatted by PrintfV() from the given
// char format string and the variable arguments following it.
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
1662
1663 /* static */
1664 wxString wxString::FormatV(const wxString& format, va_list argptr)
1665 {
1666     wxString s;
1667     s.PrintfV(format, argptr);
1668     return s;
1669 }
1670
1671 #if !wxUSE_UTF8_LOCALE_ONLY
1672 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1673 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
1674 #else
1675 int wxString::DoPrintfWchar(const wxChar *format, ...)
1676 #endif
1677 {
1678     va_list argptr;
1679     va_start(argptr, format);
1680
1681 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1682     // get a pointer to the wxString instance; we have to use dynamic_cast<>
1683     // because it's the only cast that works safely for downcasting when
1684     // multiple inheritance is used:
1685     wxString *str = static_cast<wxString*>(this);
1686 #else
1687     wxString *str = this;
1688 #endif
1689
1690     int iLen = str->PrintfV(format, argptr);
1691
1692     va_end(argptr);
1693
1694     return iLen;
1695 }
1696 #endif // !wxUSE_UTF8_LOCALE_ONLY
1697
#if wxUSE_UNICODE_UTF8
// Formats the variable arguments according to the given char format string
// into this string using PrintfV() and returns its result.
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);
    const int written = PrintfV(format, args);
    va_end(args);

    return written;
}
#endif // wxUSE_UNICODE_UTF8
1711
1712 /*
1713     Uses wxVsnprintf and places the result into the this string.
1714
1715     In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1716     it is vswprintf.  Due to a discrepancy between vsnprintf and vswprintf in
1717     the ISO C99 (and thus SUSv3) standard the return value for the case of
1718     an undersized buffer is inconsistent.  For conforming vsnprintf
1719     implementations the function must return the number of characters that
1720     would have been printed had the buffer been large enough.  For conforming
1721     vswprintf implementations the function must return a negative number
1722     and set errno.
1723
1724     What vswprintf sets errno to is undefined but Darwin seems to set it to
1725     EOVERFLOW.  The only expected errno are EILSEQ and EINVAL.  Both of
1726     those are defined in the standard and backed up by several conformance
1727     statements.  Note that ENOMEM mentioned in the manual page does not
1728     apply to swprintf, only wprintf and fwprintf.
1729
1730     Official manual page:
1731     http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1732
1733     Some conformance statements (AIX, Solaris):
1734     http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1735     http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1736
1737     Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1738     EILSEQ and EINVAL are specifically defined to mean the error is other than
1739     an undersized buffer and no other errno are defined we treat those two
1740     as meaning hard errors and everything else gets the old behavior which
1741     is to keep looping and increasing buffer size until the function succeeds.
1742
1743     In practice it's impossible to determine before compilation which behavior
1744     may be used.  The vswprintf function may have vsnprintf-like behavior or
1745     vice-versa.  Behavior detected on one release can theoretically change
1746     with an updated release.  Not to mention that configure testing for it
1747     would require the test to be run on the host system, not the build system
1748     which makes cross compilation difficult. Therefore, we make no assumptions
1749     about behavior and try our best to handle every known case, including the
1750     case where wxVsnprintf returns a negative number and fails to set errno.
1751
1752     There is yet one more non-standard implementation and that is our own.
1753     Fortunately, that can be detected at compile-time.
1754
    On top of all that, ISO C99 explicitly defines snprintf to write a null
    character to the last position of the specified buffer.  That would be
    at the given buffer size minus 1.  It is supposed to do this even if it
    turns out that the buffer is sized too small.
1759
1760     Darwin (tested on 10.5) follows the C99 behavior exactly.
1761
1762     Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1763     errno even when it fails.  However, it only seems to ever fail due
1764     to an undersized buffer.
1765 */
1766 #if wxUSE_UNICODE_UTF8
1767 template<typename BufferType>
1768 #else
1769 // we only need one version in non-UTF8 builds and at least two Windows
1770 // compilers have problems with this function template, so use just one
1771 // normal function here
1772 #endif
1773 static int DoStringPrintfV(wxString& str,
1774                            const wxString& format, va_list argptr)
1775 {
1776     int size = 1024;
1777
1778     for ( ;; )
1779     {
1780 #if wxUSE_UNICODE_UTF8
1781         BufferType tmp(str, size + 1);
1782         typename BufferType::CharType *buf = tmp;
1783 #else
1784         wxStringBuffer tmp(str, size + 1);
1785         wxChar *buf = tmp;
1786 #endif
1787
1788         if ( !buf )
1789         {
1790             // out of memory
1791
1792             // in UTF-8 build, leaving uninitialized junk in the buffer
1793             // could result in invalid non-empty UTF-8 string, so just
1794             // reset the string to empty on failure:
1795             buf[0] = '\0';
1796             return -1;
1797         }
1798
1799         // wxVsnprintf() may modify the original arg pointer, so pass it
1800         // only a copy
1801         va_list argptrcopy;
1802         wxVaCopy(argptrcopy, argptr);
1803
1804 #ifndef __WXWINCE__
1805         // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1806         errno = 0;
1807 #endif
1808         int len = wxVsnprintf(buf, size, format, argptrcopy);
1809         va_end(argptrcopy);
1810
1811         // some implementations of vsnprintf() don't NUL terminate
1812         // the string if there is not enough space for it so
1813         // always do it manually
1814         // FIXME: This really seems to be the wrong and would be an off-by-one
1815         // bug except the code above allocates an extra character.
1816         buf[size] = _T('\0');
1817
1818         // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1819         // total number of characters which would have been written if the
1820         // buffer were large enough (newer standards such as Unix98)
1821         if ( len < 0 )
1822         {
1823             // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1824             //     wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1825             //     is true if *both* of them use our own implementation,
1826             //     otherwise we can't be sure
1827 #if wxUSE_WXVSNPRINTF
1828             // we know that our own implementation of wxVsnprintf() returns -1
1829             // only for a format error - thus there's something wrong with
1830             // the user's format string
1831             buf[0] = '\0';
1832             return -1;
1833 #else // possibly using system version
1834             // assume it only returns error if there is not enough space, but
1835             // as we don't know how much we need, double the current size of
1836             // the buffer
1837 #ifndef __WXWINCE__
1838             if( (errno == EILSEQ) || (errno == EINVAL) )
1839             // If errno was set to one of the two well-known hard errors
1840             // then fail immediately to avoid an infinite loop.
1841                 return -1;
1842             else
1843 #endif // __WXWINCE__
1844             // still not enough, as we don't know how much we need, double the
1845             // current size of the buffer
1846                 size *= 2;
1847 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1848         }
1849         else if ( len >= size )
1850         {
1851 #if wxUSE_WXVSNPRINTF
1852             // we know that our own implementation of wxVsnprintf() returns
1853             // size+1 when there's not enough space but that's not the size
1854             // of the required buffer!
1855             size *= 2;      // so we just double the current size of the buffer
1856 #else
1857             // some vsnprintf() implementations NUL-terminate the buffer and
1858             // some don't in len == size case, to be safe always add 1
1859             // FIXME: I don't quite understand this comment.  The vsnprintf
1860             // function is specifically defined to return the number of
1861             // characters printed not including the null terminator.
1862             // So OF COURSE you need to add 1 to get the right buffer size.
1863             // The following line is definitely correct, no question.
1864             size = len + 1;
1865 #endif
1866         }
1867         else // ok, there was enough space
1868         {
1869             break;
1870         }
1871     }
1872
1873     // we could have overshot
1874     str.Shrink();
1875
1876     return str.length();
1877 }
1878
// Formats the arguments in argptr according to format into this string and
// returns the value returned by DoStringPrintfV(), which does all the work.
//
// This function only selects, depending on the build options, the version of
// DoStringPrintfV() to use: the single non-template one in non-UTF-8 builds
// or, in UTF-8 builds, the template instantiated either for the char buffer
// (always if wxUSE_UTF8_LOCALE_ONLY, otherwise only if wxLocaleIsUtf8 is
// true at run-time) or for wxStringBuffer.
int wxString::PrintfV(const wxString& format, va_list argptr)
{
    // the type of the char buffer used in UTF-8 build depends on whether
    // wxString is based on the STL string class or not
#if wxUSE_UNICODE_UTF8
    #if wxUSE_STL_BASED_WXSTRING
        typedef wxStringTypeBuffer<char> Utf8Buffer;
    #else
        typedef wxStringInternalBuffer Utf8Buffer;
    #endif
#endif

#if wxUSE_UTF8_LOCALE_ONLY
    return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
#else
    #if wxUSE_UNICODE_UTF8
    if ( wxLocaleIsUtf8 )
        return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
    else
        // wxChar* version
        return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
    #else
        return DoStringPrintfV(*this, format, argptr);
    #endif // UTF8/WCHAR
#endif
}
1903
1904 // ----------------------------------------------------------------------------
1905 // misc other operations
1906 // ----------------------------------------------------------------------------
1907
1908 // returns true if the string matches the pattern which may contain '*' and
1909 // '?' metacharacters (as usual, '?' matches any character and '*' any number
1910 // of them)
1911 bool wxString::Matches(const wxString& mask) const
1912 {
1913     // I disable this code as it doesn't seem to be faster (in fact, it seems
1914     // to be much slower) than the old, hand-written code below and using it
1915     // here requires always linking with libregex even if the user code doesn't
1916     // use it
1917 #if 0 // wxUSE_REGEX
1918     // first translate the shell-like mask into a regex
1919     wxString pattern;
1920     pattern.reserve(wxStrlen(pszMask));
1921
1922     pattern += _T('^');
1923     while ( *pszMask )
1924     {
1925         switch ( *pszMask )
1926         {
1927             case _T('?'):
1928                 pattern += _T('.');
1929                 break;
1930
1931             case _T('*'):
1932                 pattern += _T(".*");
1933                 break;
1934
1935             case _T('^'):
1936             case _T('.'):
1937             case _T('$'):
1938             case _T('('):
1939             case _T(')'):
1940             case _T('|'):
1941             case _T('+'):
1942             case _T('\\'):
1943                 // these characters are special in a RE, quote them
1944                 // (however note that we don't quote '[' and ']' to allow
1945                 // using them for Unix shell like matching)
1946                 pattern += _T('\\');
1947                 // fall through
1948
1949             default:
1950                 pattern += *pszMask;
1951         }
1952
1953         pszMask++;
1954     }
1955     pattern += _T('$');
1956
1957     // and now use it
1958     return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
1959 #else // !wxUSE_REGEX
1960   // TODO: this is, of course, awfully inefficient...
1961
1962   // FIXME-UTF8: implement using iterators, remove #if
1963 #if wxUSE_UNICODE_UTF8
1964   wxWCharBuffer maskBuf = mask.wc_str();
1965   wxWCharBuffer txtBuf = wc_str();
1966   const wxChar *pszMask = maskBuf.data();
1967   const wxChar *pszTxt = txtBuf.data();
1968 #else
1969   const wxChar *pszMask = mask.wx_str();
1970   // the char currently being checked
1971   const wxChar *pszTxt = wx_str();
1972 #endif
1973
1974   // the last location where '*' matched
1975   const wxChar *pszLastStarInText = NULL;
1976   const wxChar *pszLastStarInMask = NULL;
1977
1978 match:
1979   for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
1980     switch ( *pszMask ) {
1981       case wxT('?'):
1982         if ( *pszTxt == wxT('\0') )
1983           return false;
1984
1985         // pszTxt and pszMask will be incremented in the loop statement
1986
1987         break;
1988
1989       case wxT('*'):
1990         {
1991           // remember where we started to be able to backtrack later
1992           pszLastStarInText = pszTxt;
1993           pszLastStarInMask = pszMask;
1994
1995           // ignore special chars immediately following this one
1996           // (should this be an error?)
1997           while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
1998             pszMask++;
1999
2000           // if there is nothing more, match
2001           if ( *pszMask == wxT('\0') )
2002             return true;
2003
2004           // are there any other metacharacters in the mask?
2005           size_t uiLenMask;
2006           const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));
2007
2008           if ( pEndMask != NULL ) {
2009             // we have to match the string between two metachars
2010             uiLenMask = pEndMask - pszMask;
2011           }
2012           else {
2013             // we have to match the remainder of the string
2014             uiLenMask = wxStrlen(pszMask);
2015           }
2016
2017           wxString strToMatch(pszMask, uiLenMask);
2018           const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
2019           if ( pMatch == NULL )
2020             return false;
2021
2022           // -1 to compensate "++" in the loop
2023           pszTxt = pMatch + uiLenMask - 1;
2024           pszMask += uiLenMask - 1;
2025         }
2026         break;
2027
2028       default:
2029         if ( *pszMask != *pszTxt )
2030           return false;
2031         break;
2032     }
2033   }
2034
2035   // match only if nothing left
2036   if ( *pszTxt == wxT('\0') )
2037     return true;
2038
2039   // if we failed to match, backtrack if we can
2040   if ( pszLastStarInText ) {
2041     pszTxt = pszLastStarInText + 1;
2042     pszMask = pszLastStarInMask;
2043
2044     pszLastStarInText = NULL;
2045
2046     // don't bother resetting pszLastStarInMask, it's unnecessary
2047
2048     goto match;
2049   }
2050
2051   return false;
2052 #endif // wxUSE_REGEX/!wxUSE_REGEX
2053 }
2054
2055 // Count the number of chars
2056 int wxString::Freq(wxUniChar ch) const
2057 {
2058     int count = 0;
2059     for ( const_iterator i = begin(); i != end(); ++i )
2060     {
2061         if ( *i == ch )
2062             count ++;
2063     }
2064     return count;
2065 }
2066
2067 // ----------------------------------------------------------------------------
2068 // wxUTF8StringBuffer
2069 // ----------------------------------------------------------------------------
2070
2071 #if wxUSE_UNICODE_WCHAR
2072 wxUTF8StringBuffer::~wxUTF8StringBuffer()
2073 {
2074     wxMBConvStrictUTF8 conv;
2075     size_t wlen = conv.ToWChar(NULL, 0, m_buf);
2076     wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
2077
2078     wxStringInternalBuffer wbuf(m_str, wlen);
2079     conv.ToWChar(wbuf, wlen, m_buf);
2080 }
2081
2082 wxUTF8StringBufferLength::~wxUTF8StringBufferLength()
2083 {
2084     wxCHECK_RET(m_lenSet, "length not set");
2085
2086     wxMBConvStrictUTF8 conv;
2087     size_t wlen = conv.ToWChar(NULL, 0, m_buf, m_len);
2088     wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
2089
2090     wxStringInternalBufferLength wbuf(m_str, wlen);
2091     conv.ToWChar(wbuf, wlen, m_buf, m_len);
2092     wbuf.SetLength(wlen);
2093 }
2094 #endif // wxUSE_UNICODE_WCHAR
2095
2096 // ----------------------------------------------------------------------------
2097 // wxCharBufferType<T>
2098 // ----------------------------------------------------------------------------
2099
2100 template<>
2101 wxCharTypeBuffer<char>::Data
2102 wxCharTypeBuffer<char>::NullData(NULL);
2103
2104 template<>
2105 wxCharTypeBuffer<wchar_t>::Data
2106 wxCharTypeBuffer<wchar_t>::NullData(NULL);