1 ///////////////////////////////////////////////////////////////////////////// 
   2 // Name:        src/common/string.cpp 
   3 // Purpose:     wxString class 
   4 // Author:      Vadim Zeitlin, Ryan Norton 
   8 // Copyright:   (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr> 
   9 //              (c) 2004 Ryan Norton <wxprojects@comcast.net> 
  10 // Licence:     wxWindows licence 
  11 ///////////////////////////////////////////////////////////////////////////// 
  13 // =========================================================================== 
  14 // headers, declarations, constants 
  15 // =========================================================================== 
  17 // For compilers that support precompilation, includes "wx.h". 
  18 #include "wx/wxprec.h" 
  25     #include "wx/string.h" 
  26     #include "wx/wxcrtvararg.h" 
  40 #include "wx/hashmap.h" 
  41 #include "wx/vector.h" 
  42 #include "wx/xlocale.h" 
  45     #include "wx/msw/wrapwin.h" 
  48 #if wxUSE_STD_IOSTREAM 
  52 // string handling functions used by wxString: 
  53 #if wxUSE_UNICODE_UTF8 
  54     #define wxStringMemcpy   memcpy 
  55     #define wxStringMemcmp   memcmp 
  56     #define wxStringMemchr   memchr 
  57     #define wxStringStrlen   strlen 
  59     #define wxStringMemcpy   wxTmemcpy 
  60     #define wxStringMemcmp   wxTmemcmp 
  61     #define wxStringMemchr   wxTmemchr 
  62     #define wxStringStrlen   wxStrlen 
  65 // define a function declared in wx/buffer.h here as we don't have buffer.cpp 
  66 // and don't want to add it just because of this simple function 
  70 // wxXXXBuffer classes can be (implicitly) used during global statics 
  71 // initialization so wrap the static UntypedBufferData variable in a function 
  72 // to make it safe to access it even before all global statics are initialized. 
  // Returns the address of that function-local static, which is constructed
  // with (NULL, 0).
  73 UntypedBufferData 
*GetUntypedNullData() 
  75     static UntypedBufferData 
s_untypedNullData(NULL
, 0); 
  77     return &s_untypedNullData
; 
  80 } // namespace wxPrivate 
  82 // --------------------------------------------------------------------------- 
  83 // static class variables definition 
  84 // --------------------------------------------------------------------------- 
  86 // According to the STL, npos _must_ be (size_t)-1 
  // (converting -1 to the unsigned size_t yields the largest size_t value).
  87 const size_t wxString::npos 
= (size_t) -1; 
  89 #if wxUSE_STRING_POS_CACHE 
  91 #ifdef wxHAS_COMPILER_TLS 
  // Definition of the static member wxString::ms_cache, declared through
  // wxTLS_TYPE; this branch is compiled only when wxHAS_COMPILER_TLS is defined.
  93 wxTLS_TYPE(wxString::Cache
) wxString::ms_cache
; 
  95 #else // !wxHAS_COMPILER_TLS 
  // Helper used in the !wxHAS_COMPILER_TLS branch: its constructor calls
  // wxString::GetCache() once.
  97 struct wxStrCacheInitializer
 
  99     wxStrCacheInitializer() 
 101         // calling this function triggers s_cache initialization in it, and 
 102         // from now on it becomes safe to call from multiple threads 
 103         wxString::GetCache(); 
 // Returns a reference to the cache, obtained by applying wxTLS_VALUE to a
 // function-local static declared with wxTLS_TYPE(Cache).
 108 wxString::Cache& wxString::GetCache() 
 110     static wxTLS_TYPE(Cache) s_cache; 
 112     return wxTLS_VALUE(s_cache); 
 // File-scope wxStrCacheInitializer object: constructing it calls
 // wxString::GetCache().
 116 static wxStrCacheInitializer gs_stringCacheInit
; 
 118 #endif // wxHAS_COMPILER_TLS/!wxHAS_COMPILER_TLS 
 120 // gdb seems to be unable to display thread-local variables correctly, at least 
 121 // not my 6.4.98 version under amd64, so provide this debugging helper to do it 
 122 #if wxDEBUG_LEVEL >= 2 
 // Debugging helper (compiled when wxDEBUG_LEVEL >= 2). ShowAll() prints a
 // header with puts() and then loops over indices [0, wxString::Cache::SIZE),
 // printing one printf() line per element of wxString::GetCacheBegin(). The
 // " [*]" marker is printed for the index equal to
 // wxString::LastUsedCacheElement().
 // NOTE(review): only some of the printf() arguments are visible in this
 // listing; check the argument list against the format string in the full file.
 124 struct wxStrCacheDumper
 
 126     static void ShowAll() 
 128         puts("*** wxString cache dump:"); 
 129         for ( unsigned n 
= 0; n 
< wxString::Cache::SIZE
; n
++ ) 
 131             const wxString::Cache::Element
& 
 132                 c 
= wxString::GetCacheBegin()[n
]; 
 134             printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n", 
 136                    n 
== wxString::LastUsedCacheElement() ? " [*]" : "", 
 138                    (unsigned long)c
.pos
, 
 139                    (unsigned long)c
.impl
, 
 // Free-function wrapper around wxStrCacheDumper::ShowAll(); presumably meant
 // to be called by name from a debugger (see the gdb remark above) - confirm.
 145 void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); } 
 147 #endif // wxDEBUG_LEVEL >= 2 
 149 #ifdef wxPROFILE_STRING_CACHE 
 // Definition of the static member wxString::ms_cacheStats; compiled only when
 // wxPROFILE_STRING_CACHE is defined.
 151 wxString::CacheStats 
wxString::ms_cacheStats
; 
 // Prints the string cache statistics from its destructor. The values are read
 // from wxString::ms_cacheStats and written with puts()/printf(); percentages
 // and averages are computed as float ratios over stats.postot and
 // stats.lentot.
 // NOTE(review): no guard against postot or lentot being zero is visible in
 // this listing; one may exist on a line not shown - confirm.
 153 struct wxStrCacheStatsDumper
 
 155     ~wxStrCacheStatsDumper() 
 157         const wxString::CacheStats
& stats 
= wxString::ms_cacheStats
; 
 161             puts("*** wxString cache statistics:"); 
 162             printf("\tTotal non-trivial calls to PosToImpl(): %u\n", 
 164             printf("\tHits %u (of which %u not used) or %.2f%%\n", 
 167                    100.*float(stats
.poshits 
- stats
.mishits
)/stats
.postot
); 
 168             printf("\tAverage position requested: %.2f\n", 
 169                    float(stats
.sumpos
) / stats
.postot
); 
 170             printf("\tAverage offset after cached hint: %.2f\n", 
 171                    float(stats
.sumofs
) / stats
.postot
); 
 176             printf("\tNumber of calls to length(): %u, hits=%.2f%%\n", 
 177                    stats
.lentot
, 100.*float(stats
.lenhits
)/stats
.lentot
); 
 // File-scope wxStrCacheStatsDumper object; the statistics are printed from
 // its destructor.
 182 static wxStrCacheStatsDumper s_showCacheStats
; 
 184 #endif // wxPROFILE_STRING_CACHE 
 186 #endif // wxUSE_STRING_POS_CACHE 
 188 // ---------------------------------------------------------------------------- 
 190 // ---------------------------------------------------------------------------- 
 192 #if wxUSE_STD_IOSTREAM 
 // Writes a wxCStrData to a narrow stream and returns the stream.
 // When wxUSE_UNICODE && !wxUSE_UNICODE_UTF8 the data is first obtained as a
 // wxScopedCharBuffer via AsCharBuf(); os.clear(failbit) sets the stream state
 // to failbit (the condition guarding that call is not visible in this listing
 // - presumably an empty/failed conversion; confirm). The other visible path
 // inserts str.AsInternal().
 196 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxCStrData
& str
) 
 198 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8 
 199     const wxScopedCharBuffer 
buf(str
.AsCharBuf()); 
 201         os
.clear(wxSTD 
ios_base::failbit
); 
 207     return os 
<< str
.AsInternal(); 
 // Writes a wxString to a narrow stream by inserting str.c_str(); returns the
 // stream.
 211 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxString
& str
) 
 213     return os 
<< str
.c_str(); 
 // Writes a wxScopedCharBuffer to a narrow stream by inserting str.data();
 // returns the stream.
 216 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxScopedCharBuffer
& str
) 
 218     return os 
<< str
.data(); 
 // Writes a wxScopedWCharBuffer to a narrow stream by inserting str.data();
 // returns the stream.
 // NOTE(review): how the wide-character pointer is rendered depends on which
 // operator<< overload is selected - not determinable from this listing.
 222 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxScopedWCharBuffer
& str
) 
 224     return os 
<< str
.data(); 
 228 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM) 
 // Writes a wxString to a wide stream by inserting str.wc_str(); returns the
 // stream. Compiled only when wxUSE_UNICODE && defined(HAVE_WOSTREAM).
 230 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxString
& str
) 
 232     return wos 
<< str
.wc_str(); 
 // Writes a wxCStrData to a wide stream by inserting str.AsWChar(); returns
 // the stream.
 235 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxCStrData
& str
) 
 237     return wos 
<< str
.AsWChar(); 
 // Writes a wxScopedWCharBuffer to a wide stream by inserting str.data();
 // returns the stream.
 240 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxScopedWCharBuffer
& str
) 
 242     return wos 
<< str
.data(); 
 245 #endif  // wxUSE_UNICODE && defined(HAVE_WOSTREAM) 
 247 #endif // wxUSE_STD_IOSTREAM 
 249 // =========================================================================== 
 250 // wxString class core 
 251 // =========================================================================== 
 253 #if wxUSE_UNICODE_UTF8 
 // UTF-8 build only. Translates a (pos, len) pair into the corresponding
 // offset and length within m_impl, written to *implPos and *implLen:
 // *implPos is the distance from m_impl.begin() to the iterator for the
 // pos-th character, and *implLen is the distance between the impl iterators
 // of the start and of the position reached by the loop below.
 // NOTE(review): the branches taken for pos/len values other than "valid"
 // are not visible in this listing. The loop condition is `i <= e` (not
 // `i < e`) and the loop body is not shown - confirm that `i` cannot be
 // advanced past end().
 255 void wxString::PosLenToImpl(size_t pos
, size_t len
, 
 256                             size_t *implPos
, size_t *implLen
) const 
 262     else // have valid start position 
 264         const const_iterator b 
= GetIterForNthChar(pos
); 
 265         *implPos 
= wxStringImpl::const_iterator(b
.impl()) - m_impl
.begin(); 
 270         else // have valid length too 
 272             // we need to handle the case of length specifying a substring 
 273             // going beyond the end of the string, just as std::string does 
 274             const const_iterator 
e(end()); 
 276             while ( len 
&& i 
<= e 
) 
 282             *implLen 
= i
.impl() - b
.impl(); 
 287 #endif // wxUSE_UNICODE_UTF8 
 289 // ---------------------------------------------------------------------------- 
 290 // wxCStrData converted strings caching 
 291 // ---------------------------------------------------------------------------- 
 293 // FIXME-UTF8: temporarily disabled because it doesn't work with global 
 294 //             string objects; re-enable after fixing this bug and benchmarking 
 295 //             performance to see if using a hash is a good idea at all 
 298 // For backward compatibility reasons, it must be possible to assign the value 
 299 // returned by wxString::c_str() to a char* or wchar_t* variable and work with 
 300 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick, 
 301 // because the memory would be freed immediately, but it has to be valid as long 
 302 // as the string is not modified, so that code like this still works: 
 304 // const wxChar *s = str.c_str(); 
 305 // while ( s ) { ... } 
 307 // FIXME-UTF8: not thread safe! 
 308 // FIXME-UTF8: we currently clear the cached conversion only when the string is 
 309 //             destroyed, but we should do it when the string is modified, to 
 310 //             keep memory usage down 
 311 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we 
 312 //             invalidated the cache on every change, we could keep the previous 
 314 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed 
 315 //             to use mb_str() or wc_str() instead of (const [w]char*)c_str() 
 // Looks up `s` (const-cast to wxString*) in `hash` and tests whether an
 // entry was found.
 // NOTE(review): the action taken on a hit is not visible in this listing
 // (presumably the cached buffer is freed and the entry erased - confirm).
 // T is presumably a template parameter naming the hash map type.
 318 static inline void DeleteStringFromConversionCache(T
& hash
, const wxString 
*s
) 
 320     typename 
T::iterator i 
= hash
.find(wxConstCast(s
, wxString
)); 
 321     if ( i 
!= hash
.end() ) 
 329 // NB: non-STL implementation doesn't compile with "const wxString*" key type, 
 330 //     so we have to use wxString* here and const-cast when used 
 // Declares wxStringCharConversionCache, a hash map from wxString* to char*
 // using wxPointerHash/wxPointerEqual, and the file-static instance that
 // wxCStrData::AsChar() stores converted buffers in.
 331 WX_DECLARE_HASH_MAP(wxString
*, char*, wxPointerHash
, wxPointerEqual
, 
 332                     wxStringCharConversionCache
); 
 333 static wxStringCharConversionCache gs_stringsCharCache
; 
 // Converts *m_str with mb_str() and stores the released buffer pointer in
 // gs_stringsCharCache under the m_str key, after first dropping any earlier
 // entry for the same string. The return statement is not visible in this
 // listing (presumably `s` is returned - confirm).
 335 const char* wxCStrData::AsChar() const 
 337     // remove previously cached value, if any (see FIXMEs above): 
 338     DeleteStringFromConversionCache(gs_stringsCharCache
, m_str
); 
 340     // convert the string and keep it: 
 341     const char *s 
= gs_stringsCharCache
[wxConstCast(m_str
, wxString
)] = 
 342         m_str
->mb_str().release(); 
 346 #endif // wxUSE_UNICODE 
 348 #if !wxUSE_UNICODE_WCHAR 
 // Declares wxStringWCharConversionCache, a hash map from wxString* to
 // wchar_t* using wxPointerHash/wxPointerEqual, and the file-static instance
 // that wxCStrData::AsWChar() stores converted buffers in. Compiled only when
 // !wxUSE_UNICODE_WCHAR.
 349 WX_DECLARE_HASH_MAP(wxString
*, wchar_t*, wxPointerHash
, wxPointerEqual
, 
 350                     wxStringWCharConversionCache
); 
 351 static wxStringWCharConversionCache gs_stringsWCharCache
; 
 // Converts *m_str with wc_str() and stores the released buffer pointer in
 // gs_stringsWCharCache under the m_str key, after first dropping any earlier
 // entry for the same string. The return statement is not visible in this
 // listing (presumably `s` is returned - confirm).
 353 const wchar_t* wxCStrData::AsWChar() const 
 355     // remove previously cached value, if any (see FIXMEs above): 
 356     DeleteStringFromConversionCache(gs_stringsWCharCache
, m_str
); 
 358     // convert the string and keep it: 
 359     const wchar_t *s 
= gs_stringsWCharCache
[wxConstCast(m_str
, wxString
)] = 
 360         m_str
->wc_str().release(); 
 364 #endif // !wxUSE_UNICODE_WCHAR 
 // Destructor: removes this string's entry from the char conversion cache
 // and, when !wxUSE_UNICODE_WCHAR, from the wchar_t conversion cache too.
 366 wxString::~wxString() 
 369     // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8 
 370     DeleteStringFromConversionCache(gs_stringsCharCache
, this); 
 372 #if !wxUSE_UNICODE_WCHAR 
 373     DeleteStringFromConversionCache(gs_stringsWCharCache
, this); 
 378 // =========================================================================== 
 379 // wxString class core 
 380 // =========================================================================== 
 382 // --------------------------------------------------------------------------- 
 383 // construction and conversion 
 384 // --------------------------------------------------------------------------- 
 386 #if wxUSE_UNICODE_WCHAR 
 // wxUSE_UNICODE_WCHAR build: converts the multibyte string psz to wide
 // characters with conv.cMB2WC(), which also reports the length in wcLen.
 // A NULL psz or a zero nLength yields an empty buffer (L"", 0). Otherwise
 // the visible returns are either the same empty buffer or (wcBuf, wcLen).
 // NOTE(review): the condition choosing between those two returns and the
 // handling of nLength == npos are on lines not visible in this listing.
 388 wxString::SubstrBufFromMB 
wxString::ConvertStr(const char *psz
, size_t nLength
, 
 389                                                const wxMBConv
& conv
) 
 392     if ( !psz 
|| nLength 
== 0 ) 
 393         return SubstrBufFromMB(wxWCharBuffer(L
""), 0); 
 395     if ( nLength 
== npos 
) 
 399     wxScopedWCharBuffer 
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
)); 
 401         return SubstrBufFromMB(wxWCharBuffer(L
""), 0); 
 403         return SubstrBufFromMB(wcBuf
, wcLen
); 
 405 #endif // wxUSE_UNICODE_WCHAR 
 407 #if wxUSE_UNICODE_UTF8 
 // wxUSE_UNICODE_UTF8 build: produces the UTF-8 internal representation of
 // the multibyte string psz.
 // A NULL psz or a zero nLength yields an empty buffer ("", 0). If the input
 // passes IsValidUtf8String() it is wrapped without copying via
 // CreateNonOwned() (that fast path is guarded by a condition not visible in
 // this listing). Otherwise the string is converted to wide characters with
 // conv.cMB2WC() and then to UTF-8 through the wchar_t ConvertStr() overload
 // with wxMBConvStrictUTF8.
 // NOTE(review): the final return and the handling of nLength == npos on the
 // slow path are on lines not visible in this listing.
 409 wxString::SubstrBufFromMB 
wxString::ConvertStr(const char *psz
, size_t nLength
, 
 410                                                const wxMBConv
& conv
) 
 413     if ( !psz 
|| nLength 
== 0 ) 
 414         return SubstrBufFromMB(wxCharBuffer(""), 0); 
 416     // if psz is already in UTF-8, we don't have to do the roundtrip to 
 417     // wchar_t* and back: 
 420         // we need to validate the input because UTF8 iterators assume valid 
 421         // UTF-8 sequence and psz may be invalid: 
 422         if ( wxStringOperations::IsValidUtf8String(psz
, nLength
) ) 
 424             // we must pass the real string length to SubstrBufFromMB ctor 
 425             if ( nLength 
== npos 
) 
 426                 nLength 
= psz 
? strlen(psz
) : 0; 
 427             return SubstrBufFromMB(wxScopedCharBuffer::CreateNonOwned(psz
, nLength
), 
 430         // else: do the roundtrip through wchar_t* 
 433     if ( nLength 
== npos 
) 
 436     // first convert to wide string: 
 438     wxScopedWCharBuffer 
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
)); 
 440         return SubstrBufFromMB(wxCharBuffer(""), 0); 
 442     // and then to UTF-8: 
 443     SubstrBufFromMB 
buf(ConvertStr(wcBuf
, wcLen
, wxMBConvStrictUTF8())); 
 444     // widechar -> UTF-8 conversion isn't supposed to ever fail: 
 445     wxASSERT_MSG( buf
.data
, wxT("conversion to UTF-8 failed") ); 
 449 #endif // wxUSE_UNICODE_UTF8 
 451 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE 
 // Compiled when wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE: converts the wide
 // string pwz to multibyte with conv.cWC2MB(), which also reports the length
 // in mbLen. A NULL pwz or a zero nLength yields an empty buffer ("", 0).
 // Otherwise the visible returns are either the same empty buffer or
 // (mbBuf, mbLen).
 // NOTE(review): the condition choosing between those two returns and the
 // handling of nLength == npos are on lines not visible in this listing.
 453 wxString::SubstrBufFromWC 
wxString::ConvertStr(const wchar_t *pwz
, size_t nLength
, 
 454                                                const wxMBConv
& conv
) 
 457     if ( !pwz 
|| nLength 
== 0 ) 
 458         return SubstrBufFromWC(wxCharBuffer(""), 0); 
 460     if ( nLength 
== npos 
) 
 464     wxScopedCharBuffer 
mbBuf(conv
.cWC2MB(pwz
, nLength
, &mbLen
)); 
 466         return SubstrBufFromWC(wxCharBuffer(""), 0); 
 468         return SubstrBufFromWC(mbBuf
, mbLen
); 
 470 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE 
 472 // This std::string::c_str()-like method returns a wide char pointer to string 
 473 // contents. In wxUSE_UNICODE_WCHAR case it is trivial as it can simply return 
 474 // a pointer to the internal representation. Otherwise a conversion is required 
 475 // and it returns a temporary buffer. 
 477 // However for compatibility with c_str() and to avoid breaking existing code 
 480 //      for ( const wchar_t *p = s.wc_str(); *p; p++ ) 
 483 // we actually need to ensure that the returned buffer is _not_ temporary and 
 484 // so we use wxString::m_convertedToWChar to store the returned data 
 485 #if !wxUSE_UNICODE_WCHAR 
 // Converts the internal multibyte data (m_impl) to wide characters using
 // conv and returns a pointer to the result stored in m_convertedToWChar.
 // The required length is queried first with ToWChar(NULL, 0, ...); the
 // buffer is (re)sized with Extend() only when it is missing or its length
 // differs; then the terminating L'\0' is written and the conversion is done
 // into the buffer.
 // NOTE(review): the statements executed on the failure checks
 // (wxCONV_FAILED, Extend() returning false) are not visible in this listing
 // - presumably a NULL return; confirm.
 487 const wchar_t *wxString::AsWChar(const wxMBConv
& conv
) const 
 489     const char * const strMB 
= m_impl
.c_str(); 
 490     const size_t lenMB 
= m_impl
.length(); 
 492     // find out the size of the buffer needed 
 493     const size_t lenWC 
= conv
.ToWChar(NULL
, 0, strMB
, lenMB
); 
 494     if ( lenWC 
== wxCONV_FAILED 
) 
 497     // keep the same buffer if the string size didn't change: this is not only 
 498     // an optimization but also ensures that code which modifies string 
 499     // character by character (without changing its length) can continue to use 
 500     // the pointer returned by a previous wc_str() call even after changing the 
 503     // TODO-UTF8: we could check for ">" instead of "!=" here as this would 
 504     //            allow to save on buffer reallocations but at the cost of 
 505     //            consuming (even) more memory, we should benchmark this to 
 506     //            determine if it's worth doing 
 507     if ( !m_convertedToWChar
.m_str 
|| lenWC 
!= m_convertedToWChar
.m_len 
) 
 509         if ( !const_cast<wxString 
*>(this)->m_convertedToWChar
.Extend(lenWC
) ) 
 513     // finally do convert 
 514     m_convertedToWChar
.m_str
[lenWC
] = L
'\0'; 
 515     if ( conv
.ToWChar(m_convertedToWChar
.m_str
, lenWC
, 
 516                       strMB
, lenMB
) == wxCONV_FAILED 
) 
 519     return m_convertedToWChar
.m_str
; 
 522 #endif // !wxUSE_UNICODE_WCHAR 
 525 // Same thing for mb_str() which returns a normal char pointer to string 
 526 // contents: this always requires converting it to the specified encoding in 
 527 // non-ANSI build except if we need to convert to UTF-8 and this is what we 
 528 // already use internally. 
 // Converts the string to multibyte using conv and returns a pointer to the
 // result stored in m_convertedToChar.
 // The wide-character source is AsWChar(wxMBConvStrictUTF8()) in the UTF-8
 // build and m_impl in the wchar_t build. In the UTF-8 build there is also a
 // path returning m_impl.c_str() directly (its guarding condition is not
 // visible in this listing). The required length is queried with
 // FromWChar(NULL, 0, ...); the buffer is (re)sized with Extend() only when
 // it is missing or its length differs; then the terminating '\0' is written
 // and the conversion is done into the buffer.
 // NOTE(review): the statements executed on the failure checks are not
 // visible in this listing - presumably a NULL return; confirm. Also, the
 // UTF-8 path uses strWC without a visible NULL check.
 531 const char *wxString::AsChar(const wxMBConv
& conv
) const 
 533 #if wxUSE_UNICODE_UTF8 
 535         return m_impl
.c_str(); 
 537     const wchar_t * const strWC 
= AsWChar(wxMBConvStrictUTF8()); 
 538     const size_t lenWC 
= m_convertedToWChar
.m_len
; 
 539 #else // wxUSE_UNICODE_WCHAR 
 540     const wchar_t * const strWC 
= m_impl
.c_str(); 
 541     const size_t lenWC 
= m_impl
.length(); 
 542 #endif // wxUSE_UNICODE_UTF8/wxUSE_UNICODE_WCHAR 
 544     const size_t lenMB 
= conv
.FromWChar(NULL
, 0, strWC
, lenWC
); 
 545     if ( lenMB 
== wxCONV_FAILED 
) 
 548     if ( !m_convertedToChar
.m_str 
|| lenMB 
!= m_convertedToChar
.m_len 
) 
 550         if ( !const_cast<wxString 
*>(this)->m_convertedToChar
.Extend(lenMB
) ) 
 554     m_convertedToChar
.m_str
[lenMB
] = '\0'; 
 555     if ( conv
.FromWChar(m_convertedToChar
.m_str
, lenMB
, 
 556                         strWC
, lenWC
) == wxCONV_FAILED 
) 
 559     return m_convertedToChar
.m_str
; 
 562 #endif // wxUSE_UNICODE 
 564 // shrink to minimal size (releasing extra memory) 
 // Builds a copy of the string from [begin(), end()) and returns whether the
 // copy's length equals this string's length.
 // NOTE(review): no statement between the copy and the return is visible in
 // this listing; presumably the copy is swapped into *this to release the
 // extra capacity - confirm against the full file.
 565 bool wxString::Shrink() 
 567   wxString 
tmp(begin(), end()); 
 569   return tmp
.length() == length(); 
 572 // deprecated compatibility code: 
 573 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8 
 // Deprecated compatibility wrapper: forwards to DoGetWriteBuf(nLen) and
 // returns its result.
 574 wxStringCharType 
*wxString::GetWriteBuf(size_t nLen
) 
 576     return DoGetWriteBuf(nLen
); 
 // Deprecated compatibility wrapper taking no length.
 // NOTE(review): the body is not visible in this listing (presumably forwards
 // to DoUngetWriteBuf() - confirm).
 579 void wxString::UngetWriteBuf() 
 // Deprecated compatibility wrapper: forwards to DoUngetWriteBuf(nLen).
 584 void wxString::UngetWriteBuf(size_t nLen
) 
 586     DoUngetWriteBuf(nLen
); 
 588 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8 
 591 // --------------------------------------------------------------------------- 
 593 // --------------------------------------------------------------------------- 
 595 // all functions are inline in string.h 
 597 // --------------------------------------------------------------------------- 
 598 // concatenation operators 
 599 // --------------------------------------------------------------------------- 
 602  * concatenation functions come in 5 flavours: 
 604  *  char   + string      and      string + char 
 605  *  C str  + string      and      string + C str 
 // string + string. Only the validity assertions on both operands (compiled
 // when !wxUSE_STL_BASED_WXSTRING) are visible in this listing; the
 // concatenation itself is on lines not shown.
 608 wxString 
operator+(const wxString
& str1
, const wxString
& str2
) 
 610 #if !wxUSE_STL_BASED_WXSTRING 
 611     wxASSERT( str1
.IsValid() ); 
 612     wxASSERT( str2
.IsValid() ); 
 // string + character. Only the validity assertion on str (compiled when
 // !wxUSE_STL_BASED_WXSTRING) is visible in this listing; the concatenation
 // itself is on lines not shown.
 621 wxString 
operator+(const wxString
& str
, wxUniChar ch
) 
 623 #if !wxUSE_STL_BASED_WXSTRING 
 624     wxASSERT( str
.IsValid() ); 
 // character + string. Only the validity assertion on str (compiled when
 // !wxUSE_STL_BASED_WXSTRING) is visible in this listing; the concatenation
 // itself is on lines not shown.
 633 wxString 
operator+(wxUniChar ch
, const wxString
& str
) 
 635 #if !wxUSE_STL_BASED_WXSTRING 
 636     wxASSERT( str
.IsValid() ); 
 // string + narrow C string. Asserts str is valid (when
 // !wxUSE_STL_BASED_WXSTRING), then asks the result string `s` to allocate
 // strlen(psz) + str.length() and reports failure with wxFAIL_MSG.
 // NOTE(review): the declaration of `s`, the appends and the return are on
 // lines not visible in this listing; psz is passed to strlen() without a
 // visible NULL check.
 645 wxString 
operator+(const wxString
& str
, const char *psz
) 
 647 #if !wxUSE_STL_BASED_WXSTRING 
 648     wxASSERT( str
.IsValid() ); 
 652     if ( !s
.Alloc(strlen(psz
) + str
.length()) ) { 
 653         wxFAIL_MSG( wxT("out of memory in wxString::operator+") ); 
 // string + wide C string. Asserts str is valid (when
 // !wxUSE_STL_BASED_WXSTRING), then asks the result string `s` to allocate
 // wxWcslen(pwz) + str.length() and reports failure with wxFAIL_MSG.
 // NOTE(review): the declaration of `s`, the appends and the return are on
 // lines not visible in this listing.
 661 wxString 
operator+(const wxString
& str
, const wchar_t *pwz
) 
 663 #if !wxUSE_STL_BASED_WXSTRING 
 664     wxASSERT( str
.IsValid() ); 
 668     if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) { 
 669         wxFAIL_MSG( wxT("out of memory in wxString::operator+") ); 
 // narrow C string + string. Asserts str is valid (when
 // !wxUSE_STL_BASED_WXSTRING), then asks the result string `s` to allocate
 // strlen(psz) + str.length() and reports failure with wxFAIL_MSG.
 // NOTE(review): the declaration of `s`, the appends and the return are on
 // lines not visible in this listing; psz is passed to strlen() without a
 // visible NULL check.
 677 wxString 
operator+(const char *psz
, const wxString
& str
) 
 679 #if !wxUSE_STL_BASED_WXSTRING 
 680     wxASSERT( str
.IsValid() ); 
 684     if ( !s
.Alloc(strlen(psz
) + str
.length()) ) { 
 685         wxFAIL_MSG( wxT("out of memory in wxString::operator+") ); 
 // wide C string + string. Asserts str is valid (when
 // !wxUSE_STL_BASED_WXSTRING), then asks the result string `s` to allocate
 // wxWcslen(pwz) + str.length() and reports failure with wxFAIL_MSG.
 // NOTE(review): the declaration of `s`, the appends and the return are on
 // lines not visible in this listing.
 693 wxString 
operator+(const wchar_t *pwz
, const wxString
& str
) 
 695 #if !wxUSE_STL_BASED_WXSTRING 
 696     wxASSERT( str
.IsValid() ); 
 700     if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) { 
 701         wxFAIL_MSG( wxT("out of memory in wxString::operator+") ); 
 709 // --------------------------------------------------------------------------- 
 711 // --------------------------------------------------------------------------- 
 // Returns true only if the string has exactly one character and that
 // character equals c: compared directly when compareWithCase is true,
 // otherwise after applying wxToupper() to both sides.
 713 bool wxString::IsSameAs(wxUniChar c
, bool compareWithCase
) const 
 715     return (length() == 1) && (compareWithCase 
? GetChar(0u) == c
 
 716                                : wxToupper(GetChar(0u)) == wxToupper(c
)); 
 719 #ifdef HAVE_STD_STRING_COMPARE 
 721 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with 
 722 //     UTF-8 encoded strings too, thanks to UTF-8's design which allows us to 
 723 //     sort strings in characters code point order by sorting the byte sequence 
 724 //     in byte values order (i.e. what strcmp() and memcmp() do). 
 // HAVE_STD_STRING_COMPARE variant: compares the whole string with str by
 // forwarding to m_impl.compare() on the two internal representations.
 726 int wxString::compare(const wxString
& str
) const 
 728     return m_impl
.compare(str
.m_impl
); 
 // HAVE_STD_STRING_COMPARE variant: compares the substring (nStart, nLen) of
 // this string with the whole of str. The position and length are first
 // mapped to impl units with PosLenToImpl(); the declarations of pos/len are
 // on a line not visible in this listing.
 731 int wxString::compare(size_t nStart
, size_t nLen
, 
 732                       const wxString
& str
) const 
 735     PosLenToImpl(nStart
, nLen
, &pos
, &len
); 
 736     return m_impl
.compare(pos
, len
, str
.m_impl
); 
 // HAVE_STD_STRING_COMPARE variant: compares the substring (nStart, nLen) of
 // this string with the substring (nStart2, nLen2) of str. Both ranges are
 // mapped to impl units with PosLenToImpl() before calling m_impl.compare().
 // NOTE(review): the `str` parameter line and the declarations of
 // pos/len/pos2/len2 are not visible in this listing.
 739 int wxString::compare(size_t nStart
, size_t nLen
, 
 741                       size_t nStart2
, size_t nLen2
) const 
 744     PosLenToImpl(nStart
, nLen
, &pos
, &len
); 
 747     str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
); 
 749     return m_impl
.compare(pos
, len
, str
.m_impl
, pos2
, len2
); 
 // HAVE_STD_STRING_COMPARE variant: compares the whole string with the narrow
 // C string sz after converting it with ImplStr().
 752 int wxString::compare(const char* sz
) const 
 754     return m_impl
.compare(ImplStr(sz
)); 
 // HAVE_STD_STRING_COMPARE variant: compares the whole string with the wide
 // C string sz after converting it with ImplStr().
 757 int wxString::compare(const wchar_t* sz
) const 
 759     return m_impl
.compare(ImplStr(sz
)); 
 // HAVE_STD_STRING_COMPARE variant: compares the substring (nStart, nLen) of
 // this string with the first nCount characters of the narrow string sz.
 // The range is mapped with PosLenToImpl() and sz is converted with
 // ImplStr(sz, nCount) into a SubstrBufFromMB whose data/len are passed to
 // m_impl.compare(). The declarations of pos/len are not visible here.
 762 int wxString::compare(size_t nStart
, size_t nLen
, 
 763                       const char* sz
, size_t nCount
) const 
 766     PosLenToImpl(nStart
, nLen
, &pos
, &len
); 
 768     SubstrBufFromMB 
str(ImplStr(sz
, nCount
)); 
 770     return m_impl
.compare(pos
, len
, str
.data
, str
.len
); 
 // HAVE_STD_STRING_COMPARE variant: compares the substring (nStart, nLen) of
 // this string with the first nCount characters of the wide string sz.
 // The range is mapped with PosLenToImpl() and sz is converted with
 // ImplStr(sz, nCount) into a SubstrBufFromWC whose data/len are passed to
 // m_impl.compare(). The declarations of pos/len are not visible here.
 773 int wxString::compare(size_t nStart
, size_t nLen
, 
 774                       const wchar_t* sz
, size_t nCount
) const 
 777     PosLenToImpl(nStart
, nLen
, &pos
, &len
); 
 779     SubstrBufFromWC 
str(ImplStr(sz
, nCount
)); 
 781     return m_impl
.compare(pos
, len
, str
.data
, str
.len
); 
 784 #else // !HAVE_STD_STRING_COMPARE 
 // Compares two character buffers (s1 of length l1, s2 of length l2) with
 // wxStringMemcmp(). Three cases are visible:
 //  - compare l1 elements and return the result unchanged;
 //  - compare l1 elements and return -1 if they are equal, else the result;
 //  - compare l2 elements and return +1 if they are equal, else the result.
 // NOTE(review): the conditions selecting each case are not visible in this
 // listing - presumably l1 == l2, l1 < l2 and l1 > l2 respectively; confirm.
 786 static inline int wxDoCmp(const wxStringCharType
* s1
, size_t l1
, 
 787                           const wxStringCharType
* s2
, size_t l2
) 
 790         return wxStringMemcmp(s1
, s2
, l1
); 
 793         int ret 
= wxStringMemcmp(s1
, s2
, l1
); 
 794         return ret 
== 0 ? -1 : ret
; 
 798         int ret 
= wxStringMemcmp(s1
, s2
, l2
); 
 799         return ret 
== 0 ? +1 : ret
; 
 // !HAVE_STD_STRING_COMPARE variant: compares the whole string with str by
 // passing both internal buffers and their lengths to wxDoCmp().
 803 int wxString::compare(const wxString
& str
) const 
 805     return ::wxDoCmp(m_impl
.data(), m_impl
.length(), 
 806                      str
.m_impl
.data(), str
.m_impl
.length()); 
 // !HAVE_STD_STRING_COMPARE variant: compares the substring (nStart, nLen) of
 // this string with the whole of str. Asserts nStart <= length(), clamps nLen
 // to the number of characters remaining after nStart, maps the range with
 // PosLenToImpl() and calls wxDoCmp(). The declarations of pos/len are on a
 // line not visible in this listing.
 809 int wxString::compare(size_t nStart
, size_t nLen
, 
 810                       const wxString
& str
) const 
 812     wxASSERT(nStart 
<= length()); 
 813     size_type strLen 
= length() - nStart
; 
 814     nLen 
= strLen 
< nLen 
? strLen 
: nLen
; 
 817     PosLenToImpl(nStart
, nLen
, &pos
, &len
); 
 819     return ::wxDoCmp(m_impl
.data() + pos
,  len
, 
 820                      str
.m_impl
.data(), str
.m_impl
.length()); 
 // !HAVE_STD_STRING_COMPARE variant: compares the substring (nStart, nLen) of
 // this string with the substring (nStart2, nLen2) of str. Asserts both start
 // positions are within their strings, clamps both lengths to the characters
 // remaining, maps both ranges with PosLenToImpl() and calls wxDoCmp().
 // NOTE(review): the `str` parameter line and the declarations of
 // pos/len/pos2/len2 are not visible in this listing.
 823 int wxString::compare(size_t nStart
, size_t nLen
, 
 825                       size_t nStart2
, size_t nLen2
) const 
 827     wxASSERT(nStart 
<= length()); 
 828     wxASSERT(nStart2 
<= str
.length()); 
 829     size_type strLen  
=     length() - nStart
, 
 830               strLen2 
= str
.length() - nStart2
; 
 831     nLen  
= strLen  
< nLen  
? strLen  
: nLen
; 
 832     nLen2 
= strLen2 
< nLen2 
? strLen2 
: nLen2
; 
 835     PosLenToImpl(nStart
, nLen
, &pos
, &len
); 
 837     str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
); 
 839     return ::wxDoCmp(m_impl
.data() + pos
, len
, 
 840                      str
.m_impl
.data() + pos2
, len2
); 
 // !HAVE_STD_STRING_COMPARE variant: compares the whole string with the
 // narrow C string sz. sz is converted with ImplStr(sz, npos); if the
 // resulting length is still npos it is computed with wxStringStrlen().
 843 int wxString::compare(const char* sz
) const 
 845     SubstrBufFromMB 
str(ImplStr(sz
, npos
)); 
 846     if ( str
.len 
== npos 
) 
 847         str
.len 
= wxStringStrlen(str
.data
); 
 848     return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
); 
 // !HAVE_STD_STRING_COMPARE variant: compares the whole string with the wide
 // C string sz. sz is converted with ImplStr(sz, npos); if the resulting
 // length is still npos it is computed with wxStringStrlen().
 851 int wxString::compare(const wchar_t* sz
) const 
 853     SubstrBufFromWC 
str(ImplStr(sz
, npos
)); 
 854     if ( str
.len 
== npos 
) 
 855         str
.len 
= wxStringStrlen(str
.data
); 
 856     return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
); 
 // !HAVE_STD_STRING_COMPARE variant: compares the substring (nStart, nLen) of
 // this string with the first nCount characters of the narrow string sz.
 // Asserts nStart <= length(), clamps nLen to the characters remaining, maps
 // the range with PosLenToImpl(), converts sz with ImplStr(sz, nCount)
 // (computing the length with wxStringStrlen() if it is npos) and calls
 // wxDoCmp(). The declarations of pos/len are not visible in this listing.
 859 int wxString::compare(size_t nStart
, size_t nLen
, 
 860                       const char* sz
, size_t nCount
) const 
 862     wxASSERT(nStart 
<= length()); 
 863     size_type strLen 
= length() - nStart
; 
 864     nLen 
= strLen 
< nLen 
? strLen 
: nLen
; 
 867     PosLenToImpl(nStart
, nLen
, &pos
, &len
); 
 869     SubstrBufFromMB 
str(ImplStr(sz
, nCount
)); 
 870     if ( str
.len 
== npos 
) 
 871         str
.len 
= wxStringStrlen(str
.data
); 
 873     return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
); 
 // !HAVE_STD_STRING_COMPARE variant: compares the substring (nStart, nLen) of
 // this string with the first nCount characters of the wide string sz.
 // Asserts nStart <= length(), clamps nLen to the characters remaining, maps
 // the range with PosLenToImpl(), converts sz with ImplStr(sz, nCount)
 // (computing the length with wxStringStrlen() if it is npos) and calls
 // wxDoCmp(). The declarations of pos/len are not visible in this listing.
 876 int wxString::compare(size_t nStart
, size_t nLen
, 
 877                       const wchar_t* sz
, size_t nCount
) const 
 879     wxASSERT(nStart 
<= length()); 
 880     size_type strLen 
= length() - nStart
; 
 881     nLen 
= strLen 
< nLen 
? strLen 
: nLen
; 
 884     PosLenToImpl(nStart
, nLen
, &pos
, &len
); 
 886     SubstrBufFromWC 
str(ImplStr(sz
, nCount
)); 
 887     if ( str
.len 
== npos 
) 
 888         str
.len 
= wxStringStrlen(str
.data
); 
 890     return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
); 
 893 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE 
 896 // --------------------------------------------------------------------------- 
 897 // find_{first,last}_[not]_of functions 
 898 // --------------------------------------------------------------------------- 
 900 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8 
 902 // NB: All these functions are implemented with the argument being wxChar*, 
 903 //     i.e. widechar string in any Unicode build, even though native string 
 904 //     representation is char* in the UTF-8 build. This is because we couldn't 
 905 //     use memchr() to determine if a character is in a set encoded as UTF-8. 
 // Forwards to the three-argument overload, using wxStrlen(sz) as the number
 // of characters in the set.
 907 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
) const 
 909     return find_first_of(sz
, nStart
, wxStrlen(sz
)); 
 // Forwards to the three-argument overload, using wxStrlen(sz) as the number
 // of characters in the set.
 912 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
) const 
 914     return find_first_not_of(sz
, nStart
, wxStrlen(sz
)); 
 // Scans forward from character index nStart, testing each character with
 // wxTmemchr() against the first n characters of sz; `idx` is advanced in
 // step with the iterator.
 // NOTE(review): the declaration of idx and the return statements are on
 // lines not visible in this listing (presumably idx on a match, npos
 // otherwise - confirm).
 917 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
, size_t n
) const 
 919     wxASSERT_MSG( nStart 
<= length(),  wxT("invalid index") ); 
 922     for ( const_iterator i 
= begin() + nStart
; i 
!= end(); ++idx
, ++i 
) 
 924         if ( wxTmemchr(sz
, *i
, n
) ) 
 // Scans forward from character index nStart, looking for a character for
 // which wxTmemchr() finds no match in the first n characters of sz; `idx` is
 // advanced in step with the iterator.
 // NOTE(review): the declaration of idx and the return statements are on
 // lines not visible in this listing (presumably idx on a hit, npos
 // otherwise - confirm).
 931 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const 
 933     wxASSERT_MSG( nStart 
<= length(),  wxT("invalid index") ); 
 936     for ( const_iterator i 
= begin() + nStart
; i 
!= end(); ++idx
, ++i 
) 
 938         if ( !wxTmemchr(sz
, *i
, n
) ) 
 // Forwards to the three-argument overload, using wxStrlen(sz) as the number
 // of characters in the set.
 946 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
) const 
 948     return find_last_of(sz
, nStart
, wxStrlen(sz
)); 
 // Forwards to the three-argument overload, using wxStrlen(sz) as the number
 // of characters in the set.
 951 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
) const 
 953     return find_last_not_of(sz
, nStart
, wxStrlen(sz
)); 
 // Scans backwards with a reverse iterator starting at character index
 // nStart, testing each character with wxTmemchr() against the first n
 // characters of sz; `idx` is decremented in step with the iterator.
 // NOTE(review): the handling of nStart == npos, the declaration of idx and
 // the return statements are on lines not visible in this listing. The
 // assertion allows nStart == len, in which case (len - nStart - 1) wraps
 // around as size_t - confirm this case is handled.
 956 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
, size_t n
) const 
 958     size_t len 
= length(); 
 960     if ( nStart 
== npos 
) 
 966         wxASSERT_MSG( nStart 
<= len
, wxT("invalid index") ); 
 970     for ( const_reverse_iterator i 
= rbegin() + (len 
- nStart 
- 1); 
 971           i 
!= rend(); --idx
, ++i 
) 
 973         if ( wxTmemchr(sz
, *i
, n
) ) 
 // Scans backwards with a reverse iterator starting at character index
 // nStart, looking for a character for which wxTmemchr() finds no match in
 // the first n characters of sz; `idx` is decremented in step with the
 // iterator.
 // NOTE(review): the handling of nStart == npos, the declaration of idx and
 // the return statements are on lines not visible in this listing. The
 // assertion allows nStart == len, in which case (len - nStart - 1) wraps
 // around as size_t - confirm this case is handled.
 980 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const 
 982     size_t len 
= length(); 
 984     if ( nStart 
== npos 
) 
 990         wxASSERT_MSG( nStart 
<= len
, wxT("invalid index") ); 
 994     for ( const_reverse_iterator i 
= rbegin() + (len 
- nStart 
- 1); 
 995           i 
!= rend(); --idx
, ++i 
) 
 997         if ( !wxTmemchr(sz
, *i
, n
) ) 
// Single-character variant: walks forward from character index nStart with
// `idx` tracking the current index.
// NOTE(review): the comparison against ch and the return statements are on
// lines not visible in this listing (presumably returns idx of the first
// character different from ch, npos otherwise - confirm).
1004 size_t wxString::find_first_not_of(wxUniChar ch
, size_t nStart
) const 
1006     wxASSERT_MSG( nStart 
<= length(),  wxT("invalid index") ); 
1008     size_t idx 
= nStart
; 
1009     for ( const_iterator i 
= begin() + nStart
; i 
!= end(); ++idx
, ++i 
) 
// Single-character variant: walks backwards with a reverse iterator starting
// at character index nStart, with `idx` tracking the current index.
// NOTE(review): the handling of nStart == npos, the comparison against ch
// and the return statements are on lines not visible in this listing. The
// assertion allows nStart == len, in which case (len - nStart - 1) wraps
// around as size_t - confirm this case is handled.
1018 size_t wxString::find_last_not_of(wxUniChar ch
, size_t nStart
) const 
1020     size_t len 
= length(); 
1022     if ( nStart 
== npos 
) 
1028         wxASSERT_MSG( nStart 
<= len
, wxT("invalid index") ); 
1031     size_t idx 
= nStart
; 
1032     for ( const_reverse_iterator i 
= rbegin() + (len 
- nStart 
- 1); 
1033           i 
!= rend(); --idx
, ++i 
) 
1042 // the functions above were implemented for wchar_t* arguments in Unicode 
1043 // build and char* in ANSI build; below are implementations for the other 
// Helper macros for the overloads below: wxOtherCharType names the character
// type to accept, and STRCONV converts a string of that type through
// wxConvLibc (cMB2WC for char input, cWC2MB for wchar_t input), casting the
// result to const wxChar*. Two alternative definitions are listed; the
// preprocessor condition selecting between them is not visible in this
// listing.
1046     #define wxOtherCharType char 
1047     #define STRCONV         (const wxChar*)wxConvLibc.cMB2WC 
1049     #define wxOtherCharType wchar_t 
1050     #define STRCONV         (const wxChar*)wxConvLibc.cWC2MB 
// Other-character-type overload: converts sz with STRCONV and forwards to the
// wxChar* overload.
1053 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
) const 
1054     { return find_first_of(STRCONV(sz
), nStart
); } 
// Other-character-type overload with an explicit count: converts the first n
// characters of sz with STRCONV and forwards to the wxChar* overload.
// NOTE(review): n is reused as the character count of the converted string;
// whether the conversion preserves the count is not determinable from this
// listing - confirm. The line declaring parameter n is not visible here.
1056 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
, 
1058     { return find_first_of(STRCONV(sz
, n
, NULL
), nStart
, n
); } 
// Other-character-type overload: converts sz with STRCONV and forwards to the
// wxChar* overload.
1059 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
) const 
1060     { return find_last_of(STRCONV(sz
), nStart
); } 
// Other-character-type overload with an explicit count: converts the first n
// characters of sz with STRCONV and forwards to the wxChar* overload.
// NOTE(review): n is reused as the character count of the converted string;
// whether the conversion preserves the count is not determinable from this
// listing - confirm. The line declaring parameter n is not visible here.
1061 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
, 
1063     { return find_last_of(STRCONV(sz
, n
, NULL
), nStart
, n
); } 
// Other-character-type overload: converts sz with STRCONV and forwards to the
// wxChar* overload.
1064 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
) const 
1065     { return find_first_not_of(STRCONV(sz
), nStart
); } 
// Other-character-type overload with an explicit count: converts the first n
// characters of sz with STRCONV and forwards to the wxChar* overload.
// NOTE(review): n is reused as the character count of the converted string;
// whether the conversion preserves the count is not determinable from this
// listing - confirm. The line declaring parameter n is not visible here.
1066 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
, 
1068     { return find_first_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); } 
// Other-character-type overload: converts sz with STRCONV and forwards to the
// wxChar* overload.
1069 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
) const 
1070     { return find_last_not_of(STRCONV(sz
), nStart
); } 
// Other-character-type overload with an explicit count: converts the first n
// characters of sz with STRCONV and forwards to the wxChar* overload.
// NOTE(review): n is reused as the character count of the converted string;
// whether the conversion preserves the count is not determinable from this
// listing - confirm. The line declaring parameter n is not visible here.
1071 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
, 
1073     { return find_last_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); } 
1075 #undef wxOtherCharType 
1078 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8 
1080 // =========================================================================== 
1081 // other common string functions 
1082 // =========================================================================== 
// Case-insensitive comparison of this string with s.
//
// !wxUSE_UNICODE_UTF8: the two buffers are compared chunk by chunk with
// wxStricmp(), where a chunk ends at the next NUL; after each chunk both
// cursors are advanced by the chunk length and runs of NULs are skipped
// while checking whether either string has been exhausted.
//
// wxUSE_UNICODE_UTF8: both strings are iterated character by character,
// comparing wxTolower() of each pair; the first difference decides the
// result (-1 or 1). If no difference is found within the common prefix, the
// lengths are compared.
//
// NOTE(review): the enclosing loop, the checks on `rc`, and several return
// statements are on lines not visible in this listing.
1084 int wxString::CmpNoCase(const wxString
& s
) const 
1086 #if !wxUSE_UNICODE_UTF8 
1087     // We compare NUL-delimited chunks of the strings inside the loop. We will 
1088     // do as many iterations as there are embedded NULs in the string, i.e. 
1089     // usually we will run it just once. 
1091     typedef const wxStringImpl::value_type 
*pchar_type
; 
1092     const pchar_type thisBegin 
= m_impl
.c_str(); 
1093     const pchar_type thatBegin 
= s
.m_impl
.c_str(); 
1095     const pchar_type thisEnd 
= thisBegin 
+ m_impl
.length(); 
1096     const pchar_type thatEnd 
= thatBegin 
+ s
.m_impl
.length(); 
1098     pchar_type thisCur 
= thisBegin
; 
1099     pchar_type thatCur 
= thatBegin
; 
1104         // Compare until the next NUL, if the strings differ this is the final 
1106         rc 
= wxStricmp(thisCur
, thatCur
); 
1110         const size_t lenChunk 
= wxStrlen(thisCur
); 
1111         thisCur 
+= lenChunk
; 
1112         thatCur 
+= lenChunk
; 
1114         // Skip all the NULs as wxStricmp() doesn't handle them. 
1115         for ( ; !*thisCur
; thisCur
++, thatCur
++ ) 
1117             // Check if we exhausted either of the strings. 
1118             if ( thisCur 
== thisEnd 
) 
1120                 // This one is exhausted, is the other one too? 
1121                 return thatCur 
== thatEnd 
? 0 : -1; 
1124             if ( thatCur 
== thatEnd 
) 
1126                 // Because of the test above we know that this one is not 
1127                 // exhausted yet so it's greater than the other one that is. 
1133                 // Anything non-NUL is greater than NUL. 
1140 #else // wxUSE_UNICODE_UTF8 
1141     // CRT functions can't be used for case-insensitive comparison of UTF-8 
1142     // strings so do it in the naive, simple and inefficient way. 
1144     // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added 
1145     const_iterator i1 
= begin(); 
1146     const_iterator end1 
= end(); 
1147     const_iterator i2 
= s
.begin(); 
1148     const_iterator end2 
= s
.end(); 
1150     for ( ; i1 
!= end1 
&& i2 
!= end2
; ++i1
, ++i2 
) 
1152         wxUniChar lower1 
= (wxChar
)wxTolower(*i1
); 
1153         wxUniChar lower2 
= (wxChar
)wxTolower(*i2
); 
1154         if ( lower1 
!= lower2 
) 
1155             return lower1 
< lower2 
? -1 : 1; 
1158     size_t len1 
= length(); 
1159     size_t len2 
= s
.length(); 
1163     else if ( len1 
> len2 
) 
1166 #endif // !wxUSE_UNICODE_UTF8/wxUSE_UNICODE_UTF8 
1173 #ifndef __SCHAR_MAX__ 
1174 #define __SCHAR_MAX__ 127 
1178 wxString 
wxString::FromAscii(const char *ascii
, size_t len
) 
1180     if (!ascii 
|| len 
== 0) 
1181        return wxEmptyString
; 
1186         wxStringInternalBuffer 
buf(res
, len
); 
1187         wxStringCharType 
*dest 
= buf
; 
1189         for ( ; len 
> 0; --len 
) 
1191             unsigned char c 
= (unsigned char)*ascii
++; 
1192             wxASSERT_MSG( c 
< 0x80, 
1193                           wxT("Non-ASCII value passed to FromAscii().") ); 
1195             *dest
++ = (wchar_t)c
; 
1202 wxString 
wxString::FromAscii(const char *ascii
) 
1204     return FromAscii(ascii
, wxStrlen(ascii
)); 
1207 wxString 
wxString::FromAscii(char ascii
) 
1209     // What do we do with '\0' ? 
1211     unsigned char c 
= (unsigned char)ascii
; 
1213     wxASSERT_MSG( c 
< 0x80, wxT("Non-ASCII value passed to FromAscii().") ); 
1215     // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value 
1216     return wxString(wxUniChar((wchar_t)c
)); 
1219 const wxScopedCharBuffer 
wxString::ToAscii() const 
1221     // this will allocate enough space for the terminating NUL too 
1222     wxCharBuffer 
buffer(length()); 
1223     char *dest 
= buffer
.data(); 
1225     for ( const_iterator i 
= begin(); i 
!= end(); ++i 
) 
1228         // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?') 
1229         *dest
++ = c
.IsAscii() ? (char)c 
: '_'; 
1231         // the output string can't have embedded NULs anyhow, so we can safely 
1232         // stop at first of them even if we do have any 
1240 #endif // wxUSE_UNICODE 
1242 // extract string of length nCount starting at nFirst 
1243 wxString 
wxString::Mid(size_t nFirst
, size_t nCount
) const 
1245     size_t nLen 
= length(); 
1247     // default value of nCount is npos and means "till the end" 
1248     if ( nCount 
== npos 
) 
1250         nCount 
= nLen 
- nFirst
; 
1253     // out-of-bounds requests return sensible things 
1254     if ( nFirst 
+ nCount 
> nLen 
) 
1256         nCount 
= nLen 
- nFirst
; 
1259     if ( nFirst 
> nLen 
) 
1261         // AllocCopy() will return empty string 
1262         return wxEmptyString
; 
1265     wxString 
dest(*this, nFirst
, nCount
); 
1266     if ( dest
.length() != nCount 
) 
1268         wxFAIL_MSG( wxT("out of memory in wxString::Mid") ); 
1274 // check that the string starts with prefix and, if it does, return the rest
1275 // of it in the provided pointer if it is not NULL; otherwise return false
1276 bool wxString::StartsWith(const wxString
& prefix
, wxString 
*rest
) const 
1278     if ( compare(0, prefix
.length(), prefix
) != 0 ) 
1283         // put the rest of the string into provided pointer 
1284         rest
->assign(*this, prefix
.length(), npos
); 
1291 // check that the string ends with suffix and, if it does, return the rest of
1292 // it in the provided pointer if it is not NULL; otherwise return false
1293 bool wxString::EndsWith(const wxString
& suffix
, wxString 
*rest
) const 
1295     int start 
= length() - suffix
.length(); 
1297     if ( start 
< 0 || compare(start
, npos
, suffix
) != 0 ) 
1302         // put the rest of the string into provided pointer 
1303         rest
->assign(*this, 0, start
); 
1310 // extract nCount last (rightmost) characters 
1311 wxString 
wxString::Right(size_t nCount
) const 
1313   if ( nCount 
> length() ) 
1316   wxString 
dest(*this, length() - nCount
, nCount
); 
1317   if ( dest
.length() != nCount 
) { 
1318     wxFAIL_MSG( wxT("out of memory in wxString::Right") ); 
1323 // get all characters after the last occurrence of ch 
1324 // (returns the whole string if ch not found) 
1325 wxString 
wxString::AfterLast(wxUniChar ch
) const 
1328   int iPos 
= Find(ch
, true); 
1329   if ( iPos 
== wxNOT_FOUND 
) 
1332     str
.assign(*this, iPos 
+ 1, npos
); 
1337 // extract nCount first (leftmost) characters 
1338 wxString 
wxString::Left(size_t nCount
) const 
1340   if ( nCount 
> length() ) 
1343   wxString 
dest(*this, 0, nCount
); 
1344   if ( dest
.length() != nCount 
) { 
1345     wxFAIL_MSG( wxT("out of memory in wxString::Left") ); 
1350 // get all characters before the first occurrence of ch 
1351 // (returns the whole string if ch not found) 
1352 wxString 
wxString::BeforeFirst(wxUniChar ch
, wxString 
*rest
) const 
1354   int iPos 
= Find(ch
); 
1355   if ( iPos 
== wxNOT_FOUND 
) 
1364       rest
->assign(*this, iPos 
+ 1, npos
); 
1367   return wxString(*this, 0, iPos
); 
1370 /// get all characters before the last occurrence of ch 
1371 /// (returns empty string if ch not found) 
1372 wxString 
wxString::BeforeLast(wxUniChar ch
, wxString 
*rest
) const 
1375   int iPos 
= Find(ch
, true); 
1376   if ( iPos 
!= wxNOT_FOUND 
) 
1379       str
.assign(*this, 0, iPos
); 
1382       rest
->assign(*this, iPos 
+ 1, npos
); 
1393 /// get all characters after the first occurrence of ch 
1394 /// (returns empty string if ch not found) 
1395 wxString 
wxString::AfterFirst(wxUniChar ch
) const 
1398   int iPos 
= Find(ch
); 
1399   if ( iPos 
!= wxNOT_FOUND 
) 
1400       str
.assign(*this, iPos 
+ 1, npos
); 
1405 // replace first (or all) occurrences of some substring with another one 
1406 size_t wxString::Replace(const wxString
& strOld
, 
1407                          const wxString
& strNew
, bool bReplaceAll
) 
1409     // if we tried to replace an empty string we'd enter an infinite loop below 
1410     wxCHECK_MSG( !strOld
.empty(), 0, 
1411                  wxT("wxString::Replace(): invalid parameter") ); 
1413     wxSTRING_INVALIDATE_CACHE(); 
1415     size_t uiCount 
= 0;   // count of replacements made 
1417     // optimize the special common case: replacement of one character by 
1418     // another one (in UTF-8 case we can only do this for ASCII characters) 
1420     // benchmarks show that this special version is around 3 times faster 
1421     // (depending on the proportion of matching characters and UTF-8/wchar_t 
1423     if ( strOld
.m_impl
.length() == 1 && strNew
.m_impl
.length() == 1 ) 
1425         const wxStringCharType chOld 
= strOld
.m_impl
[0], 
1426                                chNew 
= strNew
.m_impl
[0]; 
1428         // this loop is the simplified version of the one below 
1429         for ( size_t pos 
= 0; ; ) 
1431             pos 
= m_impl
.find(chOld
, pos
); 
1435             m_impl
[pos
++] = chNew
; 
1443     else if ( !bReplaceAll
) 
1445         size_t pos 
= m_impl
.find(strOld
, 0); 
1448             m_impl
.replace(pos
, strOld
.m_impl
.length(), strNew
.m_impl
); 
1452     else // replace all occurrences 
1454         const size_t uiOldLen 
= strOld
.m_impl
.length(); 
1455         const size_t uiNewLen 
= strNew
.m_impl
.length(); 
1457         // first scan the string to find all positions at which the replacement 
1459         wxVector
<size_t> replacePositions
; 
1462         for ( pos 
= m_impl
.find(strOld
.m_impl
, 0); 
1464               pos 
= m_impl
.find(strOld
.m_impl
, pos 
+ uiOldLen
)) 
1466             replacePositions
.push_back(pos
); 
1473         // allocate enough memory for the whole new string 
1475         tmp
.m_impl
.reserve(m_impl
.length() + uiCount
*(uiNewLen 
- uiOldLen
)); 
1477         // copy this string to tmp doing replacements on the fly 
1479         for ( pos 
= 0; replNum 
< uiCount
; replNum
++ ) 
1481             const size_t nextReplPos 
= replacePositions
[replNum
]; 
1483             if ( pos 
!= nextReplPos 
) 
1485                 tmp
.m_impl
.append(m_impl
, pos
, nextReplPos 
- pos
); 
1488             tmp
.m_impl
.append(strNew
.m_impl
); 
1489             pos 
= nextReplPos 
+ uiOldLen
; 
1492         if ( pos 
!= m_impl
.length() ) 
1494             // append the rest of the string unchanged 
1495             tmp
.m_impl
.append(m_impl
, pos
, m_impl
.length() - pos
); 
1504 bool wxString::IsAscii() const 
1506     for ( const_iterator i 
= begin(); i 
!= end(); ++i 
) 
1508         if ( !(*i
).IsAscii() ) 
1515 bool wxString::IsWord() const 
1517     for ( const_iterator i 
= begin(); i 
!= end(); ++i 
) 
1519         if ( !wxIsalpha(*i
) ) 
1526 bool wxString::IsNumber() const 
1531     const_iterator i 
= begin(); 
1533     if ( *i 
== wxT('-') || *i 
== wxT('+') ) 
1536     for ( ; i 
!= end(); ++i 
) 
1538         if ( !wxIsdigit(*i
) ) 
1545 wxString 
wxString::Strip(stripType w
) const 
1548     if ( w 
& leading 
) s
.Trim(false); 
1549     if ( w 
& trailing 
) s
.Trim(true); 
1553 // --------------------------------------------------------------------------- 
1555 // --------------------------------------------------------------------------- 
1557 wxString
& wxString::MakeUpper() 
1559   for ( iterator it 
= begin(), en 
= end(); it 
!= en
; ++it 
) 
1560     *it 
= (wxChar
)wxToupper(*it
); 
1565 wxString
& wxString::MakeLower() 
1567   for ( iterator it 
= begin(), en 
= end(); it 
!= en
; ++it 
) 
1568     *it 
= (wxChar
)wxTolower(*it
); 
1573 wxString
& wxString::MakeCapitalized() 
1575     const iterator en 
= end(); 
1576     iterator it 
= begin(); 
1579         *it 
= (wxChar
)wxToupper(*it
); 
1580         for ( ++it
; it 
!= en
; ++it 
) 
1581             *it 
= (wxChar
)wxTolower(*it
); 
1587 // --------------------------------------------------------------------------- 
1588 // trimming and padding 
1589 // --------------------------------------------------------------------------- 
1591 // some compilers (VC++ 6.0 not to name them) return true for a call to 
1592 // isspace('\xEA') in the C locale which seems to be broken to me, but we have 
1593 // to live with this by checking that the character is a 7 bit one - even if 
1594 // this may fail to detect some spaces (I don't know if Unicode doesn't have 
1595 // space-like symbols somewhere except in the first 128 chars), it is arguably 
1596 // still better than trimming away accented letters 
1597 inline int wxSafeIsspace(wxChar ch
) { return (ch 
< 127) && wxIsspace(ch
); } 
1599 // trims spaces (in the sense of isspace) from left or right side 
1600 wxString
& wxString::Trim(bool bFromRight
) 
1602     // first check if we're going to modify the string at all 
1605           (bFromRight 
&& wxSafeIsspace(GetChar(length() - 1))) || 
1606           (!bFromRight 
&& wxSafeIsspace(GetChar(0u))) 
1612             // find last non-space character 
1613             reverse_iterator psz 
= rbegin(); 
1614             while ( (psz 
!= rend()) && wxSafeIsspace(*psz
) ) 
1617             // truncate at trailing space start 
1618             erase(psz
.base(), end()); 
1622             // find first non-space character 
1623             iterator psz 
= begin(); 
1624             while ( (psz 
!= end()) && wxSafeIsspace(*psz
) ) 
1627             // fix up data and length 
1628             erase(begin(), psz
); 
1635 // adds nCount characters chPad to the string from either side 
1636 wxString
& wxString::Pad(size_t nCount
, wxUniChar chPad
, bool bFromRight
) 
1638     wxString 
s(chPad
, nCount
); 
1651 // truncate the string 
1652 wxString
& wxString::Truncate(size_t uiLen
) 
1654     if ( uiLen 
< length() ) 
1656         erase(begin() + uiLen
, end()); 
1658     //else: nothing to do, string is already short enough 
1663 // --------------------------------------------------------------------------- 
1664 // finding (return wxNOT_FOUND if not found and index otherwise) 
1665 // --------------------------------------------------------------------------- 
1668 int wxString::Find(wxUniChar ch
, bool bFromEnd
) const 
1670     size_type idx 
= bFromEnd 
? find_last_of(ch
) : find_first_of(ch
); 
1672     return (idx 
== npos
) ? wxNOT_FOUND 
: (int)idx
; 
1675 // ---------------------------------------------------------------------------- 
1676 // conversion to numbers 
1677 // ---------------------------------------------------------------------------- 
1679 // The implementation of all the functions below is exactly the same so factor 
1680 // it out. Note that number extraction works correctly on UTF-8 strings, so 
1681 // we can use wxStringCharType and wx_str() for maximum efficiency. 
1684     #define DO_IF_NOT_WINCE(x) x 
1686     #define DO_IF_NOT_WINCE(x) 
1689 #define WX_STRING_TO_X_TYPE_START                                           \ 
1690     wxCHECK_MSG( pVal, false, wxT("NULL output pointer") );                  \ 
1691     DO_IF_NOT_WINCE( errno = 0; )                                           \ 
1692     const wxStringCharType *start = wx_str();                               \ 
1693     wxStringCharType *end; 
1695 // notice that we return false without modifying the output parameter at all if
1696 // nothing could be parsed; if we did parse something successfully but not the
1697 // entire string, we do modify the output parameter but still return false
1698 #define WX_STRING_TO_X_TYPE_END                                             \ 
1699     if ( end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )                 \ 
1704 bool wxString::ToLong(long *pVal
, int base
) const 
1706     wxASSERT_MSG( !base 
|| (base 
> 1 && base 
<= 36), wxT("invalid base") ); 
1708     WX_STRING_TO_X_TYPE_START
 
1709     long val 
= wxStrtol(start
, &end
, base
); 
1710     WX_STRING_TO_X_TYPE_END
 
1713 bool wxString::ToULong(unsigned long *pVal
, int base
) const 
1715     wxASSERT_MSG( !base 
|| (base 
> 1 && base 
<= 36), wxT("invalid base") ); 
1717     WX_STRING_TO_X_TYPE_START
 
1718     unsigned long val 
= wxStrtoul(start
, &end
, base
); 
1719     WX_STRING_TO_X_TYPE_END
 
1722 bool wxString::ToLongLong(wxLongLong_t 
*pVal
, int base
) const 
1724     wxASSERT_MSG( !base 
|| (base 
> 1 && base 
<= 36), wxT("invalid base") ); 
1726     WX_STRING_TO_X_TYPE_START
 
1727     wxLongLong_t val 
= wxStrtoll(start
, &end
, base
); 
1728     WX_STRING_TO_X_TYPE_END
 
1731 bool wxString::ToULongLong(wxULongLong_t 
*pVal
, int base
) const 
1733     wxASSERT_MSG( !base 
|| (base 
> 1 && base 
<= 36), wxT("invalid base") ); 
1735     WX_STRING_TO_X_TYPE_START
 
1736     wxULongLong_t val 
= wxStrtoull(start
, &end
, base
); 
1737     WX_STRING_TO_X_TYPE_END
 
1740 bool wxString::ToDouble(double *pVal
) const 
1742     WX_STRING_TO_X_TYPE_START
 
1743     double val 
= wxStrtod(start
, &end
); 
1744     WX_STRING_TO_X_TYPE_END
 
1749 bool wxString::ToCLong(long *pVal
, int base
) const 
1751     wxASSERT_MSG( !base 
|| (base 
> 1 && base 
<= 36), wxT("invalid base") ); 
1753     WX_STRING_TO_X_TYPE_START
 
1754 #if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT) 
1755     long val 
= wxStrtol_lA(start
, &end
, base
, wxCLocale
); 
1757     long val 
= wxStrtol_l(start
, &end
, base
, wxCLocale
); 
1759     WX_STRING_TO_X_TYPE_END
 
1762 bool wxString::ToCULong(unsigned long *pVal
, int base
) const 
1764     wxASSERT_MSG( !base 
|| (base 
> 1 && base 
<= 36), wxT("invalid base") ); 
1766     WX_STRING_TO_X_TYPE_START
 
1767 #if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT) 
1768     unsigned long val 
= wxStrtoul_lA(start
, &end
, base
, wxCLocale
); 
1770     unsigned long val 
= wxStrtoul_l(start
, &end
, base
, wxCLocale
); 
1772     WX_STRING_TO_X_TYPE_END
 
1775 bool wxString::ToCDouble(double *pVal
) const 
1777     WX_STRING_TO_X_TYPE_START
 
1778 #if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT) 
1779     double val 
= wxStrtod_lA(start
, &end
, wxCLocale
); 
1781     double val 
= wxStrtod_l(start
, &end
, wxCLocale
); 
1783     WX_STRING_TO_X_TYPE_END
 
1786 #else // wxUSE_XLOCALE 
1788 // Provide implementation of these functions even when wxUSE_XLOCALE is 
1789 // disabled, we still need them in wxWidgets internal code. 
1791 // For integers we just assume the current locale uses the same number 
1792 // representation as the C one as there is nothing else we can do. 
1793 bool wxString::ToCLong(long *pVal
, int base
) const 
1795     return ToLong(pVal
, base
); 
1798 bool wxString::ToCULong(unsigned long *pVal
, int base
) const 
1800     return ToULong(pVal
, base
); 
1803 // For floating point numbers we have to handle the problem of the decimal 
1804 // point which is different in different locales. 
1805 bool wxString::ToCDouble(double *pVal
) const 
1807     // Create a copy of this string using the decimal point instead of whatever 
1808     // separator the current locale uses. 
1810     wxString sep 
= wxLocale::GetInfo(wxLOCALE_DECIMAL_POINT
, 
1811                                      wxLOCALE_CAT_NUMBER
); 
1814         // We can avoid an unnecessary string copy in this case. 
1815         return ToDouble(pVal
); 
1817 #else // !wxUSE_INTL 
1818     // We don't know what the current separator is so it might even be a point 
1819     // already, try to parse the string as a double: 
1820     if ( ToDouble(pVal
) ) 
1822         // It must have been the point, nothing else to do. 
1826     // Try to guess the separator, using the most common alternative value. 
1828 #endif // wxUSE_INTL/!wxUSE_INTL 
1829     wxString 
cstr(*this); 
1830     cstr
.Replace(".", sep
); 
1832     return cstr
.ToDouble(pVal
); 
1835 #endif  // wxUSE_XLOCALE/!wxUSE_XLOCALE 
1837 // ---------------------------------------------------------------------------- 
1838 // number to string conversion 
1839 // ---------------------------------------------------------------------------- 
1842 wxString 
wxString::FromDouble(double val
, int precision
) 
1844     wxCHECK_MSG( precision 
>= -1, wxString(), "Invalid negative precision" ); 
1847     if ( precision 
== -1 ) 
1851     else // Use fixed precision. 
1853         format
.Printf("%%.%df", precision
); 
1856     return wxString::Format(format
, val
); 
1860 wxString 
wxString::FromCDouble(double val
, int precision
) 
1862     wxCHECK_MSG( precision 
>= -1, wxString(), "Invalid negative precision" ); 
1864 #if wxUSE_STD_IOSTREAM && wxUSE_STD_STRING 
1865     // We assume that we can use the ostream and not wstream for numbers. 
1866     wxSTD ostringstream os
; 
1867     if ( precision 
!= -1 ) 
1869         os
.precision(precision
); 
1870         os
.setf(std::ios::fixed
, std::ios::floatfield
); 
1875 #else // !wxUSE_STD_IOSTREAM 
1876     // Can't use iostream locale support, fall back to the manual method 
1878     wxString s 
= FromDouble(val
, precision
); 
1880     wxString sep 
= wxLocale::GetInfo(wxLOCALE_DECIMAL_POINT
, 
1881                                      wxLOCALE_CAT_NUMBER
); 
1882 #else // !wxUSE_INTL 
1883     // As above, this is the most common alternative value. Notice that here it 
1884     // doesn't matter if we guess wrongly and the current separator is already 
1885     // ".": we'll just waste a call to Replace() in this case. 
1887 #endif // wxUSE_INTL/!wxUSE_INTL 
1889     s
.Replace(sep
, "."); 
1891 #endif // wxUSE_STD_IOSTREAM/!wxUSE_STD_IOSTREAM 
1894 // --------------------------------------------------------------------------- 
1896 // --------------------------------------------------------------------------- 
1898 #if !wxUSE_UTF8_LOCALE_ONLY 
1900 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN 
1901 wxString 
wxStringPrintfMixinBase::DoFormatWchar(const wxChar 
*format
, ...) 
1903 wxString 
wxString::DoFormatWchar(const wxChar 
*format
, ...) 
1907     va_start(argptr
, format
); 
1910     s
.PrintfV(format
, argptr
); 
1916 #endif // !wxUSE_UTF8_LOCALE_ONLY 
1918 #if wxUSE_UNICODE_UTF8 
1920 wxString 
wxString::DoFormatUtf8(const char *format
, ...) 
1923     va_start(argptr
, format
); 
1926     s
.PrintfV(format
, argptr
); 
1932 #endif // wxUSE_UNICODE_UTF8 
1935 wxString 
wxString::FormatV(const wxString
& format
, va_list argptr
) 
1938     s
.PrintfV(format
, argptr
); 
1942 #if !wxUSE_UTF8_LOCALE_ONLY 
1943 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN 
1944 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar 
*format
, ...) 
1946 int wxString::DoPrintfWchar(const wxChar 
*format
, ...) 
1950     va_start(argptr
, format
); 
1952 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN 
1953     // get a pointer to the wxString instance: this function is a member of
1954     // the mixin base class, so "this" must be downcast to wxString; note
1955     // that the code below uses static_cast<> for this, not dynamic_cast<>:
1956     wxString 
*str 
= static_cast<wxString
*>(this); 
1958     wxString 
*str 
= this; 
1961     int iLen 
= str
->PrintfV(format
, argptr
); 
1967 #endif // !wxUSE_UTF8_LOCALE_ONLY 
1969 #if wxUSE_UNICODE_UTF8 
1970 int wxString::DoPrintfUtf8(const char *format
, ...) 
1973     va_start(argptr
, format
); 
1975     int iLen 
= PrintfV(format
, argptr
); 
1981 #endif // wxUSE_UNICODE_UTF8 
1984     Uses wxVsnprintf and places the result into the this string. 
1986     In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build 
1987     it is vswprintf.  Due to a discrepancy between vsnprintf and vswprintf in 
1988     the ISO C99 (and thus SUSv3) standard the return value for the case of 
1989     an undersized buffer is inconsistent.  For conforming vsnprintf 
1990     implementations the function must return the number of characters that 
1991     would have been printed had the buffer been large enough.  For conforming 
1992     vswprintf implementations the function must return a negative number 
1995     What vswprintf sets errno to is undefined but Darwin seems to set it to 
1996     EOVERFLOW.  The only expected errno are EILSEQ and EINVAL.  Both of 
1997     those are defined in the standard and backed up by several conformance 
1998     statements.  Note that ENOMEM mentioned in the manual page does not 
1999     apply to swprintf, only wprintf and fwprintf. 
2001     Official manual page: 
2002     http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html 
2004     Some conformance statements (AIX, Solaris): 
2005     http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3 
2006     http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10 
2008     Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since 
2009     EILSEQ and EINVAL are specifically defined to mean the error is other than 
2010     an undersized buffer and no other errno are defined we treat those two 
2011     as meaning hard errors and everything else gets the old behaviour which 
2012     is to keep looping and increasing buffer size until the function succeeds. 
2014     In practice it's impossible to determine before compilation which behaviour 
2015     may be used.  The vswprintf function may have vsnprintf-like behaviour or 
2016     vice-versa.  Behaviour detected on one release can theoretically change 
2017     with an updated release.  Not to mention that configure testing for it 
2018     would require the test to be run on the host system, not the build system 
2019     which makes cross compilation difficult. Therefore, we make no assumptions 
2020     about behaviour and try our best to handle every known case, including the 
2021     case where wxVsnprintf returns a negative number and fails to set errno. 
2023     There is yet one more non-standard implementation and that is our own. 
2024     Fortunately, that can be detected at compile-time. 
2026     On top of all that, ISO C99 explicitly defines snprintf to write a null 
2027     character to the last position of the specified buffer.  That would be at 
2028     the given buffer size minus 1.  It is supposed to do this even if it 
2029     turns out that the buffer is sized too small. 
2031     Darwin (tested on 10.5) follows the C99 behaviour exactly. 
2033     Glibc 2.6 almost follows the C99 behaviour except vswprintf never sets 
2034     errno even when it fails.  However, it only seems to ever fail due 
2035     to an undersized buffer. 
2037 #if wxUSE_UNICODE_UTF8 
2038 template<typename BufferType
> 
2040 // we only need one version in non-UTF8 builds and at least two Windows 
2041 // compilers have problems with this function template, so use just one 
2042 // normal function here 
2044 static int DoStringPrintfV(wxString
& str
, 
2045                            const wxString
& format
, va_list argptr
) 
2051 #if wxUSE_UNICODE_UTF8 
2052         BufferType 
tmp(str
, size 
+ 1); 
2053         typename 
BufferType::CharType 
*buf 
= tmp
; 
2055         wxStringBuffer 
tmp(str
, size 
+ 1); 
2065         // wxVsnprintf() may modify the original arg pointer, so pass it 
2068         wxVaCopy(argptrcopy
, argptr
); 
2071         // Set errno to 0 to make it determinate if wxVsnprintf fails to set it. 
2074         int len 
= wxVsnprintf(buf
, size
, format
, argptrcopy
); 
2077         // some implementations of vsnprintf() don't NUL terminate 
2078         // the string if there is not enough space for it so 
2079         // always do it manually 
2080         // FIXME: This really seems to be wrong and would be an off-by-one 
2081         // bug except the code above allocates an extra character. 
2082         buf
[size
] = wxT('\0'); 
2084         // vsnprintf() may return either -1 (traditional Unix behaviour) or the 
2085         // total number of characters which would have been written if the 
2086         // buffer were large enough (newer standards such as Unix98) 
2089             // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or 
2090             //     wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF 
2091             //     is true if *both* of them use our own implementation, 
2092             //     otherwise we can't be sure 
2093 #if wxUSE_WXVSNPRINTF 
2094             // we know that our own implementation of wxVsnprintf() returns -1 
2095             // only for a format error - thus there's something wrong with 
2096             // the user's format string 
2099 #else // possibly using system version 
2100             // assume it only returns error if there is not enough space, but 
2101             // as we don't know how much we need, double the current size of 
2104             if( (errno 
== EILSEQ
) || (errno 
== EINVAL
) ) 
2105             // If errno was set to one of the two well-known hard errors 
2106             // then fail immediately to avoid an infinite loop. 
2109 #endif // __WXWINCE__ 
2110             // still not enough, as we don't know how much we need, double the 
2111             // current size of the buffer 
2113 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF 
2115         else if ( len 
>= size 
) 
2117 #if wxUSE_WXVSNPRINTF 
2118             // we know that our own implementation of wxVsnprintf() returns 
2119             // size+1 when there's not enough space but that's not the size 
2120             // of the required buffer! 
2121             size 
*= 2;      // so we just double the current size of the buffer 
2123             // some vsnprintf() implementations NUL-terminate the buffer and 
2124             // some don't in len == size case, to be safe always add 1 
2125             // FIXME: I don't quite understand this comment.  The vsnprintf 
2126             // function is specifically defined to return the number of 
2127             // characters printed not including the null terminator. 
2128             // So OF COURSE you need to add 1 to get the right buffer size. 
2129             // The following line is definitely correct, no question. 
2133         else // ok, there was enough space 
2139     // we could have overshot 
2142     return str
.length(); 
2145 int wxString::PrintfV(const wxString
& format
, va_list argptr
) 
2147 #if wxUSE_UNICODE_UTF8 
2148     #if wxUSE_STL_BASED_WXSTRING 
2149         typedef wxStringTypeBuffer
<char> Utf8Buffer
; 
2151         typedef wxStringInternalBuffer Utf8Buffer
; 
2155 #if wxUSE_UTF8_LOCALE_ONLY 
2156     return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argptr
); 
2158     #if wxUSE_UNICODE_UTF8 
2159     if ( wxLocaleIsUtf8 
) 
2160         return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argptr
); 
2163         return DoStringPrintfV
<wxStringBuffer
>(*this, format
, argptr
); 
2165         return DoStringPrintfV(*this, format
, argptr
); 
2166     #endif // UTF8/WCHAR 
2170 // ---------------------------------------------------------------------------- 
2171 // misc other operations 
2172 // ---------------------------------------------------------------------------- 
2174 // returns true if the string matches the pattern which may contain '*' and 
2175 // '?' metacharacters (as usual, '?' matches any character and '*' any number 
2177 bool wxString::Matches(const wxString
& mask
) const 
2179     // I disable this code as it doesn't seem to be faster (in fact, it seems 
2180     // to be much slower) than the old, hand-written code below and using it 
2181     // here requires always linking with libregex even if the user code doesn't 
2183 #if 0 // wxUSE_REGEX 
2184     // first translate the shell-like mask into a regex 
2186     pattern
.reserve(wxStrlen(pszMask
)); 
2188     pattern 
+= wxT('^'); 
2194                 pattern 
+= wxT('.'); 
2198                 pattern 
+= wxT(".*"); 
2209                 // these characters are special in a RE, quote them 
2210                 // (however note that we don't quote '[' and ']' to allow 
2211                 // using them for Unix shell like matching) 
2212                 pattern 
+= wxT('\\'); 
2216                 pattern 
+= *pszMask
; 
2221     pattern 
+= wxT('$'); 
2224     return wxRegEx(pattern
, wxRE_NOSUB 
| wxRE_EXTENDED
).Matches(c_str()); 
2225 #else // !wxUSE_REGEX 
2226   // TODO: this is, of course, awfully inefficient... 
2228   // FIXME-UTF8: implement using iterators, remove #if 
2229 #if wxUSE_UNICODE_UTF8 
2230   const wxScopedWCharBuffer maskBuf 
= mask
.wc_str(); 
2231   const wxScopedWCharBuffer txtBuf 
= wc_str(); 
2232   const wxChar 
*pszMask 
= maskBuf
.data(); 
2233   const wxChar 
*pszTxt 
= txtBuf
.data(); 
2235   const wxChar 
*pszMask 
= mask
.wx_str(); 
2236   // the char currently being checked 
2237   const wxChar 
*pszTxt 
= wx_str(); 
2240   // the last location where '*' matched 
2241   const wxChar 
*pszLastStarInText 
= NULL
; 
2242   const wxChar 
*pszLastStarInMask 
= NULL
; 
2245   for ( ; *pszMask 
!= wxT('\0'); pszMask
++, pszTxt
++ ) { 
2246     switch ( *pszMask 
) { 
2248         if ( *pszTxt 
== wxT('\0') ) 
2251         // pszTxt and pszMask will be incremented in the loop statement 
2257           // remember where we started to be able to backtrack later 
2258           pszLastStarInText 
= pszTxt
; 
2259           pszLastStarInMask 
= pszMask
; 
2261           // ignore special chars immediately following this one 
2262           // (should this be an error?) 
2263           while ( *pszMask 
== wxT('*') || *pszMask 
== wxT('?') ) 
2266           // if there is nothing more, match 
2267           if ( *pszMask 
== wxT('\0') ) 
2270           // are there any other metacharacters in the mask? 
2272           const wxChar 
*pEndMask 
= wxStrpbrk(pszMask
, wxT("*?")); 
2274           if ( pEndMask 
!= NULL 
) { 
2275             // we have to match the string between two metachars 
2276             uiLenMask 
= pEndMask 
- pszMask
; 
2279             // we have to match the remainder of the string 
2280             uiLenMask 
= wxStrlen(pszMask
); 
2283           wxString 
strToMatch(pszMask
, uiLenMask
); 
2284           const wxChar
* pMatch 
= wxStrstr(pszTxt
, strToMatch
); 
2285           if ( pMatch 
== NULL 
) 
2288           // -1 to compensate "++" in the loop 
2289           pszTxt 
= pMatch 
+ uiLenMask 
- 1; 
2290           pszMask 
+= uiLenMask 
- 1; 
2295         if ( *pszMask 
!= *pszTxt 
) 
2301   // match only if nothing left 
2302   if ( *pszTxt 
== wxT('\0') ) 
2305   // if we failed to match, backtrack if we can 
2306   if ( pszLastStarInText 
) { 
2307     pszTxt 
= pszLastStarInText 
+ 1; 
2308     pszMask 
= pszLastStarInMask
; 
2310     pszLastStarInText 
= NULL
; 
2312     // don't bother resetting pszLastStarInMask, it's unnecessary 
2318 #endif // wxUSE_REGEX/!wxUSE_REGEX 
2321 // Count the number of chars 
2322 int wxString::Freq(wxUniChar ch
) const 
2325     for ( const_iterator i 
= begin(); i 
!= end(); ++i 
)