1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
13 // ===========================================================================
14 // headers, declarations, constants
15 // ===========================================================================
17 // For compilers that support precompilation, includes "wx.h".
18 #include "wx/wxprec.h"
25 #include "wx/string.h"
26 #include "wx/wxcrtvararg.h"
39 #include "wx/hashmap.h"
40 #include "wx/vector.h"
41 #include "wx/xlocale.h"
44 #include "wx/msw/wrapwin.h"
47 // string handling functions used by wxString:
48 #if wxUSE_UNICODE_UTF8
49 #define wxStringMemcpy memcpy
50 #define wxStringMemcmp memcmp
51 #define wxStringMemchr memchr
52 #define wxStringStrlen strlen
54 #define wxStringMemcpy wxTmemcpy
55 #define wxStringMemcmp wxTmemcmp
56 #define wxStringMemchr wxTmemchr
57 #define wxStringStrlen wxStrlen
60 // ----------------------------------------------------------------------------
62 // ----------------------------------------------------------------------------
67 static UntypedBufferData
s_untypedNullData(NULL
, 0);
69 UntypedBufferData
* const untypedNullDataPtr
= &s_untypedNullData
;
71 } // namespace wxPrivate
73 // ---------------------------------------------------------------------------
74 // static class variables definition
75 // ---------------------------------------------------------------------------
77 //According to STL _must_ be a -1 size_t
78 const size_t wxString::npos
= (size_t) -1;
80 #if wxUSE_STRING_POS_CACHE
82 #ifdef wxHAS_COMPILER_TLS
84 wxTLS_TYPE(wxString::Cache
) wxString::ms_cache
;
86 #else // !wxHAS_COMPILER_TLS
88 struct wxStrCacheInitializer
90 wxStrCacheInitializer()
92 // calling this function triggers s_cache initialization in it, and
93 // from now on it becomes safe to call from multiple threads
99 wxString::Cache& wxString::GetCache()
101 static wxTLS_TYPE(Cache) s_cache;
103 return wxTLS_VALUE(s_cache);
107 static wxStrCacheInitializer gs_stringCacheInit
;
109 #endif // wxHAS_COMPILER_TLS/!wxHAS_COMPILER_TLS
111 // gdb seems to be unable to display thread-local variables correctly, at least
112 // not my 6.4.98 version under amd64, so provide this debugging helper to do it
113 #if wxDEBUG_LEVEL >= 2
115 struct wxStrCacheDumper
117 static void ShowAll()
119 puts("*** wxString cache dump:");
120 for ( unsigned n
= 0; n
< wxString::Cache::SIZE
; n
++ )
122 const wxString::Cache::Element
&
123 c
= wxString::GetCacheBegin()[n
];
125 printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
127 n
== wxString::LastUsedCacheElement() ? " [*]" : "",
129 (unsigned long)c
.pos
,
130 (unsigned long)c
.impl
,
136 void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
138 #endif // wxDEBUG_LEVEL >= 2
140 #ifdef wxPROFILE_STRING_CACHE
142 wxString::CacheStats
wxString::ms_cacheStats
;
144 struct wxStrCacheStatsDumper
146 ~wxStrCacheStatsDumper()
148 const wxString::CacheStats
& stats
= wxString::ms_cacheStats
;
152 puts("*** wxString cache statistics:");
153 printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
155 printf("\tHits %u (of which %u not used) or %.2f%%\n",
158 100.*float(stats
.poshits
- stats
.mishits
)/stats
.postot
);
159 printf("\tAverage position requested: %.2f\n",
160 float(stats
.sumpos
) / stats
.postot
);
161 printf("\tAverage offset after cached hint: %.2f\n",
162 float(stats
.sumofs
) / stats
.postot
);
167 printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
168 stats
.lentot
, 100.*float(stats
.lenhits
)/stats
.lentot
);
173 static wxStrCacheStatsDumper s_showCacheStats
;
175 #endif // wxPROFILE_STRING_CACHE
177 #endif // wxUSE_STRING_POS_CACHE
179 // ----------------------------------------------------------------------------
181 // ----------------------------------------------------------------------------
183 #if wxUSE_STD_IOSTREAM
187 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxCStrData
& str
)
189 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
190 const wxScopedCharBuffer
buf(str
.AsCharBuf());
192 os
.clear(wxSTD
ios_base::failbit
);
198 return os
<< str
.AsInternal();
202 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxString
& str
)
204 return os
<< str
.c_str();
207 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxScopedCharBuffer
& str
)
209 return os
<< str
.data();
213 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxScopedWCharBuffer
& str
)
215 return os
<< str
.data();
219 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
221 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxString
& str
)
223 return wos
<< str
.wc_str();
226 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxCStrData
& str
)
228 return wos
<< str
.AsWChar();
231 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxScopedWCharBuffer
& str
)
233 return wos
<< str
.data();
236 #endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
238 #endif // wxUSE_STD_IOSTREAM
240 // ===========================================================================
241 // wxString class core
242 // ===========================================================================
244 #if wxUSE_UNICODE_UTF8
246 void wxString::PosLenToImpl(size_t pos
, size_t len
,
247 size_t *implPos
, size_t *implLen
) const
253 else // have valid start position
255 const const_iterator b
= GetIterForNthChar(pos
);
256 *implPos
= wxStringImpl::const_iterator(b
.impl()) - m_impl
.begin();
261 else // have valid length too
263 // we need to handle the case of length specifying a substring
264 // going beyond the end of the string, just as std::string does
265 const const_iterator
e(end());
267 while ( len
&& i
<= e
)
273 *implLen
= i
.impl() - b
.impl();
278 #endif // wxUSE_UNICODE_UTF8
280 // ----------------------------------------------------------------------------
281 // wxCStrData converted strings caching
282 // ----------------------------------------------------------------------------
284 // FIXME-UTF8: temporarily disabled because it doesn't work with global
285 // string objects; re-enable after fixing this bug and benchmarking
286 // performance to see if using a hash is a good idea at all
289 // For backward compatibility reasons, it must be possible to assign the value
290 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
291 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
292 // because the memory would be freed immediately, but it has to be valid as long
293 // as the string is not modified, so that code like this still works:
295 // const wxChar *s = str.c_str();
296 // while ( s ) { ... }
298 // FIXME-UTF8: not thread safe!
299 // FIXME-UTF8: we currently clear the cached conversion only when the string is
300 // destroyed, but we should do it when the string is modified, to
301 // keep memory usage down
302 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
303 // invalidated the cache on every change, we could keep the previous
305 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
306 // to use mb_str() or wc_str() instead of (const [w]char*)c_str()
309 static inline void DeleteStringFromConversionCache(T
& hash
, const wxString
*s
)
311 typename
T::iterator i
= hash
.find(wxConstCast(s
, wxString
));
312 if ( i
!= hash
.end() )
320 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
321 // so we have to use wxString* here and const-cast when used
322 WX_DECLARE_HASH_MAP(wxString
*, char*, wxPointerHash
, wxPointerEqual
,
323 wxStringCharConversionCache
);
324 static wxStringCharConversionCache gs_stringsCharCache
;
326 const char* wxCStrData::AsChar() const
328 // remove previously cached value, if any (see FIXMEs above):
329 DeleteStringFromConversionCache(gs_stringsCharCache
, m_str
);
331 // convert the string and keep it:
332 const char *s
= gs_stringsCharCache
[wxConstCast(m_str
, wxString
)] =
333 m_str
->mb_str().release();
337 #endif // wxUSE_UNICODE
339 #if !wxUSE_UNICODE_WCHAR
340 WX_DECLARE_HASH_MAP(wxString
*, wchar_t*, wxPointerHash
, wxPointerEqual
,
341 wxStringWCharConversionCache
);
342 static wxStringWCharConversionCache gs_stringsWCharCache
;
344 const wchar_t* wxCStrData::AsWChar() const
346 // remove previously cached value, if any (see FIXMEs above):
347 DeleteStringFromConversionCache(gs_stringsWCharCache
, m_str
);
349 // convert the string and keep it:
350 const wchar_t *s
= gs_stringsWCharCache
[wxConstCast(m_str
, wxString
)] =
351 m_str
->wc_str().release();
355 #endif // !wxUSE_UNICODE_WCHAR
357 wxString::~wxString()
360 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
361 DeleteStringFromConversionCache(gs_stringsCharCache
, this);
363 #if !wxUSE_UNICODE_WCHAR
364 DeleteStringFromConversionCache(gs_stringsWCharCache
, this);
369 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
370 const char* wxCStrData::AsChar() const
372 #if wxUSE_UNICODE_UTF8
373 if ( wxLocaleIsUtf8
)
376 // under non-UTF8 locales, we have to convert the internal UTF-8
377 // representation using wxConvLibc and cache the result
379 wxString
*str
= wxConstCast(m_str
, wxString
);
381 // convert the string:
383 // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
384 // have it) but it's unfortunately not obvious to implement
385 // because we don't know how big a buffer we need for the
386 // given string length (in case of multibyte encodings, e.g.
387 // ISO-2022-JP or UTF-8 when internal representation is wchar_t)
389 // One idea would be to store more than just m_convertedToChar
390 // in wxString: then we could record the length of the string
391 // which was converted the last time and try to reuse the same
392 // buffer if the current length is not greater than it (this
393 // could still fail because string could have been modified in
394 // place but it would work most of the time, so we'd do it and
395 // only allocate the new buffer if in-place conversion returned
396 // an error). We could also store a bit saying if the string
397 // was modified since the last conversion (and update it in all
398 // operation modifying the string, of course) to avoid unneeded
399 // consequential conversions. But both of these ideas require
400 // adding more fields to wxString and require profiling results
401 // to be sure that we really gain enough from them to justify
403 wxScopedCharBuffer
buf(str
->mb_str());
405 // if it failed, return empty string and not NULL to avoid crashes in code
406 // written with either wxWidgets 2 wxString or std::string behaviour in
407 // mind: neither of them ever returns NULL and so we shouldn't either
411 if ( str
->m_convertedToChar
&&
412 strlen(buf
) == strlen(str
->m_convertedToChar
) )
414 // keep the same buffer for as long as possible, so that several calls
415 // to c_str() in a row still work:
416 strcpy(str
->m_convertedToChar
, buf
);
420 str
->m_convertedToChar
= buf
.release();
424 return str
->m_convertedToChar
+ m_offset
;
426 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
428 #if !wxUSE_UNICODE_WCHAR
429 const wchar_t* wxCStrData::AsWChar() const
431 wxString
*str
= wxConstCast(m_str
, wxString
);
433 // convert the string:
434 wxScopedWCharBuffer
buf(str
->wc_str());
436 // notice that here, unlike above in AsChar(), conversion can't fail as our
437 // internal UTF-8 is always well-formed -- or the string was corrupted and
438 // all bets are off anyhow
440 // FIXME-UTF8: do the conversion in-place in the existing buffer
441 if ( str
->m_convertedToWChar
&&
442 wxWcslen(buf
) == wxWcslen(str
->m_convertedToWChar
) )
444 // keep the same buffer for as long as possible, so that several calls
445 // to c_str() in a row still work:
446 memcpy(str
->m_convertedToWChar
, buf
, sizeof(wchar_t) * wxWcslen(buf
));
450 str
->m_convertedToWChar
= buf
.release();
454 return str
->m_convertedToWChar
+ m_offset
;
456 #endif // !wxUSE_UNICODE_WCHAR
458 // ===========================================================================
459 // wxString class core
460 // ===========================================================================
462 // ---------------------------------------------------------------------------
463 // construction and conversion
464 // ---------------------------------------------------------------------------
466 #if wxUSE_UNICODE_WCHAR
468 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
469 const wxMBConv
& conv
)
472 if ( !psz
|| nLength
== 0 )
473 return SubstrBufFromMB(wxWCharBuffer(L
""), 0);
475 if ( nLength
== npos
)
479 wxScopedWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
481 return SubstrBufFromMB(wxWCharBuffer(L
""), 0);
483 return SubstrBufFromMB(wcBuf
, wcLen
);
485 #endif // wxUSE_UNICODE_WCHAR
487 #if wxUSE_UNICODE_UTF8
489 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
490 const wxMBConv
& conv
)
493 if ( !psz
|| nLength
== 0 )
494 return SubstrBufFromMB(wxCharBuffer(""), 0);
496 // if psz is already in UTF-8, we don't have to do the roundtrip to
497 // wchar_t* and back:
500 // we need to validate the input because UTF8 iterators assume valid
501 // UTF-8 sequence and psz may be invalid:
502 if ( wxStringOperations::IsValidUtf8String(psz
, nLength
) )
504 // we must pass the real string length to SubstrBufFromMB ctor
505 if ( nLength
== npos
)
506 nLength
= psz
? strlen(psz
) : 0;
507 return SubstrBufFromMB(wxScopedCharBuffer::CreateNonOwned(psz
, nLength
),
510 // else: do the roundtrip through wchar_t*
513 if ( nLength
== npos
)
516 // first convert to wide string:
518 wxScopedWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
520 return SubstrBufFromMB(wxCharBuffer(""), 0);
522 // and then to UTF-8:
523 SubstrBufFromMB
buf(ConvertStr(wcBuf
, wcLen
, wxMBConvStrictUTF8()));
524 // widechar -> UTF-8 conversion isn't supposed to ever fail:
525 wxASSERT_MSG( buf
.data
, _T("conversion to UTF-8 failed") );
529 #endif // wxUSE_UNICODE_UTF8
531 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
533 wxString::SubstrBufFromWC
wxString::ConvertStr(const wchar_t *pwz
, size_t nLength
,
534 const wxMBConv
& conv
)
537 if ( !pwz
|| nLength
== 0 )
538 return SubstrBufFromWC(wxCharBuffer(""), 0);
540 if ( nLength
== npos
)
544 wxScopedCharBuffer
mbBuf(conv
.cWC2MB(pwz
, nLength
, &mbLen
));
546 return SubstrBufFromWC(wxCharBuffer(""), 0);
548 return SubstrBufFromWC(mbBuf
, mbLen
);
550 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
553 #if wxUSE_UNICODE_WCHAR
555 //Convert wxString in Unicode mode to a multi-byte string
556 const wxScopedCharBuffer
wxString::mb_str(const wxMBConv
& conv
) const
558 // NB: Length passed to cWC2MB() doesn't include terminating NUL, it's
559 // added by it automatically. If we passed length()+1 here, it would
560 // create a buffer with 2 trailing NULs of length one greater than
562 return conv
.cWC2MB(wx_str(), length(), NULL
);
565 #elif wxUSE_UNICODE_UTF8
567 const wxScopedWCharBuffer
wxString::wc_str() const
569 // NB: Length passed to cMB2WC() doesn't include terminating NUL, it's
570 // added by it automatically. If we passed length()+1 here, it would
571 // create a buffer with 2 trailing NULs of length one greater than
573 return wxMBConvStrictUTF8().cMB2WC
581 const wxScopedCharBuffer
wxString::mb_str(const wxMBConv
& conv
) const
584 return wxScopedCharBuffer::CreateNonOwned(m_impl
.c_str(), m_impl
.length());
586 wxScopedWCharBuffer
wcBuf(wc_str());
587 if ( !wcBuf
.length() )
588 return wxCharBuffer("");
590 return conv
.cWC2MB(wcBuf
.data(), wcBuf
.length(), NULL
);
595 //Converts this string to a wide character string if unicode
596 //mode is not enabled and wxUSE_WCHAR_T is enabled
597 const wxScopedWCharBuffer
wxString::wc_str(const wxMBConv
& conv
) const
599 // NB: Length passed to cMB2WC() doesn't include terminating NUL, it's
600 // added by it automatically. If we passed length()+1 here, it would
601 // create a buffer with 2 trailing NULs of length one greater than
603 return conv
.cMB2WC(wx_str(), length(), NULL
);
606 #endif // Unicode/ANSI
608 // shrink to minimal size (releasing extra memory)
// Builds a temporary copy of this string from the range [begin(), end()) and
// returns whether the length of that copy equals the length of this string.
// NOTE(review): original line 612, between the two statements below, is not
// visible in this listing -- presumably that is where the copy replaces the
// current contents; confirm against the complete file.
609 bool wxString::Shrink()
611 wxString
tmp(begin(), end());
613 return tmp
.length() == length();
616 // deprecated compatibility code:
617 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
618 wxStringCharType
*wxString::GetWriteBuf(size_t nLen
)
620 return DoGetWriteBuf(nLen
);
623 void wxString::UngetWriteBuf()
628 void wxString::UngetWriteBuf(size_t nLen
)
630 DoUngetWriteBuf(nLen
);
632 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
635 // ---------------------------------------------------------------------------
637 // ---------------------------------------------------------------------------
639 // all functions are inline in string.h
641 // ---------------------------------------------------------------------------
642 // concatenation operators
643 // ---------------------------------------------------------------------------
646 * concatenation functions come in 5 flavours:
648 * char + string and string + char
649 * C str + string and string + C str
652 wxString
operator+(const wxString
& str1
, const wxString
& str2
)
654 #if !wxUSE_STL_BASED_WXSTRING
655 wxASSERT( str1
.IsValid() );
656 wxASSERT( str2
.IsValid() );
665 wxString
operator+(const wxString
& str
, wxUniChar ch
)
667 #if !wxUSE_STL_BASED_WXSTRING
668 wxASSERT( str
.IsValid() );
677 wxString
operator+(wxUniChar ch
, const wxString
& str
)
679 #if !wxUSE_STL_BASED_WXSTRING
680 wxASSERT( str
.IsValid() );
689 wxString
operator+(const wxString
& str
, const char *psz
)
691 #if !wxUSE_STL_BASED_WXSTRING
692 wxASSERT( str
.IsValid() );
696 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
697 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
705 wxString
operator+(const wxString
& str
, const wchar_t *pwz
)
707 #if !wxUSE_STL_BASED_WXSTRING
708 wxASSERT( str
.IsValid() );
712 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
713 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
721 wxString
operator+(const char *psz
, const wxString
& str
)
723 #if !wxUSE_STL_BASED_WXSTRING
724 wxASSERT( str
.IsValid() );
728 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
729 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
737 wxString
operator+(const wchar_t *pwz
, const wxString
& str
)
739 #if !wxUSE_STL_BASED_WXSTRING
740 wxASSERT( str
.IsValid() );
744 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
745 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
753 // ---------------------------------------------------------------------------
755 // ---------------------------------------------------------------------------
757 bool wxString::IsSameAs(wxUniChar c
, bool compareWithCase
) const
759 return (length() == 1) && (compareWithCase
? GetChar(0u) == c
760 : wxToupper(GetChar(0u)) == wxToupper(c
));
763 #ifdef HAVE_STD_STRING_COMPARE
765 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
766 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
767 // sort strings in characters code point order by sorting the byte sequence
768 // in byte values order (i.e. what strcmp() and memcmp() do).
770 int wxString::compare(const wxString
& str
) const
772 return m_impl
.compare(str
.m_impl
);
775 int wxString::compare(size_t nStart
, size_t nLen
,
776 const wxString
& str
) const
779 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
780 return m_impl
.compare(pos
, len
, str
.m_impl
);
783 int wxString::compare(size_t nStart
, size_t nLen
,
785 size_t nStart2
, size_t nLen2
) const
788 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
791 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
793 return m_impl
.compare(pos
, len
, str
.m_impl
, pos2
, len2
);
796 int wxString::compare(const char* sz
) const
798 return m_impl
.compare(ImplStr(sz
));
801 int wxString::compare(const wchar_t* sz
) const
803 return m_impl
.compare(ImplStr(sz
));
806 int wxString::compare(size_t nStart
, size_t nLen
,
807 const char* sz
, size_t nCount
) const
810 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
812 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
814 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
817 int wxString::compare(size_t nStart
, size_t nLen
,
818 const wchar_t* sz
, size_t nCount
) const
821 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
823 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
825 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
828 #else // !HAVE_STD_STRING_COMPARE
830 static inline int wxDoCmp(const wxStringCharType
* s1
, size_t l1
,
831 const wxStringCharType
* s2
, size_t l2
)
834 return wxStringMemcmp(s1
, s2
, l1
);
837 int ret
= wxStringMemcmp(s1
, s2
, l1
);
838 return ret
== 0 ? -1 : ret
;
842 int ret
= wxStringMemcmp(s1
, s2
, l2
);
843 return ret
== 0 ? +1 : ret
;
847 int wxString::compare(const wxString
& str
) const
849 return ::wxDoCmp(m_impl
.data(), m_impl
.length(),
850 str
.m_impl
.data(), str
.m_impl
.length());
853 int wxString::compare(size_t nStart
, size_t nLen
,
854 const wxString
& str
) const
856 wxASSERT(nStart
<= length());
857 size_type strLen
= length() - nStart
;
858 nLen
= strLen
< nLen
? strLen
: nLen
;
861 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
863 return ::wxDoCmp(m_impl
.data() + pos
, len
,
864 str
.m_impl
.data(), str
.m_impl
.length());
867 int wxString::compare(size_t nStart
, size_t nLen
,
869 size_t nStart2
, size_t nLen2
) const
871 wxASSERT(nStart
<= length());
872 wxASSERT(nStart2
<= str
.length());
873 size_type strLen
= length() - nStart
,
874 strLen2
= str
.length() - nStart2
;
875 nLen
= strLen
< nLen
? strLen
: nLen
;
876 nLen2
= strLen2
< nLen2
? strLen2
: nLen2
;
879 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
881 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
883 return ::wxDoCmp(m_impl
.data() + pos
, len
,
884 str
.m_impl
.data() + pos2
, len2
);
887 int wxString::compare(const char* sz
) const
889 SubstrBufFromMB
str(ImplStr(sz
, npos
));
890 if ( str
.len
== npos
)
891 str
.len
= wxStringStrlen(str
.data
);
892 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
895 int wxString::compare(const wchar_t* sz
) const
897 SubstrBufFromWC
str(ImplStr(sz
, npos
));
898 if ( str
.len
== npos
)
899 str
.len
= wxStringStrlen(str
.data
);
900 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
903 int wxString::compare(size_t nStart
, size_t nLen
,
904 const char* sz
, size_t nCount
) const
906 wxASSERT(nStart
<= length());
907 size_type strLen
= length() - nStart
;
908 nLen
= strLen
< nLen
? strLen
: nLen
;
911 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
913 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
914 if ( str
.len
== npos
)
915 str
.len
= wxStringStrlen(str
.data
);
917 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
920 int wxString::compare(size_t nStart
, size_t nLen
,
921 const wchar_t* sz
, size_t nCount
) const
923 wxASSERT(nStart
<= length());
924 size_type strLen
= length() - nStart
;
925 nLen
= strLen
< nLen
? strLen
: nLen
;
928 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
930 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
931 if ( str
.len
== npos
)
932 str
.len
= wxStringStrlen(str
.data
);
934 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
937 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
940 // ---------------------------------------------------------------------------
941 // find_{first,last}_[not]_of functions
942 // ---------------------------------------------------------------------------
944 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
946 // NB: All these functions are implemented with the argument being wxChar*,
947 // i.e. widechar string in any Unicode build, even though native string
948 // representation is char* in the UTF-8 build. This is because we couldn't
949 // use memchr() to determine if a character is in a set encoded as UTF-8.
951 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
) const
953 return find_first_of(sz
, nStart
, wxStrlen(sz
));
956 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
) const
958 return find_first_not_of(sz
, nStart
, wxStrlen(sz
));
961 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
963 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
966 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
968 if ( wxTmemchr(sz
, *i
, n
) )
975 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
977 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
980 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
982 if ( !wxTmemchr(sz
, *i
, n
) )
990 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
) const
992 return find_last_of(sz
, nStart
, wxStrlen(sz
));
995 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
) const
997 return find_last_not_of(sz
, nStart
, wxStrlen(sz
));
1000 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
1002 size_t len
= length();
1004 if ( nStart
== npos
)
1010 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
1013 size_t idx
= nStart
;
1014 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
1015 i
!= rend(); --idx
, ++i
)
1017 if ( wxTmemchr(sz
, *i
, n
) )
1024 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
1026 size_t len
= length();
1028 if ( nStart
== npos
)
1034 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
1037 size_t idx
= nStart
;
1038 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
1039 i
!= rend(); --idx
, ++i
)
1041 if ( !wxTmemchr(sz
, *i
, n
) )
1048 size_t wxString::find_first_not_of(wxUniChar ch
, size_t nStart
) const
1050 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
1052 size_t idx
= nStart
;
1053 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
1062 size_t wxString::find_last_not_of(wxUniChar ch
, size_t nStart
) const
1064 size_t len
= length();
1066 if ( nStart
== npos
)
1072 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
1075 size_t idx
= nStart
;
1076 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
1077 i
!= rend(); --idx
, ++i
)
1086 // the functions above were implemented for wchar_t* arguments in Unicode
1087 // build and char* in ANSI build; below are implementations for the other
1090 #define wxOtherCharType char
1091 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
1093 #define wxOtherCharType wchar_t
1094 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
1097 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
) const
1098 { return find_first_of(STRCONV(sz
), nStart
); }
1100 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
,
1102 { return find_first_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1103 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
) const
1104 { return find_last_of(STRCONV(sz
), nStart
); }
1105 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
,
1107 { return find_last_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1108 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
1109 { return find_first_not_of(STRCONV(sz
), nStart
); }
1110 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
,
1112 { return find_first_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1113 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
1114 { return find_last_not_of(STRCONV(sz
), nStart
); }
1115 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
,
1117 { return find_last_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1119 #undef wxOtherCharType
1122 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1124 // ===========================================================================
1125 // other common string functions
1126 // ===========================================================================
1128 int wxString::CmpNoCase(const wxString
& s
) const
1130 #if defined(__WXMSW__) && !wxUSE_UNICODE_UTF8
1131 // prefer to use CompareString() if available as it's more efficient than
1132 // doing it manually or even using wxStricmp() (see #10375)
1133 switch ( ::CompareString(LOCALE_USER_DEFAULT
, NORM_IGNORECASE
,
1134 m_impl
.c_str(), m_impl
.length(),
1135 s
.m_impl
.c_str(), s
.m_impl
.length()) )
1137 case CSTR_LESS_THAN
:
1143 case CSTR_GREATER_THAN
:
1147 wxFAIL_MSG( "unexpected CompareString() return value" );
1151 wxLogLastError("CompareString");
1152 // use generic code below
1154 #endif // __WXMSW__ && !wxUSE_UNICODE_UTF8
1156 // do the comparison manually: notice that we can't use wxStricmp() as it
1157 // doesn't handle embedded NULs
1159 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
1160 const_iterator i1
= begin();
1161 const_iterator end1
= end();
1162 const_iterator i2
= s
.begin();
1163 const_iterator end2
= s
.end();
1165 for ( ; i1
!= end1
&& i2
!= end2
; ++i1
, ++i2
)
1167 wxUniChar lower1
= (wxChar
)wxTolower(*i1
);
1168 wxUniChar lower2
= (wxChar
)wxTolower(*i2
);
1169 if ( lower1
!= lower2
)
1170 return lower1
< lower2
? -1 : 1;
1173 size_t len1
= length();
1174 size_t len2
= s
.length();
1178 else if ( len1
> len2
)
1187 #ifndef __SCHAR_MAX__
1188 #define __SCHAR_MAX__ 127
1192 wxString
wxString::FromAscii(const char *ascii
, size_t len
)
1194 if (!ascii
|| len
== 0)
1195 return wxEmptyString
;
1200 wxStringInternalBuffer
buf(res
, len
);
1201 wxStringCharType
*dest
= buf
;
1203 for ( ; len
> 0; --len
)
1205 unsigned char c
= (unsigned char)*ascii
++;
1206 wxASSERT_MSG( c
< 0x80,
1207 _T("Non-ASCII value passed to FromAscii().") );
1209 *dest
++ = (wchar_t)c
;
1216 wxString
wxString::FromAscii(const char *ascii
)
1218 return FromAscii(ascii
, wxStrlen(ascii
));
1221 wxString
wxString::FromAscii(char ascii
)
1223 // What do we do with '\0' ?
1225 unsigned char c
= (unsigned char)ascii
;
1227 wxASSERT_MSG( c
< 0x80, _T("Non-ASCII value passed to FromAscii().") );
1229 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1230 return wxString(wxUniChar((wchar_t)c
));
1233 const wxScopedCharBuffer
wxString::ToAscii() const
1235 // this will allocate enough space for the terminating NUL too
1236 wxCharBuffer
buffer(length());
1237 char *dest
= buffer
.data();
1239 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1242 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1243 *dest
++ = c
.IsAscii() ? (char)c
: '_';
1245 // the output string can't have embedded NULs anyhow, so we can safely
1246 // stop at first of them even if we do have any
1254 #endif // wxUSE_UNICODE
1256 // extract string of length nCount starting at nFirst
1257 wxString
wxString::Mid(size_t nFirst
, size_t nCount
) const
1259 size_t nLen
= length();
1261 // default value of nCount is npos and means "till the end"
1262 if ( nCount
== npos
)
1264 nCount
= nLen
- nFirst
;
1267 // out-of-bounds requests return sensible things
1268 if ( nFirst
+ nCount
> nLen
)
1270 nCount
= nLen
- nFirst
;
1273 if ( nFirst
> nLen
)
1275 // AllocCopy() will return empty string
1276 return wxEmptyString
;
1279 wxString
dest(*this, nFirst
, nCount
);
1280 if ( dest
.length() != nCount
)
1282 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1288 // check that the string starts with prefix and return the rest of the string
1289 // in the provided pointer if it is not NULL, otherwise return false
1290 bool wxString::StartsWith(const wxString
& prefix
, wxString
*rest
) const
1292 if ( compare(0, prefix
.length(), prefix
) != 0 )
1297 // put the rest of the string into provided pointer
1298 rest
->assign(*this, prefix
.length(), npos
);
1305 // check that the string ends with suffix and return the rest of it in the
1306 // provided pointer if it is not NULL, otherwise return false
1307 bool wxString::EndsWith(const wxString
& suffix
, wxString
*rest
) const
1309 int start
= length() - suffix
.length();
1311 if ( start
< 0 || compare(start
, npos
, suffix
) != 0 )
1316 // put the rest of the string into provided pointer
1317 rest
->assign(*this, 0, start
);
1324 // extract nCount last (rightmost) characters
1325 wxString
wxString::Right(size_t nCount
) const
1327 if ( nCount
> length() )
1330 wxString
dest(*this, length() - nCount
, nCount
);
1331 if ( dest
.length() != nCount
) {
1332 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1337 // get all characters after the last occurrence of ch
1338 // (returns the whole string if ch not found)
1339 wxString
wxString::AfterLast(wxUniChar ch
) const
1342 int iPos
= Find(ch
, true);
1343 if ( iPos
== wxNOT_FOUND
)
1346 str
.assign(*this, iPos
+ 1, npos
);
1351 // extract nCount first (leftmost) characters
1352 wxString
wxString::Left(size_t nCount
) const
1354 if ( nCount
> length() )
1357 wxString
dest(*this, 0, nCount
);
1358 if ( dest
.length() != nCount
) {
1359 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1364 // get all characters before the first occurrence of ch
1365 // (returns the whole string if ch not found)
1366 wxString
wxString::BeforeFirst(wxUniChar ch
) const
1368 int iPos
= Find(ch
);
1369 if ( iPos
== wxNOT_FOUND
)
1371 return wxString(*this, 0, iPos
);
1374 /// get all characters before the last occurrence of ch
1375 /// (returns empty string if ch not found)
1376 wxString
wxString::BeforeLast(wxUniChar ch
) const
1379 int iPos
= Find(ch
, true);
1380 if ( iPos
!= wxNOT_FOUND
&& iPos
!= 0 )
1381 str
= wxString(c_str(), iPos
);
1386 /// get all characters after the first occurrence of ch
1387 /// (returns empty string if ch not found)
1388 wxString
wxString::AfterFirst(wxUniChar ch
) const
1391 int iPos
= Find(ch
);
1392 if ( iPos
!= wxNOT_FOUND
)
1393 str
.assign(*this, iPos
+ 1, npos
);
1398 // replace first (or all) occurrences of some substring with another one
1399 size_t wxString::Replace(const wxString
& strOld
,
1400 const wxString
& strNew
, bool bReplaceAll
)
1402 // if we tried to replace an empty string we'd enter an infinite loop below
1403 wxCHECK_MSG( !strOld
.empty(), 0,
1404 _T("wxString::Replace(): invalid parameter") );
1406 wxSTRING_INVALIDATE_CACHE();
1408 size_t uiCount
= 0; // count of replacements made
1410 // optimize the special common case: replacement of one character by
1411 // another one (in UTF-8 case we can only do this for ASCII characters)
1413 // benchmarks show that this special version is around 3 times faster
1414 // (depending on the proportion of matching characters and UTF-8/wchar_t
1416 if ( strOld
.m_impl
.length() == 1 && strNew
.m_impl
.length() == 1 )
1418 const wxStringCharType chOld
= strOld
.m_impl
[0],
1419 chNew
= strNew
.m_impl
[0];
1421 // this loop is the simplified version of the one below
1422 for ( size_t pos
= 0; ; )
1424 pos
= m_impl
.find(chOld
, pos
);
1428 m_impl
[pos
++] = chNew
;
1436 else if ( !bReplaceAll
)
1438 size_t pos
= m_impl
.find(strOld
, 0);
1441 m_impl
.replace(pos
, strOld
.m_impl
.length(), strNew
.m_impl
);
1445 else // replace all occurrences
1447 const size_t uiOldLen
= strOld
.m_impl
.length();
1448 const size_t uiNewLen
= strNew
.m_impl
.length();
1450 // first scan the string to find all positions at which the replacement
1452 wxVector
<size_t> replacePositions
;
1455 for ( pos
= m_impl
.find(strOld
.m_impl
, 0);
1457 pos
= m_impl
.find(strOld
.m_impl
, pos
+ uiOldLen
))
1459 replacePositions
.push_back(pos
);
1466 // allocate enough memory for the whole new string
1468 tmp
.m_impl
.reserve(m_impl
.length() + uiCount
*(uiNewLen
- uiOldLen
));
1470 // copy this string to tmp doing replacements on the fly
1472 for ( pos
= 0; replNum
< uiCount
; replNum
++ )
1474 const size_t nextReplPos
= replacePositions
[replNum
];
1476 if ( pos
!= nextReplPos
)
1478 tmp
.m_impl
.append(m_impl
, pos
, nextReplPos
- pos
);
1481 tmp
.m_impl
.append(strNew
.m_impl
);
1482 pos
= nextReplPos
+ uiOldLen
;
1485 if ( pos
!= m_impl
.length() )
1487 // append the rest of the string unchanged
1488 tmp
.m_impl
.append(m_impl
, pos
, m_impl
.length() - pos
);
1497 bool wxString::IsAscii() const
1499 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1501 if ( !(*i
).IsAscii() )
1508 bool wxString::IsWord() const
1510 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1512 if ( !wxIsalpha(*i
) )
1519 bool wxString::IsNumber() const
1524 const_iterator i
= begin();
1526 if ( *i
== _T('-') || *i
== _T('+') )
1529 for ( ; i
!= end(); ++i
)
1531 if ( !wxIsdigit(*i
) )
1538 wxString
wxString::Strip(stripType w
) const
1541 if ( w
& leading
) s
.Trim(false);
1542 if ( w
& trailing
) s
.Trim(true);
1546 // ---------------------------------------------------------------------------
1548 // ---------------------------------------------------------------------------
1550 wxString
& wxString::MakeUpper()
1552 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1553 *it
= (wxChar
)wxToupper(*it
);
1558 wxString
& wxString::MakeLower()
1560 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1561 *it
= (wxChar
)wxTolower(*it
);
1566 wxString
& wxString::MakeCapitalized()
1568 const iterator en
= end();
1569 iterator it
= begin();
1572 *it
= (wxChar
)wxToupper(*it
);
1573 for ( ++it
; it
!= en
; ++it
)
1574 *it
= (wxChar
)wxTolower(*it
);
1580 // ---------------------------------------------------------------------------
1581 // trimming and padding
1582 // ---------------------------------------------------------------------------
1584 // some compilers (VC++ 6.0 not to name them) return true for a call to
1585 // isspace('\xEA') in the C locale which seems to be broken to me, but we have
1586 // to live with this by checking that the character is a 7 bit one - even if
1587 // this may fail to detect some spaces (I don't know if Unicode doesn't have
1588 // space-like symbols somewhere except in the first 128 chars), it is arguably
1589 // still better than trimming away accented letters
1590 inline int wxSafeIsspace(wxChar ch
) { return (ch
< 127) && wxIsspace(ch
); }
1592 // trims spaces (in the sense of isspace) from left or right side
1593 wxString
& wxString::Trim(bool bFromRight
)
1595 // first check if we're going to modify the string at all
1598 (bFromRight
&& wxSafeIsspace(GetChar(length() - 1))) ||
1599 (!bFromRight
&& wxSafeIsspace(GetChar(0u)))
1605 // find last non-space character
1606 reverse_iterator psz
= rbegin();
1607 while ( (psz
!= rend()) && wxSafeIsspace(*psz
) )
1610 // truncate at trailing space start
1611 erase(psz
.base(), end());
1615 // find first non-space character
1616 iterator psz
= begin();
1617 while ( (psz
!= end()) && wxSafeIsspace(*psz
) )
1620 // fix up data and length
1621 erase(begin(), psz
);
1628 // adds nCount characters chPad to the string from either side
1629 wxString
& wxString::Pad(size_t nCount
, wxUniChar chPad
, bool bFromRight
)
1631 wxString
s(chPad
, nCount
);
1644 // truncate the string
1645 wxString
& wxString::Truncate(size_t uiLen
)
1647 if ( uiLen
< length() )
1649 erase(begin() + uiLen
, end());
1651 //else: nothing to do, string is already short enough
1656 // ---------------------------------------------------------------------------
1657 // finding (return wxNOT_FOUND if not found and index otherwise)
1658 // ---------------------------------------------------------------------------
1661 int wxString::Find(wxUniChar ch
, bool bFromEnd
) const
1663 size_type idx
= bFromEnd
? find_last_of(ch
) : find_first_of(ch
);
1665 return (idx
== npos
) ? wxNOT_FOUND
: (int)idx
;
1668 // ----------------------------------------------------------------------------
1669 // conversion to numbers
1670 // ----------------------------------------------------------------------------
// The implementation of all the functions below is exactly the same so factor
// it out. Note that number extraction works correctly on UTF-8 strings, so
// we can use wxStringCharType and wx_str() for maximum efficiency.
//
// Both macros must be used inside a function having a "pVal" output pointer
// parameter, with the code between them defining a variable "val" holding
// the result of the conversion of [start, end).

// expands to its argument everywhere except under Windows CE, where the
// errno-based checks are skipped
#ifndef __WXWINCE__
    #define DO_IF_NOT_WINCE(x) x
#else
    #define DO_IF_NOT_WINCE(x)
#endif

// checks the output pointer, resets errno and defines the "start" and "end"
// pointers used by the conversion function
#define WX_STRING_TO_X_TYPE_START                                           \
    wxCHECK_MSG( pVal, false, _T("NULL output pointer") );                  \
    DO_IF_NOT_WINCE( errno = 0; )                                           \
    const wxStringCharType *start = wx_str();                               \
    wxStringCharType *end;

#define WX_STRING_TO_X_TYPE_END                                             \
    /* return true only if scan was stopped by the terminating NUL and */   \
    /* if the string was not empty to start with and no under/overflow */   \
    /* occurred: */                                                         \
    if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )         \
        return false;                                                       \
    *pVal = val;                                                            \
    return true;
1697 bool wxString::ToLong(long *pVal
, int base
) const
1699 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), _T("invalid base") );
1701 WX_STRING_TO_X_TYPE_START
1702 long val
= wxStrtol(start
, &end
, base
);
1703 WX_STRING_TO_X_TYPE_END
1706 bool wxString::ToULong(unsigned long *pVal
, int base
) const
1708 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), _T("invalid base") );
1710 WX_STRING_TO_X_TYPE_START
1711 unsigned long val
= wxStrtoul(start
, &end
, base
);
1712 WX_STRING_TO_X_TYPE_END
1715 bool wxString::ToLongLong(wxLongLong_t
*pVal
, int base
) const
1717 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), _T("invalid base") );
1719 WX_STRING_TO_X_TYPE_START
1720 wxLongLong_t val
= wxStrtoll(start
, &end
, base
);
1721 WX_STRING_TO_X_TYPE_END
1724 bool wxString::ToULongLong(wxULongLong_t
*pVal
, int base
) const
1726 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), _T("invalid base") );
1728 WX_STRING_TO_X_TYPE_START
1729 wxULongLong_t val
= wxStrtoull(start
, &end
, base
);
1730 WX_STRING_TO_X_TYPE_END
1733 bool wxString::ToDouble(double *pVal
) const
1735 WX_STRING_TO_X_TYPE_START
1736 double val
= wxStrtod(start
, &end
);
1737 WX_STRING_TO_X_TYPE_END
1742 bool wxString::ToCLong(long *pVal
, int base
) const
1744 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), _T("invalid base") );
1746 WX_STRING_TO_X_TYPE_START
1747 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
1748 long val
= wxStrtol_lA(start
, &end
, base
, wxCLocale
);
1750 long val
= wxStrtol_l(start
, &end
, base
, wxCLocale
);
1752 WX_STRING_TO_X_TYPE_END
1755 bool wxString::ToCULong(unsigned long *pVal
, int base
) const
1757 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), _T("invalid base") );
1759 WX_STRING_TO_X_TYPE_START
1760 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
1761 unsigned long val
= wxStrtoul_lA(start
, &end
, base
, wxCLocale
);
1763 unsigned long val
= wxStrtoul_l(start
, &end
, base
, wxCLocale
);
1765 WX_STRING_TO_X_TYPE_END
1768 bool wxString::ToCDouble(double *pVal
) const
1770 WX_STRING_TO_X_TYPE_START
1771 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
1772 double val
= wxStrtod_lA(start
, &end
, wxCLocale
);
1774 double val
= wxStrtod_l(start
, &end
, wxCLocale
);
1776 WX_STRING_TO_X_TYPE_END
1779 #endif // wxUSE_XLOCALE
1781 // ---------------------------------------------------------------------------
1783 // ---------------------------------------------------------------------------
1785 #if !wxUSE_UTF8_LOCALE_ONLY
1787 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1788 wxString
wxStringPrintfMixinBase::DoFormatWchar(const wxChar
*format
, ...)
1790 wxString
wxString::DoFormatWchar(const wxChar
*format
, ...)
1794 va_start(argptr
, format
);
1797 s
.PrintfV(format
, argptr
);
1803 #endif // !wxUSE_UTF8_LOCALE_ONLY
#if wxUSE_UNICODE_UTF8

// Create a new string from a char printf()-like format string and the
// variable arguments following it.
//
// This is a thin wrapper collecting the variable arguments and forwarding
// them to PrintfV().
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString s;

    va_list argptr;
    va_start(argptr, format);
    s.PrintfV(format, argptr);
    va_end(argptr);

    return s;
}

#endif // wxUSE_UNICODE_UTF8
1822 wxString
wxString::FormatV(const wxString
& format
, va_list argptr
)
1825 s
.PrintfV(format
, argptr
);
1829 #if !wxUSE_UTF8_LOCALE_ONLY
1830 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1831 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar
*format
, ...)
1833 int wxString::DoPrintfWchar(const wxChar
*format
, ...)
1837 va_start(argptr
, format
);
1839 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1840 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1841 // because it's the only cast that works safely for downcasting when
1842 // multiple inheritance is used:
1843 wxString
*str
= static_cast<wxString
*>(this);
1845 wxString
*str
= this;
1848 int iLen
= str
->PrintfV(format
, argptr
);
1854 #endif // !wxUSE_UTF8_LOCALE_ONLY
#if wxUSE_UNICODE_UTF8

// Replace the contents of the string with the result of formatting the
// variable arguments according to the char printf()-like format string.
//
// Returns the value returned by PrintfV().
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list argptr;
    va_start(argptr, format);

    const int iLen = PrintfV(format, argptr);

    va_end(argptr);

    return iLen;
}

#endif // wxUSE_UNICODE_UTF8
1871 Uses wxVsnprintf and places the result into the this string.
1873 In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1874 it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in
1875 the ISO C99 (and thus SUSv3) standard the return value for the case of
1876 an undersized buffer is inconsistent. For conforming vsnprintf
1877 implementations the function must return the number of characters that
1878 would have been printed had the buffer been large enough. For conforming
1879 vswprintf implementations the function must return a negative number
1882 What vswprintf sets errno to is undefined but Darwin seems to set it to
1883 EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of
1884 those are defined in the standard and backed up by several conformance
1885 statements. Note that ENOMEM mentioned in the manual page does not
1886 apply to swprintf, only wprintf and fwprintf.
1888 Official manual page:
1889 http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1891 Some conformance statements (AIX, Solaris):
1892 http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1893 http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1895 Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1896 EILSEQ and EINVAL are specifically defined to mean the error is other than
1897 an undersized buffer and no other errno are defined we treat those two
1898 as meaning hard errors and everything else gets the old behavior which
1899 is to keep looping and increasing buffer size until the function succeeds.
1901 In practice it's impossible to determine before compilation which behavior
1902 may be used. The vswprintf function may have vsnprintf-like behavior or
1903 vice-versa. Behavior detected on one release can theoretically change
1904 with an updated release. Not to mention that configure testing for it
1905 would require the test to be run on the host system, not the build system
1906 which makes cross compilation difficult. Therefore, we make no assumptions
1907 about behavior and try our best to handle every known case, including the
1908 case where wxVsnprintf returns a negative number and fails to set errno.
1910 There is yet one more non-standard implementation and that is our own.
1911 Fortunately, that can be detected at compile-time.
1913 On top of all that, ISO C99 explicitly defines snprintf to write a null
1914 character to the last position of the specified buffer. That would be at
1915 the given buffer size minus 1. It is supposed to do this even if it
1916 turns out that the buffer is sized too small.
1918 Darwin (tested on 10.5) follows the C99 behavior exactly.
1920 Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1921 errno even when it fails. However, it only seems to ever fail due
1922 to an undersized buffer.
1924 #if wxUSE_UNICODE_UTF8
1925 template<typename BufferType
>
1927 // we only need one version in non-UTF8 builds and at least two Windows
1928 // compilers have problems with this function template, so use just one
1929 // normal function here
1931 static int DoStringPrintfV(wxString
& str
,
1932 const wxString
& format
, va_list argptr
)
1938 #if wxUSE_UNICODE_UTF8
1939 BufferType
tmp(str
, size
+ 1);
1940 typename
BufferType::CharType
*buf
= tmp
;
1942 wxStringBuffer
tmp(str
, size
+ 1);
1950 // in UTF-8 build, leaving uninitialized junk in the buffer
1951 // could result in invalid non-empty UTF-8 string, so just
1952 // reset the string to empty on failure:
1957 // wxVsnprintf() may modify the original arg pointer, so pass it
1960 wxVaCopy(argptrcopy
, argptr
);
1963 // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1966 int len
= wxVsnprintf(buf
, size
, format
, argptrcopy
);
1969 // some implementations of vsnprintf() don't NUL terminate
1970 // the string if there is not enough space for it so
1971 // always do it manually
1972 // FIXME: This really seems to be the wrong and would be an off-by-one
1973 // bug except the code above allocates an extra character.
1974 buf
[size
] = _T('\0');
1976 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1977 // total number of characters which would have been written if the
1978 // buffer were large enough (newer standards such as Unix98)
1981 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1982 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1983 // is true if *both* of them use our own implementation,
1984 // otherwise we can't be sure
1985 #if wxUSE_WXVSNPRINTF
1986 // we know that our own implementation of wxVsnprintf() returns -1
1987 // only for a format error - thus there's something wrong with
1988 // the user's format string
1991 #else // possibly using system version
1992 // assume it only returns error if there is not enough space, but
1993 // as we don't know how much we need, double the current size of
1996 if( (errno
== EILSEQ
) || (errno
== EINVAL
) )
1997 // If errno was set to one of the two well-known hard errors
1998 // then fail immediately to avoid an infinite loop.
2001 #endif // __WXWINCE__
2002 // still not enough, as we don't know how much we need, double the
2003 // current size of the buffer
2005 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
2007 else if ( len
>= size
)
2009 #if wxUSE_WXVSNPRINTF
2010 // we know that our own implementation of wxVsnprintf() returns
2011 // size+1 when there's not enough space but that's not the size
2012 // of the required buffer!
2013 size
*= 2; // so we just double the current size of the buffer
2015 // some vsnprintf() implementations NUL-terminate the buffer and
2016 // some don't in len == size case, to be safe always add 1
2017 // FIXME: I don't quite understand this comment. The vsnprintf
2018 // function is specifically defined to return the number of
2019 // characters printed not including the null terminator.
2020 // So OF COURSE you need to add 1 to get the right buffer size.
2021 // The following line is definitely correct, no question.
2025 else // ok, there was enough space
2031 // we could have overshot
2034 return str
.length();
2037 int wxString::PrintfV(const wxString
& format
, va_list argptr
)
2039 #if wxUSE_UNICODE_UTF8
2040 #if wxUSE_STL_BASED_WXSTRING
2041 typedef wxStringTypeBuffer
<char> Utf8Buffer
;
2043 typedef wxStringInternalBuffer Utf8Buffer
;
2047 #if wxUSE_UTF8_LOCALE_ONLY
2048 return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argptr
);
2050 #if wxUSE_UNICODE_UTF8
2051 if ( wxLocaleIsUtf8
)
2052 return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argptr
);
2055 return DoStringPrintfV
<wxStringBuffer
>(*this, format
, argptr
);
2057 return DoStringPrintfV(*this, format
, argptr
);
2058 #endif // UTF8/WCHAR
2062 // ----------------------------------------------------------------------------
2063 // misc other operations
2064 // ----------------------------------------------------------------------------
2066 // returns true if the string matches the pattern which may contain '*' and
2067 // '?' metacharacters (as usual, '?' matches any character and '*' any number
2069 bool wxString::Matches(const wxString
& mask
) const
2071 // I disable this code as it doesn't seem to be faster (in fact, it seems
2072 // to be much slower) than the old, hand-written code below and using it
2073 // here requires always linking with libregex even if the user code doesn't
2075 #if 0 // wxUSE_REGEX
2076 // first translate the shell-like mask into a regex
2078 pattern
.reserve(wxStrlen(pszMask
));
2090 pattern
+= _T(".*");
2101 // these characters are special in a RE, quote them
2102 // (however note that we don't quote '[' and ']' to allow
2103 // using them for Unix shell like matching)
2104 pattern
+= _T('\\');
2108 pattern
+= *pszMask
;
2116 return wxRegEx(pattern
, wxRE_NOSUB
| wxRE_EXTENDED
).Matches(c_str());
2117 #else // !wxUSE_REGEX
2118 // TODO: this is, of course, awfully inefficient...
2120 // FIXME-UTF8: implement using iterators, remove #if
2121 #if wxUSE_UNICODE_UTF8
2122 const wxScopedWCharBuffer maskBuf
= mask
.wc_str();
2123 const wxScopedWCharBuffer txtBuf
= wc_str();
2124 const wxChar
*pszMask
= maskBuf
.data();
2125 const wxChar
*pszTxt
= txtBuf
.data();
2127 const wxChar
*pszMask
= mask
.wx_str();
2128 // the char currently being checked
2129 const wxChar
*pszTxt
= wx_str();
2132 // the last location where '*' matched
2133 const wxChar
*pszLastStarInText
= NULL
;
2134 const wxChar
*pszLastStarInMask
= NULL
;
2137 for ( ; *pszMask
!= wxT('\0'); pszMask
++, pszTxt
++ ) {
2138 switch ( *pszMask
) {
2140 if ( *pszTxt
== wxT('\0') )
2143 // pszTxt and pszMask will be incremented in the loop statement
2149 // remember where we started to be able to backtrack later
2150 pszLastStarInText
= pszTxt
;
2151 pszLastStarInMask
= pszMask
;
2153 // ignore special chars immediately following this one
2154 // (should this be an error?)
2155 while ( *pszMask
== wxT('*') || *pszMask
== wxT('?') )
2158 // if there is nothing more, match
2159 if ( *pszMask
== wxT('\0') )
2162 // are there any other metacharacters in the mask?
2164 const wxChar
*pEndMask
= wxStrpbrk(pszMask
, wxT("*?"));
2166 if ( pEndMask
!= NULL
) {
2167 // we have to match the string between two metachars
2168 uiLenMask
= pEndMask
- pszMask
;
2171 // we have to match the remainder of the string
2172 uiLenMask
= wxStrlen(pszMask
);
2175 wxString
strToMatch(pszMask
, uiLenMask
);
2176 const wxChar
* pMatch
= wxStrstr(pszTxt
, strToMatch
);
2177 if ( pMatch
== NULL
)
2180 // -1 to compensate "++" in the loop
2181 pszTxt
= pMatch
+ uiLenMask
- 1;
2182 pszMask
+= uiLenMask
- 1;
2187 if ( *pszMask
!= *pszTxt
)
2193 // match only if nothing left
2194 if ( *pszTxt
== wxT('\0') )
2197 // if we failed to match, backtrack if we can
2198 if ( pszLastStarInText
) {
2199 pszTxt
= pszLastStarInText
+ 1;
2200 pszMask
= pszLastStarInMask
;
2202 pszLastStarInText
= NULL
;
2204 // don't bother resetting pszLastStarInMask, it's unnecessary
2210 #endif // wxUSE_REGEX/!wxUSE_REGEX
2213 // Count the number of chars
2214 int wxString::Freq(wxUniChar ch
) const
2217 for ( const_iterator i
= begin(); i
!= end(); ++i
)