1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
13 // ===========================================================================
14 // headers, declarations, constants
15 // ===========================================================================
17 // For compilers that support precompilation, includes "wx.h".
18 #include "wx/wxprec.h"
25 #include "wx/string.h"
26 #include "wx/wxcrtvararg.h"
38 #include "wx/hashmap.h"
39 #include "wx/vector.h"
41 // string handling functions used by wxString:
42 #if wxUSE_UNICODE_UTF8
43 #define wxStringMemcpy memcpy
44 #define wxStringMemcmp memcmp
45 #define wxStringMemchr memchr
46 #define wxStringStrlen strlen
48 #define wxStringMemcpy wxTmemcpy
49 #define wxStringMemcmp wxTmemcmp
50 #define wxStringMemchr wxTmemchr
51 #define wxStringStrlen wxStrlen
54 // ----------------------------------------------------------------------------
56 // ----------------------------------------------------------------------------
61 static UntypedBufferData
s_untypedNullData(NULL
);
63 UntypedBufferData
* const untypedNullDataPtr
= &s_untypedNullData
;
65 } // namespace wxPrivate
67 // ---------------------------------------------------------------------------
68 // static class variables definition
69 // ---------------------------------------------------------------------------
71 // According to the STL, npos _must_ be (size_t)-1
72 const size_t wxString::npos
= (size_t) -1;
74 #if wxUSE_STRING_POS_CACHE
76 #ifdef wxHAS_COMPILER_TLS
78 wxTLS_TYPE(wxString::Cache
) wxString::ms_cache
;
80 #else // !wxHAS_COMPILER_TLS
82 struct wxStrCacheInitializer
84 wxStrCacheInitializer()
86 // calling this function triggers s_cache initialization in it, and
87 // from now on it becomes safe to call from multiple threads
93 wxString::Cache& wxString::GetCache()
95 static wxTLS_TYPE(Cache) s_cache;
97 return wxTLS_VALUE(s_cache);
101 static wxStrCacheInitializer gs_stringCacheInit
;
103 #endif // wxHAS_COMPILER_TLS/!wxHAS_COMPILER_TLS
105 // gdb seems to be unable to display thread-local variables correctly, at least
106 // not my 6.4.98 version under amd64, so provide this debugging helper to do it
109 struct wxStrCacheDumper
111 static void ShowAll()
113 puts("*** wxString cache dump:");
114 for ( unsigned n
= 0; n
< wxString::Cache::SIZE
; n
++ )
116 const wxString::Cache::Element
&
117 c
= wxString::GetCacheBegin()[n
];
119 printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
121 n
== wxString::LastUsedCacheElement() ? " [*]" : "",
123 (unsigned long)c
.pos
,
124 (unsigned long)c
.impl
,
// Debugging helper intended to be invoked by hand (e.g. from a debugger):
// simply forwards to wxStrCacheDumper::ShowAll(), which prints the wxString
// cache dump.
130 void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
132 #endif // __WXDEBUG__
134 #ifdef wxPROFILE_STRING_CACHE
136 wxString::CacheStats
wxString::ms_cacheStats
;
138 struct wxStrCacheStatsDumper
140 ~wxStrCacheStatsDumper()
142 const wxString::CacheStats
& stats
= wxString::ms_cacheStats
;
146 puts("*** wxString cache statistics:");
147 printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
149 printf("\tHits %u (of which %u not used) or %.2f%%\n",
152 100.*float(stats
.poshits
- stats
.mishits
)/stats
.postot
);
153 printf("\tAverage position requested: %.2f\n",
154 float(stats
.sumpos
) / stats
.postot
);
155 printf("\tAverage offset after cached hint: %.2f\n",
156 float(stats
.sumofs
) / stats
.postot
);
161 printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
162 stats
.lentot
, 100.*float(stats
.lenhits
)/stats
.lentot
);
167 static wxStrCacheStatsDumper s_showCacheStats
;
169 #endif // wxPROFILE_STRING_CACHE
171 #endif // wxUSE_STRING_POS_CACHE
173 // ----------------------------------------------------------------------------
175 // ----------------------------------------------------------------------------
177 #if wxUSE_STD_IOSTREAM
181 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxCStrData
& str
)
183 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
184 const wxCharBuffer
buf(str
.AsCharBuf());
186 os
.clear(wxSTD
ios_base::failbit
);
192 return os
<< str
.AsInternal();
196 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxString
& str
)
198 return os
<< str
.c_str();
201 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxCharBuffer
& str
)
203 return os
<< str
.data();
207 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxWCharBuffer
& str
)
209 return os
<< str
.data();
213 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
215 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxString
& str
)
217 return wos
<< str
.wc_str();
220 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxCStrData
& str
)
222 return wos
<< str
.AsWChar();
225 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxWCharBuffer
& str
)
227 return wos
<< str
.data();
230 #endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
232 #endif // wxUSE_STD_IOSTREAM
234 // ===========================================================================
235 // wxString class core
236 // ===========================================================================
238 #if wxUSE_UNICODE_UTF8
240 void wxString::PosLenToImpl(size_t pos
, size_t len
,
241 size_t *implPos
, size_t *implLen
) const
247 else // have valid start position
249 const const_iterator b
= GetIterForNthChar(pos
);
250 *implPos
= wxStringImpl::const_iterator(b
.impl()) - m_impl
.begin();
255 else // have valid length too
257 // we need to handle the case of length specifying a substring
258 // going beyond the end of the string, just as std::string does
259 const const_iterator
e(end());
261 while ( len
&& i
<= e
)
267 *implLen
= i
.impl() - b
.impl();
272 #endif // wxUSE_UNICODE_UTF8
274 // ----------------------------------------------------------------------------
275 // wxCStrData converted strings caching
276 // ----------------------------------------------------------------------------
278 // FIXME-UTF8: temporarily disabled because it doesn't work with global
279 // string objects; re-enable after fixing this bug and benchmarking
280 // performance to see if using a hash is a good idea at all
283 // For backward compatibility reasons, it must be possible to assign the value
284 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
285 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
286 // because the memory would be freed immediately, but it has to be valid as long
287 // as the string is not modified, so that code like this still works:
289 // const wxChar *s = str.c_str();
290 // while ( s ) { ... }
292 // FIXME-UTF8: not thread safe!
293 // FIXME-UTF8: we currently clear the cached conversion only when the string is
294 // destroyed, but we should do it when the string is modified, to
295 // keep memory usage down
296 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
297 // invalidated the cache on every change, we could keep the previous
299 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
300 // to use mb_str() or wc_str() instead of (const [w]char*)c_str()
303 static inline void DeleteStringFromConversionCache(T
& hash
, const wxString
*s
)
305 typename
T::iterator i
= hash
.find(wxConstCast(s
, wxString
));
306 if ( i
!= hash
.end() )
314 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
315 // so we have to use wxString* here and const-cast when used
316 WX_DECLARE_HASH_MAP(wxString
*, char*, wxPointerHash
, wxPointerEqual
,
317 wxStringCharConversionCache
);
318 static wxStringCharConversionCache gs_stringsCharCache
;
320 const char* wxCStrData::AsChar() const
322 // remove previously cached value, if any (see FIXMEs above):
323 DeleteStringFromConversionCache(gs_stringsCharCache
, m_str
);
325 // convert the string and keep it:
326 const char *s
= gs_stringsCharCache
[wxConstCast(m_str
, wxString
)] =
327 m_str
->mb_str().release();
331 #endif // wxUSE_UNICODE
333 #if !wxUSE_UNICODE_WCHAR
334 WX_DECLARE_HASH_MAP(wxString
*, wchar_t*, wxPointerHash
, wxPointerEqual
,
335 wxStringWCharConversionCache
);
336 static wxStringWCharConversionCache gs_stringsWCharCache
;
338 const wchar_t* wxCStrData::AsWChar() const
340 // remove previously cached value, if any (see FIXMEs above):
341 DeleteStringFromConversionCache(gs_stringsWCharCache
, m_str
);
343 // convert the string and keep it:
344 const wchar_t *s
= gs_stringsWCharCache
[wxConstCast(m_str
, wxString
)] =
345 m_str
->wc_str().release();
349 #endif // !wxUSE_UNICODE_WCHAR
351 wxString::~wxString()
354 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
355 DeleteStringFromConversionCache(gs_stringsCharCache
, this);
357 #if !wxUSE_UNICODE_WCHAR
358 DeleteStringFromConversionCache(gs_stringsWCharCache
, this);
363 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
364 const char* wxCStrData::AsChar() const
366 #if wxUSE_UNICODE_UTF8
367 if ( wxLocaleIsUtf8
)
370 // under non-UTF8 locales, we have to convert the internal UTF-8
371 // representation using wxConvLibc and cache the result
373 wxString
*str
= wxConstCast(m_str
, wxString
);
375 // convert the string:
377 // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
378 // have it) but it's unfortunately not obvious to implement
379 // because we don't know how big a buffer we need for the
380 // given string length (in case of multibyte encodings, e.g.
381 // ISO-2022-JP or UTF-8 when internal representation is wchar_t)
383 // One idea would be to store more than just m_convertedToChar
384 // in wxString: then we could record the length of the string
385 // which was converted the last time and try to reuse the same
386 // buffer if the current length is not greater than it (this
387 // could still fail because string could have been modified in
388 // place but it would work most of the time, so we'd do it and
389 // only allocate the new buffer if in-place conversion returned
390 // an error). We could also store a bit saying if the string
391 // was modified since the last conversion (and update it in all
392 // operation modifying the string, of course) to avoid unneeded
393 // consequential conversions. But both of these ideas require
394 // adding more fields to wxString and require profiling results
395 // to be sure that we really gain enough from them to justify
397 wxCharBuffer
buf(str
->mb_str());
399 // if it failed, return empty string and not NULL to avoid crashes in code
400 // written with either wxWidgets 2 wxString or std::string behaviour in
401 // mind: neither of them ever returns NULL and so we shouldn't either
405 if ( str
->m_convertedToChar
&&
406 strlen(buf
) == strlen(str
->m_convertedToChar
) )
408 // keep the same buffer for as long as possible, so that several calls
409 // to c_str() in a row still work:
410 strcpy(str
->m_convertedToChar
, buf
);
414 str
->m_convertedToChar
= buf
.release();
418 return str
->m_convertedToChar
+ m_offset
;
420 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
422 #if !wxUSE_UNICODE_WCHAR
423 const wchar_t* wxCStrData::AsWChar() const
425 wxString
*str
= wxConstCast(m_str
, wxString
);
427 // convert the string:
428 wxWCharBuffer
buf(str
->wc_str());
430 // notice that here, unlike above in AsChar(), conversion can't fail as our
431 // internal UTF-8 is always well-formed -- or the string was corrupted and
432 // all bets are off anyhow
434 // FIXME-UTF8: do the conversion in-place in the existing buffer
435 if ( str
->m_convertedToWChar
&&
436 wxWcslen(buf
) == wxWcslen(str
->m_convertedToWChar
) )
438 // keep the same buffer for as long as possible, so that several calls
439 // to c_str() in a row still work:
440 memcpy(str
->m_convertedToWChar
, buf
, sizeof(wchar_t) * wxWcslen(buf
));
444 str
->m_convertedToWChar
= buf
.release();
448 return str
->m_convertedToWChar
+ m_offset
;
450 #endif // !wxUSE_UNICODE_WCHAR
452 // ===========================================================================
453 // wxString class core
454 // ===========================================================================
456 // ---------------------------------------------------------------------------
457 // construction and conversion
458 // ---------------------------------------------------------------------------
460 #if wxUSE_UNICODE_WCHAR
462 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
463 const wxMBConv
& conv
)
466 if ( !psz
|| nLength
== 0 )
467 return SubstrBufFromMB(L
"", 0);
469 if ( nLength
== npos
)
473 wxWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
475 return SubstrBufFromMB(_T(""), 0);
477 return SubstrBufFromMB(wcBuf
, wcLen
);
479 #endif // wxUSE_UNICODE_WCHAR
481 #if wxUSE_UNICODE_UTF8
483 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
484 const wxMBConv
& conv
)
487 if ( !psz
|| nLength
== 0 )
488 return SubstrBufFromMB("", 0);
490 // if psz is already in UTF-8, we don't have to do the roundtrip to
491 // wchar_t* and back:
494 // we need to validate the input because UTF8 iterators assume valid
495 // UTF-8 sequence and psz may be invalid:
496 if ( wxStringOperations::IsValidUtf8String(psz
, nLength
) )
498 // we must pass the real string length to SubstrBufFromMB ctor
499 if ( nLength
== npos
)
500 nLength
= psz
? strlen(psz
) : 0;
501 return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz
), nLength
);
503 // else: do the roundtrip through wchar_t*
506 if ( nLength
== npos
)
509 // first convert to wide string:
511 wxWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
513 return SubstrBufFromMB("", 0);
515 // and then to UTF-8:
516 SubstrBufFromMB
buf(ConvertStr(wcBuf
, wcLen
, wxMBConvStrictUTF8()));
517 // widechar -> UTF-8 conversion isn't supposed to ever fail:
518 wxASSERT_MSG( buf
.data
, _T("conversion to UTF-8 failed") );
522 #endif // wxUSE_UNICODE_UTF8
524 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
526 wxString::SubstrBufFromWC
wxString::ConvertStr(const wchar_t *pwz
, size_t nLength
,
527 const wxMBConv
& conv
)
530 if ( !pwz
|| nLength
== 0 )
531 return SubstrBufFromWC("", 0);
533 if ( nLength
== npos
)
537 wxCharBuffer
mbBuf(conv
.cWC2MB(pwz
, nLength
, &mbLen
));
539 return SubstrBufFromWC("", 0);
541 return SubstrBufFromWC(mbBuf
, mbLen
);
543 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
546 #if wxUSE_UNICODE_WCHAR
548 //Convert wxString in Unicode mode to a multi-byte string
549 const wxCharBuffer
wxString::mb_str(const wxMBConv
& conv
) const
551 return conv
.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL
);
554 #elif wxUSE_UNICODE_UTF8
556 const wxWCharBuffer
wxString::wc_str() const
558 return wxMBConvStrictUTF8().cMB2WC
561 m_impl
.length() + 1, // size, not length
566 const wxCharBuffer
wxString::mb_str(const wxMBConv
& conv
) const
569 return wxCharBuffer::CreateNonOwned(m_impl
.c_str());
571 // FIXME-UTF8: use wc_str() here once we have buffers with length
574 wxWCharBuffer
wcBuf(wxMBConvStrictUTF8().cMB2WC
577 m_impl
.length() + 1, // size
581 return wxCharBuffer("");
583 return conv
.cWC2MB(wcBuf
, wcLen
+1, NULL
);
588 //Converts this string to a wide character string if unicode
589 //mode is not enabled and wxUSE_WCHAR_T is enabled
590 const wxWCharBuffer
wxString::wc_str(const wxMBConv
& conv
) const
592 return conv
.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL
);
595 #endif // Unicode/ANSI
597 // shrink to minimal size (releasing extra memory)
598 bool wxString::Shrink()
600 wxString
tmp(begin(), end());
602 return tmp
.length() == length();
605 // deprecated compatibility code:
606 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
607 wxStringCharType
*wxString::GetWriteBuf(size_t nLen
)
609 return DoGetWriteBuf(nLen
);
612 void wxString::UngetWriteBuf()
617 void wxString::UngetWriteBuf(size_t nLen
)
619 DoUngetWriteBuf(nLen
);
621 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
624 // ---------------------------------------------------------------------------
626 // ---------------------------------------------------------------------------
628 // all functions are inline in string.h
630 // ---------------------------------------------------------------------------
631 // concatenation operators
632 // ---------------------------------------------------------------------------
635 * concatenation functions come in 5 flavours:
637 * char + string and string + char
638 * C str + string and string + C str
641 wxString
operator+(const wxString
& str1
, const wxString
& str2
)
643 #if !wxUSE_STL_BASED_WXSTRING
644 wxASSERT( str1
.IsValid() );
645 wxASSERT( str2
.IsValid() );
654 wxString
operator+(const wxString
& str
, wxUniChar ch
)
656 #if !wxUSE_STL_BASED_WXSTRING
657 wxASSERT( str
.IsValid() );
666 wxString
operator+(wxUniChar ch
, const wxString
& str
)
668 #if !wxUSE_STL_BASED_WXSTRING
669 wxASSERT( str
.IsValid() );
678 wxString
operator+(const wxString
& str
, const char *psz
)
680 #if !wxUSE_STL_BASED_WXSTRING
681 wxASSERT( str
.IsValid() );
685 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
686 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
694 wxString
operator+(const wxString
& str
, const wchar_t *pwz
)
696 #if !wxUSE_STL_BASED_WXSTRING
697 wxASSERT( str
.IsValid() );
701 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
702 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
710 wxString
operator+(const char *psz
, const wxString
& str
)
712 #if !wxUSE_STL_BASED_WXSTRING
713 wxASSERT( str
.IsValid() );
717 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
718 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
726 wxString
operator+(const wchar_t *pwz
, const wxString
& str
)
728 #if !wxUSE_STL_BASED_WXSTRING
729 wxASSERT( str
.IsValid() );
733 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
734 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
742 // ---------------------------------------------------------------------------
744 // ---------------------------------------------------------------------------
746 bool wxString::IsSameAs(wxUniChar c
, bool compareWithCase
) const
748 return (length() == 1) && (compareWithCase
? GetChar(0u) == c
749 : wxToupper(GetChar(0u)) == wxToupper(c
));
752 #ifdef HAVE_STD_STRING_COMPARE
754 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
755 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
756 // sort strings in characters code point order by sorting the byte sequence
757 // in byte values order (i.e. what strcmp() and memcmp() do).
759 int wxString::compare(const wxString
& str
) const
761 return m_impl
.compare(str
.m_impl
);
764 int wxString::compare(size_t nStart
, size_t nLen
,
765 const wxString
& str
) const
768 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
769 return m_impl
.compare(pos
, len
, str
.m_impl
);
772 int wxString::compare(size_t nStart
, size_t nLen
,
774 size_t nStart2
, size_t nLen2
) const
777 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
780 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
782 return m_impl
.compare(pos
, len
, str
.m_impl
, pos2
, len2
);
785 int wxString::compare(const char* sz
) const
787 return m_impl
.compare(ImplStr(sz
));
790 int wxString::compare(const wchar_t* sz
) const
792 return m_impl
.compare(ImplStr(sz
));
795 int wxString::compare(size_t nStart
, size_t nLen
,
796 const char* sz
, size_t nCount
) const
799 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
801 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
803 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
806 int wxString::compare(size_t nStart
, size_t nLen
,
807 const wchar_t* sz
, size_t nCount
) const
810 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
812 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
814 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
817 #else // !HAVE_STD_STRING_COMPARE
819 static inline int wxDoCmp(const wxStringCharType
* s1
, size_t l1
,
820 const wxStringCharType
* s2
, size_t l2
)
823 return wxStringMemcmp(s1
, s2
, l1
);
826 int ret
= wxStringMemcmp(s1
, s2
, l1
);
827 return ret
== 0 ? -1 : ret
;
831 int ret
= wxStringMemcmp(s1
, s2
, l2
);
832 return ret
== 0 ? +1 : ret
;
836 int wxString::compare(const wxString
& str
) const
838 return ::wxDoCmp(m_impl
.data(), m_impl
.length(),
839 str
.m_impl
.data(), str
.m_impl
.length());
842 int wxString::compare(size_t nStart
, size_t nLen
,
843 const wxString
& str
) const
845 wxASSERT(nStart
<= length());
846 size_type strLen
= length() - nStart
;
847 nLen
= strLen
< nLen
? strLen
: nLen
;
850 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
852 return ::wxDoCmp(m_impl
.data() + pos
, len
,
853 str
.m_impl
.data(), str
.m_impl
.length());
856 int wxString::compare(size_t nStart
, size_t nLen
,
858 size_t nStart2
, size_t nLen2
) const
860 wxASSERT(nStart
<= length());
861 wxASSERT(nStart2
<= str
.length());
862 size_type strLen
= length() - nStart
,
863 strLen2
= str
.length() - nStart2
;
864 nLen
= strLen
< nLen
? strLen
: nLen
;
865 nLen2
= strLen2
< nLen2
? strLen2
: nLen2
;
868 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
870 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
872 return ::wxDoCmp(m_impl
.data() + pos
, len
,
873 str
.m_impl
.data() + pos2
, len2
);
876 int wxString::compare(const char* sz
) const
878 SubstrBufFromMB
str(ImplStr(sz
, npos
));
879 if ( str
.len
== npos
)
880 str
.len
= wxStringStrlen(str
.data
);
881 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
884 int wxString::compare(const wchar_t* sz
) const
886 SubstrBufFromWC
str(ImplStr(sz
, npos
));
887 if ( str
.len
== npos
)
888 str
.len
= wxStringStrlen(str
.data
);
889 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
892 int wxString::compare(size_t nStart
, size_t nLen
,
893 const char* sz
, size_t nCount
) const
895 wxASSERT(nStart
<= length());
896 size_type strLen
= length() - nStart
;
897 nLen
= strLen
< nLen
? strLen
: nLen
;
900 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
902 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
903 if ( str
.len
== npos
)
904 str
.len
= wxStringStrlen(str
.data
);
906 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
909 int wxString::compare(size_t nStart
, size_t nLen
,
910 const wchar_t* sz
, size_t nCount
) const
912 wxASSERT(nStart
<= length());
913 size_type strLen
= length() - nStart
;
914 nLen
= strLen
< nLen
? strLen
: nLen
;
917 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
919 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
920 if ( str
.len
== npos
)
921 str
.len
= wxStringStrlen(str
.data
);
923 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
926 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
929 // ---------------------------------------------------------------------------
930 // find_{first,last}_[not]_of functions
931 // ---------------------------------------------------------------------------
933 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
935 // NB: All these functions are implemented with the argument being wxChar*,
936 // i.e. widechar string in any Unicode build, even though native string
937 // representation is char* in the UTF-8 build. This is because we couldn't
938 // use memchr() to determine if a character is in a set encoded as UTF-8.
940 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
) const
942 return find_first_of(sz
, nStart
, wxStrlen(sz
));
945 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
) const
947 return find_first_not_of(sz
, nStart
, wxStrlen(sz
));
950 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
952 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
955 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
957 if ( wxTmemchr(sz
, *i
, n
) )
964 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
966 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
969 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
971 if ( !wxTmemchr(sz
, *i
, n
) )
979 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
) const
981 return find_last_of(sz
, nStart
, wxStrlen(sz
));
984 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
) const
986 return find_last_not_of(sz
, nStart
, wxStrlen(sz
));
989 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
991 size_t len
= length();
993 if ( nStart
== npos
)
999 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
1002 size_t idx
= nStart
;
1003 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
1004 i
!= rend(); --idx
, ++i
)
1006 if ( wxTmemchr(sz
, *i
, n
) )
1013 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
1015 size_t len
= length();
1017 if ( nStart
== npos
)
1023 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
1026 size_t idx
= nStart
;
1027 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
1028 i
!= rend(); --idx
, ++i
)
1030 if ( !wxTmemchr(sz
, *i
, n
) )
1037 size_t wxString::find_first_not_of(wxUniChar ch
, size_t nStart
) const
1039 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
1041 size_t idx
= nStart
;
1042 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
1051 size_t wxString::find_last_not_of(wxUniChar ch
, size_t nStart
) const
1053 size_t len
= length();
1055 if ( nStart
== npos
)
1061 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
1064 size_t idx
= nStart
;
1065 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
1066 i
!= rend(); --idx
, ++i
)
1075 // the functions above were implemented for wchar_t* arguments in Unicode
1076 // build and char* in ANSI build; below are implementations for the other
1079 #define wxOtherCharType char
1080 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
1082 #define wxOtherCharType wchar_t
1083 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
// Overload taking a string of the "other" character type (wxOtherCharType):
// converts sz with STRCONV, which yields a const wxChar*, and forwards to the
// wxChar-based find_first_of() overload with the same start position.
1086 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
) const
1087 { return find_first_of(STRCONV(sz
), nStart
); }
1089 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
,
1091 { return find_first_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
// Overload taking a string of the "other" character type (wxOtherCharType):
// converts sz with STRCONV, which yields a const wxChar*, and forwards to the
// wxChar-based find_last_of() overload with the same start position.
1092 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
) const
1093 { return find_last_of(STRCONV(sz
), nStart
); }
1094 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
,
1096 { return find_last_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
// Overload taking a string of the "other" character type (wxOtherCharType):
// converts sz with STRCONV, which yields a const wxChar*, and forwards to the
// wxChar-based find_first_not_of() overload with the same start position.
1097 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
1098 { return find_first_not_of(STRCONV(sz
), nStart
); }
1099 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
,
1101 { return find_first_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
// Overload taking a string of the "other" character type (wxOtherCharType):
// converts sz with STRCONV, which yields a const wxChar*, and forwards to the
// wxChar-based find_last_not_of() overload with the same start position.
1102 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
1103 { return find_last_not_of(STRCONV(sz
), nStart
); }
1104 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
,
1106 { return find_last_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1108 #undef wxOtherCharType
1111 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1113 // ===========================================================================
1114 // other common string functions
1115 // ===========================================================================
1117 int wxString::CmpNoCase(const wxString
& s
) const
1119 #if wxUSE_UNICODE_UTF8
1120 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
1122 const_iterator i1
= begin();
1123 const_iterator end1
= end();
1124 const_iterator i2
= s
.begin();
1125 const_iterator end2
= s
.end();
1127 for ( ; i1
!= end1
&& i2
!= end2
; ++i1
, ++i2
)
1129 wxUniChar lower1
= (wxChar
)wxTolower(*i1
);
1130 wxUniChar lower2
= (wxChar
)wxTolower(*i2
);
1131 if ( lower1
!= lower2
)
1132 return lower1
< lower2
? -1 : 1;
1135 size_t len1
= length();
1136 size_t len2
= s
.length();
1140 else if ( len1
> len2
)
1143 #else // wxUSE_UNICODE_WCHAR or ANSI
1144 return wxStricmp(m_impl
.c_str(), s
.m_impl
.c_str());
1152 #ifndef __SCHAR_MAX__
1153 #define __SCHAR_MAX__ 127
1157 wxString
wxString::FromAscii(const char *ascii
, size_t len
)
1159 if (!ascii
|| len
== 0)
1160 return wxEmptyString
;
1165 wxStringInternalBuffer
buf(res
, len
);
1166 wxStringCharType
*dest
= buf
;
1168 for ( ; len
> 0; --len
)
1170 unsigned char c
= (unsigned char)*ascii
++;
1171 wxASSERT_MSG( c
< 0x80,
1172 _T("Non-ASCII value passed to FromAscii().") );
1174 *dest
++ = (wchar_t)c
;
1181 wxString
wxString::FromAscii(const char *ascii
)
1183 return FromAscii(ascii
, wxStrlen(ascii
));
1186 wxString
wxString::FromAscii(char ascii
)
1188 // What do we do with '\0' ?
1190 unsigned char c
= (unsigned char)ascii
;
1192 wxASSERT_MSG( c
< 0x80, _T("Non-ASCII value passed to FromAscii().") );
1194 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1195 return wxString(wxUniChar((wchar_t)c
));
1198 const wxCharBuffer
wxString::ToAscii() const
1200 // this will allocate enough space for the terminating NUL too
1201 wxCharBuffer
buffer(length());
1202 char *dest
= buffer
.data();
1204 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1207 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1208 *dest
++ = c
.IsAscii() ? (char)c
: '_';
1210 // the output string can't have embedded NULs anyhow, so we can safely
1211 // stop at the first of them if there are any
1219 #endif // wxUSE_UNICODE
1221 // extract string of length nCount starting at nFirst
1222 wxString
wxString::Mid(size_t nFirst
, size_t nCount
) const
1224 size_t nLen
= length();
1226 // default value of nCount is npos and means "till the end"
1227 if ( nCount
== npos
)
1229 nCount
= nLen
- nFirst
;
1232 // out-of-bounds requests return sensible things
1233 if ( nFirst
+ nCount
> nLen
)
1235 nCount
= nLen
- nFirst
;
1238 if ( nFirst
> nLen
)
1240 // AllocCopy() will return empty string
1241 return wxEmptyString
;
1244 wxString
dest(*this, nFirst
, nCount
);
1245 if ( dest
.length() != nCount
)
1247 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1253 // check that the string starts with prefix: if it does, store the rest of
1254 // the string in the provided pointer (if it is not NULL), otherwise return false
1255 bool wxString::StartsWith(const wxString
& prefix
, wxString
*rest
) const
1257 if ( compare(0, prefix
.length(), prefix
) != 0 )
1262 // put the rest of the string into provided pointer
1263 rest
->assign(*this, prefix
.length(), npos
);
1270 // check that the string ends with suffix: if it does, store the rest of it
1271 // in the provided pointer (if it is not NULL), otherwise return false
1272 bool wxString::EndsWith(const wxString
& suffix
, wxString
*rest
) const
1274 int start
= length() - suffix
.length();
1276 if ( start
< 0 || compare(start
, npos
, suffix
) != 0 )
1281 // put the rest of the string into provided pointer
1282 rest
->assign(*this, 0, start
);
1289 // extract nCount last (rightmost) characters
1290 wxString
wxString::Right(size_t nCount
) const
1292 if ( nCount
> length() )
1295 wxString
dest(*this, length() - nCount
, nCount
);
1296 if ( dest
.length() != nCount
) {
1297 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1302 // get all characters after the last occurrence of ch
1303 // (returns the whole string if ch not found)
1304 wxString
wxString::AfterLast(wxUniChar ch
) const
1307 int iPos
= Find(ch
, true);
1308 if ( iPos
== wxNOT_FOUND
)
1311 str
.assign(*this, iPos
+ 1, npos
);
1316 // extract nCount first (leftmost) characters
1317 wxString
wxString::Left(size_t nCount
) const
1319 if ( nCount
> length() )
1322 wxString
dest(*this, 0, nCount
);
1323 if ( dest
.length() != nCount
) {
1324 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1329 // get all characters before the first occurrence of ch
1330 // (returns the whole string if ch not found)
1331 wxString
wxString::BeforeFirst(wxUniChar ch
) const
1333 int iPos
= Find(ch
);
1334 if ( iPos
== wxNOT_FOUND
)
1336 return wxString(*this, 0, iPos
);
1339 /// get all characters before the last occurrence of ch
1340 /// (returns empty string if ch not found)
1341 wxString
wxString::BeforeLast(wxUniChar ch
) const
1344 int iPos
= Find(ch
, true);
1345 if ( iPos
!= wxNOT_FOUND
&& iPos
!= 0 )
1346 str
= wxString(c_str(), iPos
);
1351 /// get all characters after the first occurrence of ch
1352 /// (returns empty string if ch not found)
1353 wxString
wxString::AfterFirst(wxUniChar ch
) const
1356 int iPos
= Find(ch
);
1357 if ( iPos
!= wxNOT_FOUND
)
1358 str
.assign(*this, iPos
+ 1, npos
);
1363 // replace first (or all) occurrences of some substring with another one
1364 size_t wxString::Replace(const wxString
& strOld
,
1365 const wxString
& strNew
, bool bReplaceAll
)
1367 // if we tried to replace an empty string we'd enter an infinite loop below
1368 wxCHECK_MSG( !strOld
.empty(), 0,
1369 _T("wxString::Replace(): invalid parameter") );
1371 wxSTRING_INVALIDATE_CACHE();
1373 size_t uiCount
= 0; // count of replacements made
1375 // optimize the special common case: replacement of one character by
1376 // another one (in UTF-8 case we can only do this for ASCII characters)
1378 // benchmarks show that this special version is around 3 times faster
1379 // (depending on the proportion of matching characters and UTF-8/wchar_t
1381 if ( strOld
.m_impl
.length() == 1 && strNew
.m_impl
.length() == 1 )
1383 const wxStringCharType chOld
= strOld
.m_impl
[0],
1384 chNew
= strNew
.m_impl
[0];
1386 // this loop is the simplified version of the one below
1387 for ( size_t pos
= 0; ; )
1389 pos
= m_impl
.find(chOld
, pos
);
1393 m_impl
[pos
++] = chNew
;
1401 else if ( !bReplaceAll
)
1403 size_t pos
= m_impl
.find(strOld
, 0);
1406 m_impl
.replace(pos
, strOld
.m_impl
.length(), strNew
.m_impl
);
1410 else // replace all occurrences
1412 const size_t uiOldLen
= strOld
.m_impl
.length();
1413 const size_t uiNewLen
= strNew
.m_impl
.length();
1415 // first scan the string to find all positions at which the replacement
1417 wxVector
<size_t> replacePositions
;
1420 for ( pos
= m_impl
.find(strOld
.m_impl
, 0);
1422 pos
= m_impl
.find(strOld
.m_impl
, pos
+ uiOldLen
))
1424 replacePositions
.push_back(pos
);
1431 // allocate enough memory for the whole new string
1433 tmp
.m_impl
.reserve(m_impl
.length() + uiCount
*(uiNewLen
- uiOldLen
));
1435 // copy this string to tmp doing replacements on the fly
1437 for ( pos
= 0; replNum
< uiCount
; replNum
++ )
1439 const size_t nextReplPos
= replacePositions
[replNum
];
1441 if ( pos
!= nextReplPos
)
1443 tmp
.m_impl
.append(m_impl
, pos
, nextReplPos
- pos
);
1446 tmp
.m_impl
.append(strNew
.m_impl
);
1447 pos
= nextReplPos
+ uiOldLen
;
1450 if ( pos
!= m_impl
.length() )
1452 // append the rest of the string unchanged
1453 tmp
.m_impl
.append(m_impl
, pos
, m_impl
.length() - pos
);
1462 bool wxString::IsAscii() const
1464 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1466 if ( !(*i
).IsAscii() )
1473 bool wxString::IsWord() const
1475 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1477 if ( !wxIsalpha(*i
) )
1484 bool wxString::IsNumber() const
1489 const_iterator i
= begin();
1491 if ( *i
== _T('-') || *i
== _T('+') )
1494 for ( ; i
!= end(); ++i
)
1496 if ( !wxIsdigit(*i
) )
1503 wxString
wxString::Strip(stripType w
) const
1506 if ( w
& leading
) s
.Trim(false);
1507 if ( w
& trailing
) s
.Trim(true);
1511 // ---------------------------------------------------------------------------
1513 // ---------------------------------------------------------------------------
1515 wxString
& wxString::MakeUpper()
1517 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1518 *it
= (wxChar
)wxToupper(*it
);
1523 wxString
& wxString::MakeLower()
1525 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1526 *it
= (wxChar
)wxTolower(*it
);
1531 wxString
& wxString::MakeCapitalized()
1533 const iterator en
= end();
1534 iterator it
= begin();
1537 *it
= (wxChar
)wxToupper(*it
);
1538 for ( ++it
; it
!= en
; ++it
)
1539 *it
= (wxChar
)wxTolower(*it
);
1545 // ---------------------------------------------------------------------------
1546 // trimming and padding
1547 // ---------------------------------------------------------------------------
1549 // some compilers (VC++ 6.0 not to name them) return true for a call to
1550 // isspace('\xEA') in the C locale which seems to be broken to me, but we have
1551 // to live with this by checking that the character is a 7 bit one - even if
1552 // this may fail to detect some spaces (I don't know if Unicode doesn't have
1553 // space-like symbols somewhere except in the first 128 chars), it is arguably
1554 // still better than trimming away accented letters
1555 inline int wxSafeIsspace(wxChar ch
) { return (ch
< 127) && wxIsspace(ch
); }
1557 // trims spaces (in the sense of isspace) from left or right side
1558 wxString
& wxString::Trim(bool bFromRight
)
1560 // first check if we're going to modify the string at all
1563 (bFromRight
&& wxSafeIsspace(GetChar(length() - 1))) ||
1564 (!bFromRight
&& wxSafeIsspace(GetChar(0u)))
1570 // find last non-space character
1571 reverse_iterator psz
= rbegin();
1572 while ( (psz
!= rend()) && wxSafeIsspace(*psz
) )
1575 // truncate at trailing space start
1576 erase(psz
.base(), end());
1580 // find first non-space character
1581 iterator psz
= begin();
1582 while ( (psz
!= end()) && wxSafeIsspace(*psz
) )
1585 // fix up data and length
1586 erase(begin(), psz
);
1593 // adds nCount characters chPad to the string from either side
1594 wxString
& wxString::Pad(size_t nCount
, wxUniChar chPad
, bool bFromRight
)
1596 wxString
s(chPad
, nCount
);
1609 // truncate the string
1610 wxString
& wxString::Truncate(size_t uiLen
)
1612 if ( uiLen
< length() )
1614 erase(begin() + uiLen
, end());
1616 //else: nothing to do, string is already short enough
1621 // ---------------------------------------------------------------------------
1622 // finding (return wxNOT_FOUND if not found and index otherwise)
1623 // ---------------------------------------------------------------------------
1626 int wxString::Find(wxUniChar ch
, bool bFromEnd
) const
1628 size_type idx
= bFromEnd
? find_last_of(ch
) : find_first_of(ch
);
1630 return (idx
== npos
) ? wxNOT_FOUND
: (int)idx
;
1633 // ----------------------------------------------------------------------------
1634 // conversion to numbers
1635 // ----------------------------------------------------------------------------
// All the integer conversion functions below share exactly the same
// implementation, so it is factored out in a macro. Number extraction works
// correctly on UTF-8 strings, hence wxStringCharType and wx_str() can be
// used for maximum efficiency.

// errno is not used under Windows CE, so anything touching it is wrapped in
// this macro which expands to nothing there
#ifndef __WXWINCE__
    #define DO_IF_NOT_WINCE(x) x
#else
    #define DO_IF_NOT_WINCE(x)
#endif

// Body of a "bool ToXXX(T *out, int base) const" conversion method: parses
// the string with func and, on success, stores the result in *out and
// returns true. As it expands to several statements ending in a return, it
// can only be used as the entire function body.
#define WX_STRING_TO_INT_TYPE(out, base, func, T)                           \
    wxCHECK_MSG( out, false, _T("NULL output pointer") );                   \
    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );  \
                                                                            \
    DO_IF_NOT_WINCE( errno = 0; )                                           \
                                                                            \
    const wxStringCharType *start = wx_str();                               \
    wxStringCharType *end;                                                  \
    T val = func(start, &end, base);                                        \
                                                                            \
    /* succeed only if the scan was stopped by the terminating NUL, */      \
    /* the string was not empty to start with and no under/overflow */      \
    /* occurred: */                                                         \
    if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )         \
        return false;                                                       \
    *out = val;                                                             \
    return true
1665 bool wxString::ToLong(long *pVal
, int base
) const
1667 WX_STRING_TO_INT_TYPE(pVal
, base
, wxStrtol
, long);
1670 bool wxString::ToULong(unsigned long *pVal
, int base
) const
1672 WX_STRING_TO_INT_TYPE(pVal
, base
, wxStrtoul
, unsigned long);
1675 bool wxString::ToLongLong(wxLongLong_t
*pVal
, int base
) const
1677 WX_STRING_TO_INT_TYPE(pVal
, base
, wxStrtoll
, wxLongLong_t
);
1680 bool wxString::ToULongLong(wxULongLong_t
*pVal
, int base
) const
1682 WX_STRING_TO_INT_TYPE(pVal
, base
, wxStrtoull
, wxULongLong_t
);
1685 bool wxString::ToDouble(double *pVal
) const
1687 wxCHECK_MSG( pVal
, false, _T("NULL output pointer") );
1689 DO_IF_NOT_WINCE( errno
= 0; )
1691 const wxChar
*start
= c_str();
1693 double val
= wxStrtod(start
, &end
);
1695 // return true only if scan was stopped by the terminating NUL and if the
1696 // string was not empty to start with and no under/overflow occurred
1697 if ( *end
|| end
== start
DO_IF_NOT_WINCE(|| errno
== ERANGE
) )
1705 // ---------------------------------------------------------------------------
1707 // ---------------------------------------------------------------------------
1709 #if !wxUSE_UTF8_LOCALE_ONLY
1711 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1712 wxString
wxStringPrintfMixinBase::DoFormatWchar(const wxChar
*format
, ...)
1714 wxString
wxString::DoFormatWchar(const wxChar
*format
, ...)
1718 va_start(argptr
, format
);
1721 s
.PrintfV(format
, argptr
);
1727 #endif // !wxUSE_UTF8_LOCALE_ONLY
#if wxUSE_UNICODE_UTF8
// Implementation of the static Format() for char format strings: returns a
// new string containing the arguments formatted by PrintfV().
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString s;

    va_list argptr;
    va_start(argptr, format);
    s.PrintfV(format, argptr);
    va_end(argptr);

    return s;
}
#endif // wxUSE_UNICODE_UTF8
1746 wxString
wxString::FormatV(const wxString
& format
, va_list argptr
)
1749 s
.PrintfV(format
, argptr
);
1753 #if !wxUSE_UTF8_LOCALE_ONLY
1754 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1755 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar
*format
, ...)
1757 int wxString::DoPrintfWchar(const wxChar
*format
, ...)
1761 va_start(argptr
, format
);
1763 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1764 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1765 // because it's the only cast that works safely for downcasting when
1766 // multiple inheritance is used:
1767 wxString
*str
= static_cast<wxString
*>(this);
1769 wxString
*str
= this;
1772 int iLen
= str
->PrintfV(format
, argptr
);
1778 #endif // !wxUSE_UTF8_LOCALE_ONLY
#if wxUSE_UNICODE_UTF8
// Implementation of Printf() for char format strings: replaces the string
// contents with the formatted arguments and returns the result of PrintfV().
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list argptr;
    va_start(argptr, format);
    const int iLen = PrintfV(format, argptr);
    va_end(argptr);

    return iLen;
}
#endif // wxUSE_UNICODE_UTF8
1795 Uses wxVsnprintf and places the result into this string.
1797 In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1798 it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in
1799 the ISO C99 (and thus SUSv3) standard the return value for the case of
1800 an undersized buffer is inconsistent. For conforming vsnprintf
1801 implementations the function must return the number of characters that
1802 would have been printed had the buffer been large enough. For conforming
1803 vswprintf implementations the function must return a negative number
1806 What vswprintf sets errno to is undefined but Darwin seems to set it to
1807 EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of
1808 those are defined in the standard and backed up by several conformance
1809 statements. Note that ENOMEM mentioned in the manual page does not
1810 apply to swprintf, only wprintf and fwprintf.
1812 Official manual page:
1813 http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1815 Some conformance statements (AIX, Solaris):
1816 http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1817 http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1819 Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1820 EILSEQ and EINVAL are specifically defined to mean the error is other than
1821 an undersized buffer and no other errno are defined we treat those two
1822 as meaning hard errors and everything else gets the old behavior which
1823 is to keep looping and increasing buffer size until the function succeeds.
1825 In practice it's impossible to determine before compilation which behavior
1826 may be used. The vswprintf function may have vsnprintf-like behavior or
1827 vice-versa. Behavior detected on one release can theoretically change
1828 with an updated release. Not to mention that configure testing for it
1829 would require the test to be run on the host system, not the build system
1830 which makes cross compilation difficult. Therefore, we make no assumptions
1831 about behavior and try our best to handle every known case, including the
1832 case where wxVsnprintf returns a negative number and fails to set errno.
1834 There is yet one more non-standard implementation and that is our own.
1835 Fortunately, that can be detected at compile-time.
1837 On top of all that, ISO C99 explicitly defines snprintf to write a null
1838 character to the last position of the specified buffer. That would be
1839 at the given buffer size minus 1. It is supposed to do this even if it
1840 turns out that the buffer is sized too small.
1842 Darwin (tested on 10.5) follows the C99 behavior exactly.
1844 Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1845 errno even when it fails. However, it only seems to ever fail due
1846 to an undersized buffer.
1848 #if wxUSE_UNICODE_UTF8
1849 template<typename BufferType
>
1851 // we only need one version in non-UTF8 builds and at least two Windows
1852 // compilers have problems with this function template, so use just one
1853 // normal function here
1855 static int DoStringPrintfV(wxString
& str
,
1856 const wxString
& format
, va_list argptr
)
1862 #if wxUSE_UNICODE_UTF8
1863 BufferType
tmp(str
, size
+ 1);
1864 typename
BufferType::CharType
*buf
= tmp
;
1866 wxStringBuffer
tmp(str
, size
+ 1);
1874 // in UTF-8 build, leaving uninitialized junk in the buffer
1875 // could result in invalid non-empty UTF-8 string, so just
1876 // reset the string to empty on failure:
1881 // wxVsnprintf() may modify the original arg pointer, so pass it
1884 wxVaCopy(argptrcopy
, argptr
);
1887 // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1890 int len
= wxVsnprintf(buf
, size
, format
, argptrcopy
);
1893 // some implementations of vsnprintf() don't NUL terminate
1894 // the string if there is not enough space for it so
1895 // always do it manually
1896 // FIXME: This really seems to be the wrong and would be an off-by-one
1897 // bug except the code above allocates an extra character.
1898 buf
[size
] = _T('\0');
1900 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1901 // total number of characters which would have been written if the
1902 // buffer were large enough (newer standards such as Unix98)
1905 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1906 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1907 // is true if *both* of them use our own implementation,
1908 // otherwise we can't be sure
1909 #if wxUSE_WXVSNPRINTF
1910 // we know that our own implementation of wxVsnprintf() returns -1
1911 // only for a format error - thus there's something wrong with
1912 // the user's format string
1915 #else // possibly using system version
1916 // assume it only returns error if there is not enough space, but
1917 // as we don't know how much we need, double the current size of
1920 if( (errno
== EILSEQ
) || (errno
== EINVAL
) )
1921 // If errno was set to one of the two well-known hard errors
1922 // then fail immediately to avoid an infinite loop.
1925 #endif // __WXWINCE__
1926 // still not enough, as we don't know how much we need, double the
1927 // current size of the buffer
1929 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1931 else if ( len
>= size
)
1933 #if wxUSE_WXVSNPRINTF
1934 // we know that our own implementation of wxVsnprintf() returns
1935 // size+1 when there's not enough space but that's not the size
1936 // of the required buffer!
1937 size
*= 2; // so we just double the current size of the buffer
1939 // some vsnprintf() implementations NUL-terminate the buffer and
1940 // some don't in len == size case, to be safe always add 1
1941 // FIXME: I don't quite understand this comment. The vsnprintf
1942 // function is specifically defined to return the number of
1943 // characters printed not including the null terminator.
1944 // So OF COURSE you need to add 1 to get the right buffer size.
1945 // The following line is definitely correct, no question.
1949 else // ok, there was enough space
1955 // we could have overshot
1958 return str
.length();
1961 int wxString::PrintfV(const wxString
& format
, va_list argptr
)
1963 #if wxUSE_UNICODE_UTF8
1964 #if wxUSE_STL_BASED_WXSTRING
1965 typedef wxStringTypeBuffer
<char> Utf8Buffer
;
1967 typedef wxStringInternalBuffer Utf8Buffer
;
1971 #if wxUSE_UTF8_LOCALE_ONLY
1972 return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argptr
);
1974 #if wxUSE_UNICODE_UTF8
1975 if ( wxLocaleIsUtf8
)
1976 return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argptr
);
1979 return DoStringPrintfV
<wxStringBuffer
>(*this, format
, argptr
);
1981 return DoStringPrintfV(*this, format
, argptr
);
1982 #endif // UTF8/WCHAR
1986 // ----------------------------------------------------------------------------
1987 // misc other operations
1988 // ----------------------------------------------------------------------------
1990 // returns true if the string matches the pattern which may contain '*' and
1991 // '?' metacharacters (as usual, '?' matches any character and '*' any number
1993 bool wxString::Matches(const wxString
& mask
) const
1995 // I disable this code as it doesn't seem to be faster (in fact, it seems
1996 // to be much slower) than the old, hand-written code below and using it
1997 // here requires always linking with libregex even if the user code doesn't
1999 #if 0 // wxUSE_REGEX
2000 // first translate the shell-like mask into a regex
2002 pattern
.reserve(wxStrlen(pszMask
));
2014 pattern
+= _T(".*");
2025 // these characters are special in a RE, quote them
2026 // (however note that we don't quote '[' and ']' to allow
2027 // using them for Unix shell like matching)
2028 pattern
+= _T('\\');
2032 pattern
+= *pszMask
;
2040 return wxRegEx(pattern
, wxRE_NOSUB
| wxRE_EXTENDED
).Matches(c_str());
2041 #else // !wxUSE_REGEX
2042 // TODO: this is, of course, awfully inefficient...
2044 // FIXME-UTF8: implement using iterators, remove #if
2045 #if wxUSE_UNICODE_UTF8
2046 wxWCharBuffer maskBuf
= mask
.wc_str();
2047 wxWCharBuffer txtBuf
= wc_str();
2048 const wxChar
*pszMask
= maskBuf
.data();
2049 const wxChar
*pszTxt
= txtBuf
.data();
2051 const wxChar
*pszMask
= mask
.wx_str();
2052 // the char currently being checked
2053 const wxChar
*pszTxt
= wx_str();
2056 // the last location where '*' matched
2057 const wxChar
*pszLastStarInText
= NULL
;
2058 const wxChar
*pszLastStarInMask
= NULL
;
2061 for ( ; *pszMask
!= wxT('\0'); pszMask
++, pszTxt
++ ) {
2062 switch ( *pszMask
) {
2064 if ( *pszTxt
== wxT('\0') )
2067 // pszTxt and pszMask will be incremented in the loop statement
2073 // remember where we started to be able to backtrack later
2074 pszLastStarInText
= pszTxt
;
2075 pszLastStarInMask
= pszMask
;
2077 // ignore special chars immediately following this one
2078 // (should this be an error?)
2079 while ( *pszMask
== wxT('*') || *pszMask
== wxT('?') )
2082 // if there is nothing more, match
2083 if ( *pszMask
== wxT('\0') )
2086 // are there any other metacharacters in the mask?
2088 const wxChar
*pEndMask
= wxStrpbrk(pszMask
, wxT("*?"));
2090 if ( pEndMask
!= NULL
) {
2091 // we have to match the string between two metachars
2092 uiLenMask
= pEndMask
- pszMask
;
2095 // we have to match the remainder of the string
2096 uiLenMask
= wxStrlen(pszMask
);
2099 wxString
strToMatch(pszMask
, uiLenMask
);
2100 const wxChar
* pMatch
= wxStrstr(pszTxt
, strToMatch
);
2101 if ( pMatch
== NULL
)
2104 // -1 to compensate "++" in the loop
2105 pszTxt
= pMatch
+ uiLenMask
- 1;
2106 pszMask
+= uiLenMask
- 1;
2111 if ( *pszMask
!= *pszTxt
)
2117 // match only if nothing left
2118 if ( *pszTxt
== wxT('\0') )
2121 // if we failed to match, backtrack if we can
2122 if ( pszLastStarInText
) {
2123 pszTxt
= pszLastStarInText
+ 1;
2124 pszMask
= pszLastStarInMask
;
2126 pszLastStarInText
= NULL
;
2128 // don't bother resetting pszLastStarInMask, it's unnecessary
2134 #endif // wxUSE_REGEX/!wxUSE_REGEX
2137 // Count the number of chars
2138 int wxString::Freq(wxUniChar ch
) const
2141 for ( const_iterator i
= begin(); i
!= end(); ++i
)