1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
13 // ===========================================================================
14 // headers, declarations, constants
15 // ===========================================================================
17 // For compilers that support precompilation, includes "wx.h".
18 #include "wx/wxprec.h"
25 #include "wx/string.h"
26 #include "wx/wxcrtvararg.h"
38 #include "wx/hashmap.h"
40 // string handling functions used by wxString:
41 #if wxUSE_UNICODE_UTF8
42 #define wxStringMemcpy memcpy
43 #define wxStringMemcmp memcmp
44 #define wxStringMemchr memchr
45 #define wxStringStrlen strlen
47 #define wxStringMemcpy wxTmemcpy
48 #define wxStringMemcmp wxTmemcmp
49 #define wxStringMemchr wxTmemchr
50 #define wxStringStrlen wxStrlen
54 // ---------------------------------------------------------------------------
55 // static class variables definition
56 // ---------------------------------------------------------------------------
58 //According to STL _must_ be a -1 size_t
59 const size_t wxString::npos
= (size_t) -1;
61 #if wxUSE_STRING_POS_CACHE
63 // gdb seems to be unable to display thread-local variables correctly, at least
64 // not my 6.4.98 version under amd64, so provide this debugging helper to do it
67 struct wxStrCacheDumper
71 puts("*** wxString cache dump:");
72 for ( unsigned n
= 0; n
< wxString::Cache::SIZE
; n
++ )
74 const wxString::Cache::Element
&
75 c
= wxString::GetCacheBegin()[n
];
77 printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
79 n
== wxString::LastUsedCacheElement() ? " [*]" : "",
82 (unsigned long)c
.impl
,
88 void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
92 #ifdef wxPROFILE_STRING_CACHE
// Out-of-class definition of the static wxString::ms_cacheStats object. It
// sits inside the wxPROFILE_STRING_CACHE section, and its counters (postot,
// poshits, mishits, sumpos, sumofs, lentot, lenhits) are read by the
// statistics printing code that follows.
94 wxString::CacheStats
wxString::ms_cacheStats
;
103 const wxString::CacheStats
& stats
= wxString::ms_cacheStats
;
107 puts("*** wxString cache statistics:");
108 printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
110 printf("\tHits %u (of which %u not used) or %.2f%%\n",
113 100.*float(stats
.poshits
- stats
.mishits
)/stats
.postot
);
114 printf("\tAverage position requested: %.2f\n",
115 float(stats
.sumpos
) / stats
.postot
);
116 printf("\tAverage offset after cached hint: %.2f\n",
117 float(stats
.sumofs
) / stats
.postot
);
122 printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
123 stats
.lentot
, 100.*float(stats
.lenhits
)/stats
.lentot
);
128 } // anonymous namespace
130 #endif // wxPROFILE_STRING_CACHE
132 #endif // wxUSE_STRING_POS_CACHE
134 // ----------------------------------------------------------------------------
136 // ----------------------------------------------------------------------------
138 #if wxUSE_STD_IOSTREAM
// Stream insertion of a wxCStrData into a narrow-character standard stream.
// Two alternative return statements are visible, selected by the
// preprocessor test below: when wxUSE_UNICODE is set and wxUSE_UNICODE_UTF8
// is not, the data is written through AsCharBuf() cast to const char*; the
// other statement writes the result of AsInternal() directly.
// Returns the stream so that insertions can be chained.
142 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxCStrData
& str
)
144 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
145 return os
<< (const char *)str
.AsCharBuf();
147 return os
<< str
.AsInternal();
// Stream insertion of a wxString into a narrow-character standard stream.
// Inserts str.c_str(), so the actual output is produced by whichever
// operator<< applies to the value c_str() returns.
// Returns the stream so that insertions can be chained.
151 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxString
& str
)
153 return os
<< str
.c_str();
// Stream insertion of a wxCharBuffer into a narrow-character standard
// stream: writes the pointer returned by str.data().
// Returns the stream so that insertions can be chained.
156 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxCharBuffer
& str
)
158 return os
<< str
.data();
// Stream insertion of a wxWCharBuffer into a narrow-character standard
// stream: inserts whatever str.data() returns.
// NOTE(review): data() of a wide buffer is presumably a wchar_t pointer, so
// which ostream overload is selected here (and hence what gets printed)
// should be confirmed.
// Returns the stream so that insertions can be chained.
162 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxWCharBuffer
& str
)
164 return os
<< str
.data();
168 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
// Stream insertion of a wxString into a wide-character standard stream.
// Only compiled when wxUSE_UNICODE is set and HAVE_WOSTREAM is defined.
// Writes the wide representation obtained from str.wc_str().
// Returns the stream so that insertions can be chained.
170 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxString
& str
)
172 return wos
<< str
.wc_str();
// Stream insertion of a wxCStrData into a wide-character standard stream:
// writes the result of str.AsWChar().
// Returns the stream so that insertions can be chained.
175 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxCStrData
& str
)
177 return wos
<< str
.AsWChar();
// Stream insertion of a wxWCharBuffer into a wide-character standard
// stream: writes the pointer returned by str.data().
// Returns the stream so that insertions can be chained.
180 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxWCharBuffer
& str
)
182 return wos
<< str
.data();
185 #endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
187 #endif // wxUSE_STD_IOSTREAM
189 // ===========================================================================
190 // wxString class core
191 // ===========================================================================
193 #if wxUSE_UNICODE_UTF8
195 void wxString::PosLenToImpl(size_t pos
, size_t len
,
196 size_t *implPos
, size_t *implLen
) const
202 else // have valid start position
204 const const_iterator b
= GetIterForNthChar(pos
);
205 *implPos
= wxStringImpl::const_iterator(b
.impl()) - m_impl
.begin();
210 else // have valid length too
212 // we need to handle the case of length specifying a substring
213 // going beyond the end of the string, just as std::string does
214 const const_iterator
e(end());
216 while ( len
&& i
<= e
)
222 *implLen
= i
.impl() - b
.impl();
227 #endif // wxUSE_UNICODE_UTF8
229 // ----------------------------------------------------------------------------
230 // wxCStrData converted strings caching
231 // ----------------------------------------------------------------------------
233 // FIXME-UTF8: temporarily disabled because it doesn't work with global
234 // string objects; re-enable after fixing this bug and benchmarking
235 // performance to see if using a hash is a good idea at all
238 // For backward compatibility reasons, it must be possible to assign the value
239 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
240 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
241 // because the memory would be freed immediately, but it has to be valid as long
242 // as the string is not modified, so that code like this still works:
244 // const wxChar *s = str.c_str();
245 // while ( s ) { ... }
247 // FIXME-UTF8: not thread safe!
248 // FIXME-UTF8: we currently clear the cached conversion only when the string is
249 // destroyed, but we should do it when the string is modified, to
250 // keep memory usage down
251 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
252 // invalidated the cache on every change, we could keep the previous
254 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
255 // to use mb_str() or wc_str() instead of (const [w]char*)c_str()
258 static inline void DeleteStringFromConversionCache(T
& hash
, const wxString
*s
)
260 typename
T::iterator i
= hash
.find(wxConstCast(s
, wxString
));
261 if ( i
!= hash
.end() )
269 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
270 // so we have to use wxString* here and const-cast when used
271 WX_DECLARE_HASH_MAP(wxString
*, char*, wxPointerHash
, wxPointerEqual
,
272 wxStringCharConversionCache
);
273 static wxStringCharConversionCache gs_stringsCharCache
;
275 const char* wxCStrData::AsChar() const
277 // remove previously cached value, if any (see FIXMEs above):
278 DeleteStringFromConversionCache(gs_stringsCharCache
, m_str
);
280 // convert the string and keep it:
281 const char *s
= gs_stringsCharCache
[wxConstCast(m_str
, wxString
)] =
282 m_str
->mb_str().release();
286 #endif // wxUSE_UNICODE
288 #if !wxUSE_UNICODE_WCHAR
289 WX_DECLARE_HASH_MAP(wxString
*, wchar_t*, wxPointerHash
, wxPointerEqual
,
290 wxStringWCharConversionCache
);
291 static wxStringWCharConversionCache gs_stringsWCharCache
;
293 const wchar_t* wxCStrData::AsWChar() const
295 // remove previously cached value, if any (see FIXMEs above):
296 DeleteStringFromConversionCache(gs_stringsWCharCache
, m_str
);
298 // convert the string and keep it:
299 const wchar_t *s
= gs_stringsWCharCache
[wxConstCast(m_str
, wxString
)] =
300 m_str
->wc_str().release();
304 #endif // !wxUSE_UNICODE_WCHAR
306 wxString::~wxString()
309 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
310 DeleteStringFromConversionCache(gs_stringsCharCache
, this);
312 #if !wxUSE_UNICODE_WCHAR
313 DeleteStringFromConversionCache(gs_stringsWCharCache
, this);
318 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
319 const char* wxCStrData::AsChar() const
321 #if wxUSE_UNICODE_UTF8
322 if ( wxLocaleIsUtf8
)
325 // under non-UTF8 locales, we have to convert the internal UTF-8
326 // representation using wxConvLibc and cache the result
328 wxString
*str
= wxConstCast(m_str
, wxString
);
330 // convert the string:
332 // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
333 // have it) but it's unfortunately not obvious to implement
334 // because we don't know how big a buffer we need for the
335 // given string length (in case of multibyte encodings, e.g.
336 // ISO-2022-JP or UTF-8 when internal representation is wchar_t)
338 // One idea would be to store more than just m_convertedToChar
339 // in wxString: then we could record the length of the string
340 // which was converted the last time and try to reuse the same
341 // buffer if the current length is not greater than it (this
342 // could still fail because string could have been modified in
343 // place but it would work most of the time, so we'd do it and
344 // only allocate the new buffer if in-place conversion returned
345 // an error). We could also store a bit saying if the string
346 // was modified since the last conversion (and update it in all
347 // operation modifying the string, of course) to avoid unneeded
348 // consequential conversions. But both of these ideas require
349 // adding more fields to wxString and require profiling results
350 // to be sure that we really gain enough from them to justify
352 wxCharBuffer
buf(str
->mb_str());
354 // if it failed, return empty string and not NULL to avoid crashes in code
355 // written with either wxWidgets 2 wxString or std::string behaviour in
356 // mind: neither of them ever returns NULL and so we shouldn't either
360 if ( str
->m_convertedToChar
&&
361 strlen(buf
) == strlen(str
->m_convertedToChar
) )
363 // keep the same buffer for as long as possible, so that several calls
364 // to c_str() in a row still work:
365 strcpy(str
->m_convertedToChar
, buf
);
369 str
->m_convertedToChar
= buf
.release();
373 return str
->m_convertedToChar
+ m_offset
;
375 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
377 #if !wxUSE_UNICODE_WCHAR
378 const wchar_t* wxCStrData::AsWChar() const
380 wxString
*str
= wxConstCast(m_str
, wxString
);
382 // convert the string:
383 wxWCharBuffer
buf(str
->wc_str());
385 // notice that here, unlike above in AsChar(), conversion can't fail as our
386 // internal UTF-8 is always well-formed -- or the string was corrupted and
387 // all bets are off anyhow
389 // FIXME-UTF8: do the conversion in-place in the existing buffer
390 if ( str
->m_convertedToWChar
&&
391 wxWcslen(buf
) == wxWcslen(str
->m_convertedToWChar
) )
393 // keep the same buffer for as long as possible, so that several calls
394 // to c_str() in a row still work:
395 memcpy(str
->m_convertedToWChar
, buf
, sizeof(wchar_t) * wxWcslen(buf
));
399 str
->m_convertedToWChar
= buf
.release();
403 return str
->m_convertedToWChar
+ m_offset
;
405 #endif // !wxUSE_UNICODE_WCHAR
407 // ===========================================================================
408 // wxString class core
409 // ===========================================================================
411 // ---------------------------------------------------------------------------
412 // construction and conversion
413 // ---------------------------------------------------------------------------
415 #if wxUSE_UNICODE_WCHAR
417 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
418 const wxMBConv
& conv
)
421 if ( !psz
|| nLength
== 0 )
422 return SubstrBufFromMB(L
"", 0);
424 if ( nLength
== npos
)
428 wxWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
430 return SubstrBufFromMB(_T(""), 0);
432 return SubstrBufFromMB(wcBuf
, wcLen
);
434 #endif // wxUSE_UNICODE_WCHAR
436 #if wxUSE_UNICODE_UTF8
438 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
439 const wxMBConv
& conv
)
442 if ( !psz
|| nLength
== 0 )
443 return SubstrBufFromMB("", 0);
445 // if psz is already in UTF-8, we don't have to do the roundtrip to
446 // wchar_t* and back:
449 // we need to validate the input because UTF8 iterators assume valid
450 // UTF-8 sequence and psz may be invalid:
451 if ( wxStringOperations::IsValidUtf8String(psz
, nLength
) )
453 // we must pass the real string length to SubstrBufFromMB ctor
454 if ( nLength
== npos
)
455 nLength
= psz
? strlen(psz
) : 0;
456 return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz
), nLength
);
458 // else: do the roundtrip through wchar_t*
461 if ( nLength
== npos
)
464 // first convert to wide string:
466 wxWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
468 return SubstrBufFromMB("", 0);
470 // and then to UTF-8:
471 SubstrBufFromMB
buf(ConvertStr(wcBuf
, wcLen
, wxMBConvStrictUTF8()));
472 // widechar -> UTF-8 conversion isn't supposed to ever fail:
473 wxASSERT_MSG( buf
.data
, _T("conversion to UTF-8 failed") );
477 #endif // wxUSE_UNICODE_UTF8
479 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
481 wxString::SubstrBufFromWC
wxString::ConvertStr(const wchar_t *pwz
, size_t nLength
,
482 const wxMBConv
& conv
)
485 if ( !pwz
|| nLength
== 0 )
486 return SubstrBufFromWC("", 0);
488 if ( nLength
== npos
)
492 wxCharBuffer
mbBuf(conv
.cWC2MB(pwz
, nLength
, &mbLen
));
494 return SubstrBufFromWC("", 0);
496 return SubstrBufFromWC(mbBuf
, mbLen
);
498 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
501 #if wxUSE_UNICODE_WCHAR
503 //Convert wxString in Unicode mode to a multi-byte string
// wxUSE_UNICODE_WCHAR build: convert the string to a multibyte buffer using
// the supplied converter.
//   conv - converter whose cWC2MB() performs the conversion.
// The source size passed is length() + 1; as the inline note says this is a
// size rather than a length (presumably so that the terminating NUL is
// converted as well -- TODO confirm). NULL is passed for the last argument,
// which other call sites in this file use to receive the output length.
504 const wxCharBuffer
wxString::mb_str(const wxMBConv
& conv
) const
506 return conv
.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL
);
509 #elif wxUSE_UNICODE_UTF8
511 const wxWCharBuffer
wxString::wc_str() const
513 return wxMBConvStrictUTF8().cMB2WC
516 m_impl
.length() + 1, // size, not length
521 const wxCharBuffer
wxString::mb_str(const wxMBConv
& conv
) const
524 return wxCharBuffer::CreateNonOwned(m_impl
.c_str());
526 // FIXME-UTF8: use wc_str() here once we have buffers with length
529 wxWCharBuffer
wcBuf(wxMBConvStrictUTF8().cMB2WC
532 m_impl
.length() + 1, // size
536 return wxCharBuffer("");
538 return conv
.cWC2MB(wcBuf
, wcLen
+1, NULL
);
543 //Converts this string to a wide character string if unicode
544 //mode is not enabled and wxUSE_WCHAR_T is enabled
// Convert the string to a wide character buffer using the supplied
// converter.
//   conv - converter whose cMB2WC() performs the conversion.
// The source size passed is length() + 1 (a size, not a length, per the
// inline note); NULL is passed for the last argument, which other call
// sites in this file use to receive the output length.
545 const wxWCharBuffer
wxString::wc_str(const wxMBConv
& conv
) const
547 return conv
.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL
);
550 #endif // Unicode/ANSI
552 // shrink to minimal size (releasing extra memory)
553 bool wxString::Shrink()
555 wxString
tmp(begin(), end());
557 return tmp
.length() == length();
560 // deprecated compatibility code:
561 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
// Deprecated compatibility entry point, compiled only under the
// WXWIN_COMPATIBILITY_2_8 condition that precedes it. It forwards nLen to
// DoGetWriteBuf() and returns the pointer that function yields.
562 wxStringCharType
*wxString::GetWriteBuf(size_t nLen
)
564 return DoGetWriteBuf(nLen
);
567 void wxString::UngetWriteBuf()
// Deprecated compatibility counterpart of GetWriteBuf() taking an explicit
// length: forwards nLen unchanged to DoUngetWriteBuf().
572 void wxString::UngetWriteBuf(size_t nLen
)
574 DoUngetWriteBuf(nLen
);
576 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
579 // ---------------------------------------------------------------------------
581 // ---------------------------------------------------------------------------
583 // all functions are inline in string.h
585 // ---------------------------------------------------------------------------
586 // concatenation operators
587 // ---------------------------------------------------------------------------
590 * concatenation functions come in 5 flavours:
592 * char + string and string + char
593 * C str + string and string + C str
596 wxString
operator+(const wxString
& str1
, const wxString
& str2
)
598 #if !wxUSE_STL_BASED_WXSTRING
599 wxASSERT( str1
.IsValid() );
600 wxASSERT( str2
.IsValid() );
609 wxString
operator+(const wxString
& str
, wxUniChar ch
)
611 #if !wxUSE_STL_BASED_WXSTRING
612 wxASSERT( str
.IsValid() );
621 wxString
operator+(wxUniChar ch
, const wxString
& str
)
623 #if !wxUSE_STL_BASED_WXSTRING
624 wxASSERT( str
.IsValid() );
633 wxString
operator+(const wxString
& str
, const char *psz
)
635 #if !wxUSE_STL_BASED_WXSTRING
636 wxASSERT( str
.IsValid() );
640 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
641 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
649 wxString
operator+(const wxString
& str
, const wchar_t *pwz
)
651 #if !wxUSE_STL_BASED_WXSTRING
652 wxASSERT( str
.IsValid() );
656 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
657 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
665 wxString
operator+(const char *psz
, const wxString
& str
)
667 #if !wxUSE_STL_BASED_WXSTRING
668 wxASSERT( str
.IsValid() );
672 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
673 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
681 wxString
operator+(const wchar_t *pwz
, const wxString
& str
)
683 #if !wxUSE_STL_BASED_WXSTRING
684 wxASSERT( str
.IsValid() );
688 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
689 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
697 // ---------------------------------------------------------------------------
699 // ---------------------------------------------------------------------------
// Test whether the string consists of exactly the single character c.
//   c               - character to compare against.
//   compareWithCase - if true the characters are compared as is; if false
//                     both are passed through wxToupper() first.
// Returns false for any string whose length() is not 1; the length test
// comes first so GetChar(0u) is never evaluated on an empty string.
701 bool wxString::IsSameAs(wxUniChar c
, bool compareWithCase
) const
703 return (length() == 1) && (compareWithCase
? GetChar(0u) == c
704 : wxToupper(GetChar(0u)) == wxToupper(c
));
707 #ifdef HAVE_STD_STRING_COMPARE
709 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
710 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
711 // sort strings in characters code point order by sorting the byte sequence
712 // in byte values order (i.e. what strcmp() and memcmp() do).
// HAVE_STD_STRING_COMPARE variant: compare the whole string with str by
// delegating to compare() of the underlying implementation string; the
// return value is whatever m_impl.compare() yields.
714 int wxString::compare(const wxString
& str
) const
716 return m_impl
.compare(str
.m_impl
);
719 int wxString::compare(size_t nStart
, size_t nLen
,
720 const wxString
& str
) const
723 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
724 return m_impl
.compare(pos
, len
, str
.m_impl
);
727 int wxString::compare(size_t nStart
, size_t nLen
,
729 size_t nStart2
, size_t nLen2
) const
732 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
735 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
737 return m_impl
.compare(pos
, len
, str
.m_impl
, pos2
, len2
);
// HAVE_STD_STRING_COMPARE variant: compare the whole string with a narrow
// C string. sz is first passed through ImplStr() and the result is handed
// to m_impl.compare().
740 int wxString::compare(const char* sz
) const
742 return m_impl
.compare(ImplStr(sz
));
// HAVE_STD_STRING_COMPARE variant: compare the whole string with a wide
// C string. sz is first passed through ImplStr() and the result is handed
// to m_impl.compare().
745 int wxString::compare(const wchar_t* sz
) const
747 return m_impl
.compare(ImplStr(sz
));
750 int wxString::compare(size_t nStart
, size_t nLen
,
751 const char* sz
, size_t nCount
) const
754 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
756 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
758 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
761 int wxString::compare(size_t nStart
, size_t nLen
,
762 const wchar_t* sz
, size_t nCount
) const
765 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
767 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
769 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
772 #else // !HAVE_STD_STRING_COMPARE
774 static inline int wxDoCmp(const wxStringCharType
* s1
, size_t l1
,
775 const wxStringCharType
* s2
, size_t l2
)
778 return wxStringMemcmp(s1
, s2
, l1
);
781 int ret
= wxStringMemcmp(s1
, s2
, l1
);
782 return ret
== 0 ? -1 : ret
;
786 int ret
= wxStringMemcmp(s1
, s2
, l2
);
787 return ret
== 0 ? +1 : ret
;
// !HAVE_STD_STRING_COMPARE variant: compare the whole string with str by
// passing the data pointers and lengths of both implementation strings to
// the file-local helper wxDoCmp().
791 int wxString::compare(const wxString
& str
) const
793 return ::wxDoCmp(m_impl
.data(), m_impl
.length(),
794 str
.m_impl
.data(), str
.m_impl
.length());
797 int wxString::compare(size_t nStart
, size_t nLen
,
798 const wxString
& str
) const
800 wxASSERT(nStart
<= length());
801 size_type strLen
= length() - nStart
;
802 nLen
= strLen
< nLen
? strLen
: nLen
;
805 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
807 return ::wxDoCmp(m_impl
.data() + pos
, len
,
808 str
.m_impl
.data(), str
.m_impl
.length());
811 int wxString::compare(size_t nStart
, size_t nLen
,
813 size_t nStart2
, size_t nLen2
) const
815 wxASSERT(nStart
<= length());
816 wxASSERT(nStart2
<= str
.length());
817 size_type strLen
= length() - nStart
,
818 strLen2
= str
.length() - nStart2
;
819 nLen
= strLen
< nLen
? strLen
: nLen
;
820 nLen2
= strLen2
< nLen2
? strLen2
: nLen2
;
823 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
825 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
827 return ::wxDoCmp(m_impl
.data() + pos
, len
,
828 str
.m_impl
.data() + pos2
, len2
);
// !HAVE_STD_STRING_COMPARE variant: compare the whole string with a narrow
// C string. sz is converted with ImplStr(sz, npos) into a (data, len) pair
// which is then compared against this string's data using wxDoCmp().
831 int wxString::compare(const char* sz
) const
833 SubstrBufFromMB
str(ImplStr(sz
, npos
));
// the buffer may come back without an explicit length (len == npos); in
// that case measure the data with wxStringStrlen()
834 if ( str
.len
== npos
)
835 str
.len
= wxStringStrlen(str
.data
);
836 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
// !HAVE_STD_STRING_COMPARE variant: compare the whole string with a wide
// C string. sz is converted with ImplStr(sz, npos) into a (data, len) pair
// which is then compared against this string's data using wxDoCmp().
839 int wxString::compare(const wchar_t* sz
) const
841 SubstrBufFromWC
str(ImplStr(sz
, npos
));
// the buffer may come back without an explicit length (len == npos); in
// that case measure the data with wxStringStrlen()
842 if ( str
.len
== npos
)
843 str
.len
= wxStringStrlen(str
.data
);
844 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
847 int wxString::compare(size_t nStart
, size_t nLen
,
848 const char* sz
, size_t nCount
) const
850 wxASSERT(nStart
<= length());
851 size_type strLen
= length() - nStart
;
852 nLen
= strLen
< nLen
? strLen
: nLen
;
855 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
857 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
858 if ( str
.len
== npos
)
859 str
.len
= wxStringStrlen(str
.data
);
861 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
864 int wxString::compare(size_t nStart
, size_t nLen
,
865 const wchar_t* sz
, size_t nCount
) const
867 wxASSERT(nStart
<= length());
868 size_type strLen
= length() - nStart
;
869 nLen
= strLen
< nLen
? strLen
: nLen
;
872 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
874 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
875 if ( str
.len
== npos
)
876 str
.len
= wxStringStrlen(str
.data
);
878 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
881 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
884 // ---------------------------------------------------------------------------
885 // find_{first,last}_[not]_of functions
886 // ---------------------------------------------------------------------------
888 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
890 // NB: All these functions are implemented with the argument being wxChar*,
891 // i.e. widechar string in any Unicode build, even though native string
892 // representation is char* in the UTF-8 build. This is because we couldn't
893 // use memchr() to determine if a character is in a set encoded as UTF-8.
// Convenience overload: forwards to the three-argument find_first_of(),
// using wxStrlen(sz) as the number of characters in the set sz.
//   sz     - NUL-terminated set of characters to look for.
//   nStart - position passed through unchanged to the three-argument form.
895 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
) const
897 return find_first_of(sz
, nStart
, wxStrlen(sz
));
// Convenience overload: forwards to the three-argument find_first_not_of(),
// using wxStrlen(sz) as the number of characters in the set sz.
//   sz     - NUL-terminated set of characters to skip.
//   nStart - position passed through unchanged to the three-argument form.
900 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
) const
902 return find_first_not_of(sz
, nStart
, wxStrlen(sz
));
905 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
907 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
910 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
912 if ( wxTmemchr(sz
, *i
, n
) )
919 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
921 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
924 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
926 if ( !wxTmemchr(sz
, *i
, n
) )
// Convenience overload: forwards to the three-argument find_last_of(),
// using wxStrlen(sz) as the number of characters in the set sz.
//   sz     - NUL-terminated set of characters to look for.
//   nStart - position passed through unchanged to the three-argument form.
934 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
) const
936 return find_last_of(sz
, nStart
, wxStrlen(sz
));
// Convenience overload: forwards to the three-argument find_last_not_of(),
// using wxStrlen(sz) as the number of characters in the set sz.
//   sz     - NUL-terminated set of characters to skip.
//   nStart - position passed through unchanged to the three-argument form.
939 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
) const
941 return find_last_not_of(sz
, nStart
, wxStrlen(sz
));
944 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
946 size_t len
= length();
948 if ( nStart
== npos
)
954 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
958 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
959 i
!= rend(); --idx
, ++i
)
961 if ( wxTmemchr(sz
, *i
, n
) )
968 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
970 size_t len
= length();
972 if ( nStart
== npos
)
978 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
982 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
983 i
!= rend(); --idx
, ++i
)
985 if ( !wxTmemchr(sz
, *i
, n
) )
992 size_t wxString::find_first_not_of(wxUniChar ch
, size_t nStart
) const
994 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
997 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
1006 size_t wxString::find_last_not_of(wxUniChar ch
, size_t nStart
) const
1008 size_t len
= length();
1010 if ( nStart
== npos
)
1016 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
1019 size_t idx
= nStart
;
1020 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
1021 i
!= rend(); --idx
, ++i
)
1030 // the functions above were implemented for wchar_t* arguments in Unicode
1031 // build and char* in ANSI build; below are implementations for the other
1034 #define wxOtherCharType char
1035 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
1037 #define wxOtherCharType wchar_t
1038 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
// find_first_of() for the "other" character type (wxOtherCharType): the set
// sz is converted with STRCONV, a wxConvLibc conversion cast to
// const wxChar*, and the call is forwarded to the wxChar* overload.
// NOTE(review): the converted pointer presumably refers to a temporary
// conversion buffer, so the conversion has to stay inside this single
// expression -- confirm before refactoring.
1041 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
) const
1042 { return find_first_of(STRCONV(sz
), nStart
); }
1044 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
,
1046 { return find_first_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
// find_last_of() for the "other" character type (wxOtherCharType): the set
// sz is converted with STRCONV, a wxConvLibc conversion cast to
// const wxChar*, and the call is forwarded to the wxChar* overload.
// NOTE(review): the converted pointer presumably refers to a temporary
// conversion buffer, so the conversion has to stay inside this single
// expression -- confirm before refactoring.
1047 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
) const
1048 { return find_last_of(STRCONV(sz
), nStart
); }
1049 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
,
1051 { return find_last_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
// find_first_not_of() for the "other" character type (wxOtherCharType): the
// set sz is converted with STRCONV, a wxConvLibc conversion cast to
// const wxChar*, and the call is forwarded to the wxChar* overload.
// NOTE(review): the converted pointer presumably refers to a temporary
// conversion buffer, so the conversion has to stay inside this single
// expression -- confirm before refactoring.
1052 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
1053 { return find_first_not_of(STRCONV(sz
), nStart
); }
1054 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
,
1056 { return find_first_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
// find_last_not_of() for the "other" character type (wxOtherCharType): the
// set sz is converted with STRCONV, a wxConvLibc conversion cast to
// const wxChar*, and the call is forwarded to the wxChar* overload.
// NOTE(review): the converted pointer presumably refers to a temporary
// conversion buffer, so the conversion has to stay inside this single
// expression -- confirm before refactoring.
1057 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
1058 { return find_last_not_of(STRCONV(sz
), nStart
); }
1059 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
,
1061 { return find_last_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1063 #undef wxOtherCharType
1066 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1068 // ===========================================================================
1069 // other common string functions
1070 // ===========================================================================
1072 int wxString::CmpNoCase(const wxString
& s
) const
1074 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
1076 const_iterator i1
= begin();
1077 const_iterator end1
= end();
1078 const_iterator i2
= s
.begin();
1079 const_iterator end2
= s
.end();
1081 for ( ; i1
!= end1
&& i2
!= end2
; ++i1
, ++i2
)
1083 wxUniChar lower1
= (wxChar
)wxTolower(*i1
);
1084 wxUniChar lower2
= (wxChar
)wxTolower(*i2
);
1085 if ( lower1
!= lower2
)
1086 return lower1
< lower2
? -1 : 1;
1089 size_t len1
= length();
1090 size_t len2
= s
.length();
1094 else if ( len1
> len2
)
1103 #ifndef __SCHAR_MAX__
1104 #define __SCHAR_MAX__ 127
1108 wxString
wxString::FromAscii(const char *ascii
, size_t len
)
1110 if (!ascii
|| len
== 0)
1111 return wxEmptyString
;
1116 wxStringInternalBuffer
buf(res
, len
);
1117 wxStringCharType
*dest
= buf
;
1119 for ( ; len
> 0; --len
)
1121 unsigned char c
= (unsigned char)*ascii
++;
1122 wxASSERT_MSG( c
< 0x80,
1123 _T("Non-ASCII value passed to FromAscii().") );
1125 *dest
++ = (wchar_t)c
;
// Build a wxString from a NUL-terminated ASCII string by forwarding to the
// (pointer, length) overload with the length computed by wxStrlen(ascii).
1132 wxString
wxString::FromAscii(const char *ascii
)
1134 return FromAscii(ascii
, wxStrlen(ascii
));
// Build a one-character wxString from a single char.
//   ascii - the character; it is expected to be 7-bit ASCII, which is
//           checked by an assertion only.
// The value goes through unsigned char first so that chars with the high
// bit set are not sign-extended when widened to wchar_t.
1137 wxString
wxString::FromAscii(char ascii
)
1139 // What do we do with '\0' ?
1141 unsigned char c
= (unsigned char)ascii
;
1143 wxASSERT_MSG( c
< 0x80, _T("Non-ASCII value passed to FromAscii().") );
1145 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1146 return wxString(wxUniChar((wchar_t)c
));
1149 const wxCharBuffer
wxString::ToAscii() const
1151 // this will allocate enough space for the terminating NUL too
1152 wxCharBuffer
buffer(length());
1153 char *dest
= buffer
.data();
1155 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1158 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1159 *dest
++ = c
.IsAscii() ? (char)c
: '_';
1161 // the output string can't have embedded NULs anyhow, so we can safely
1162 // stop at first of them even if we do have any
1170 #endif // wxUSE_UNICODE
1172 // extract string of length nCount starting at nFirst
1173 wxString
wxString::Mid(size_t nFirst
, size_t nCount
) const
1175 size_t nLen
= length();
1177 // default value of nCount is npos and means "till the end"
1178 if ( nCount
== npos
)
1180 nCount
= nLen
- nFirst
;
1183 // out-of-bounds requests return sensible things
1184 if ( nFirst
+ nCount
> nLen
)
1186 nCount
= nLen
- nFirst
;
1189 if ( nFirst
> nLen
)
1191 // nFirst is past the end of the string: return an empty string
1192 return wxEmptyString
;
1195 wxString
dest(*this, nFirst
, nCount
);
1196 if ( dest
.length() != nCount
)
1198 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1204 // check that the string starts with prefix and return the rest of the string
1205 // in the provided pointer if it is not NULL, otherwise return false
1206 bool wxString::StartsWith(const wxString
& prefix
, wxString
*rest
) const
1208 if ( compare(0, prefix
.length(), prefix
) != 0 )
1213 // put the rest of the string into provided pointer
1214 rest
->assign(*this, prefix
.length(), npos
);
1221 // check that the string ends with suffix and return the rest of it in the
1222 // provided pointer if it is not NULL, otherwise return false
1223 bool wxString::EndsWith(const wxString
& suffix
, wxString
*rest
) const
1225 int start
= length() - suffix
.length();
1227 if ( start
< 0 || compare(start
, npos
, suffix
) != 0 )
1232 // put the rest of the string into provided pointer
1233 rest
->assign(*this, 0, start
);
1240 // extract nCount last (rightmost) characters
1241 wxString
wxString::Right(size_t nCount
) const
1243 if ( nCount
> length() )
1246 wxString
dest(*this, length() - nCount
, nCount
);
1247 if ( dest
.length() != nCount
) {
1248 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1253 // get all characters after the last occurrence of ch
1254 // (returns the whole string if ch not found)
1255 wxString
wxString::AfterLast(wxUniChar ch
) const
1258 int iPos
= Find(ch
, true);
1259 if ( iPos
== wxNOT_FOUND
)
1262 str
= wx_str() + iPos
+ 1;
1267 // extract nCount first (leftmost) characters
1268 wxString
wxString::Left(size_t nCount
) const
1270 if ( nCount
> length() )
1273 wxString
dest(*this, 0, nCount
);
1274 if ( dest
.length() != nCount
) {
1275 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1280 // get all characters before the first occurrence of ch
1281 // (returns the whole string if ch not found)
// Return the part of the string preceding the first ch found by Find(ch).
//   ch - character to search for.
1282 wxString
wxString::BeforeFirst(wxUniChar ch
) const
1284 int iPos
= Find(ch
);
// ch absent: use the full length so that the whole string is returned
1285 if ( iPos
== wxNOT_FOUND
) iPos
= length();
// construct the result from the first iPos characters of this string
1286 return wxString(*this, 0, iPos
);
1289 /// get all characters before the last occurrence of ch
1290 /// (returns empty string if ch not found)
1291 wxString
wxString::BeforeLast(wxUniChar ch
) const
1294 int iPos
= Find(ch
, true);
1295 if ( iPos
!= wxNOT_FOUND
&& iPos
!= 0 )
1296 str
= wxString(c_str(), iPos
);
1301 /// get all characters after the first occurrence of ch
1302 /// (returns empty string if ch not found)
1303 wxString
wxString::AfterFirst(wxUniChar ch
) const
1306 int iPos
= Find(ch
);
1307 if ( iPos
!= wxNOT_FOUND
)
1308 str
= wx_str() + iPos
+ 1;
1313 // replace first (or all) occurrences of some substring with another one
1314 size_t wxString::Replace(const wxString
& strOld
,
1315 const wxString
& strNew
, bool bReplaceAll
)
1317 // if we tried to replace an empty string we'd enter an infinite loop below
1318 wxCHECK_MSG( !strOld
.empty(), 0,
1319 _T("wxString::Replace(): invalid parameter") );
1321 wxSTRING_INVALIDATE_CACHE();
1323 size_t uiCount
= 0; // count of replacements made
1325 // optimize the special common case: replacement of one character by
1326 // another one (in UTF-8 case we can only do this for ASCII characters)
1328 // benchmarks show that this special version is around 3 times faster
1329 // (depending on the proportion of matching characters and UTF-8/wchar_t
1331 if ( strOld
.m_impl
.length() == 1 && strNew
.m_impl
.length() == 1 )
1333 const wxStringCharType chOld
= strOld
.m_impl
[0],
1334 chNew
= strNew
.m_impl
[0];
1336 // this loop is the simplified version of the one below
1337 for ( size_t pos
= 0; ; )
1339 pos
= m_impl
.find(chOld
, pos
);
1343 m_impl
[pos
++] = chNew
;
1351 else // general case
1353 const size_t uiOldLen
= strOld
.m_impl
.length();
1354 const size_t uiNewLen
= strNew
.m_impl
.length();
1356 for ( size_t pos
= 0; ; )
1358 pos
= m_impl
.find(strOld
.m_impl
, pos
);
1362 // replace this occurrence of the old string with the new one
1363 m_impl
.replace(pos
, uiOldLen
, strNew
.m_impl
);
1365 // move up pos past the string that was replaced
1368 // increase replace count
1371 // stop after the first one?
// Walks the string from begin() to end() and tests every character with its
// IsAscii() method.
// NOTE(review): the return statements are not visible in this listing --
// presumably false on the first non-ASCII character and true otherwise.
1380 bool wxString::IsAscii() const
1382 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1384 if ( !(*i
).IsAscii() )
// Walks the string from begin() to end() and tests every character with
// wxIsalpha().
// NOTE(review): the return statements are not visible in this listing --
// presumably false on the first non-alphabetic character and true otherwise.
1391 bool wxString::IsWord() const
1393 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1395 if ( !wxIsalpha(*i
) )
// Checks the first character for a '-' or '+' sign and then tests the
// characters up to end() with wxIsdigit().
// NOTE(review): the handling of an empty string, the statement executed when
// a sign is found (presumably it advances `i`) and the return statements are
// not visible in this listing.
1402 bool wxString::IsNumber() const
1407 const_iterator i
= begin();
1409 if ( *i
== _T('-') || *i
== _T('+') )
1412 for ( ; i
!= end(); ++i
)
1414 if ( !wxIsdigit(*i
) )
// Calls Trim(false) on `s` when `w` has the `leading` bit set and Trim(true)
// when it has the `trailing` bit set; both may apply.
// NOTE(review): the declaration of `s` and the return statement are not
// visible in this listing -- presumably `s` is a copy of this string which is
// returned.
1421 wxString
wxString::Strip(stripType w
) const
1424 if ( w
& leading
) s
.Trim(false);
1425 if ( w
& trailing
) s
.Trim(true);
1429 // ---------------------------------------------------------------------------
1431 // ---------------------------------------------------------------------------
// Replaces every character of this string with the result of wxToupper().
// NOTE(review): the return statement is not visible in this listing.
1433 wxString
& wxString::MakeUpper()
// the end iterator `en` is obtained once, before the loop starts
1435 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1436 *it
= (wxChar
)wxToupper(*it
);
// Replaces every character of this string with the result of wxTolower().
// NOTE(review): the return statement is not visible in this listing.
1441 wxString
& wxString::MakeLower()
// the end iterator `en` is obtained once, before the loop starts
1443 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1444 *it
= (wxChar
)wxTolower(*it
);
// Passes the character at begin() through wxToupper() and every following
// character through wxTolower().
// NOTE(review): any guard for an empty string and the return statement are
// not visible in this listing.
1449 wxString
& wxString::MakeCapitalized()
1451 const iterator en
= end();
1452 iterator it
= begin();
1455 *it
= (wxChar
)wxToupper(*it
);
// continue with the second character
1456 for ( ++it
; it
!= en
; ++it
)
1457 *it
= (wxChar
)wxTolower(*it
);
1463 // ---------------------------------------------------------------------------
1464 // trimming and padding
1465 // ---------------------------------------------------------------------------
1467 // some compilers (VC++ 6.0 not to name them) return true for a call to
1468 // isspace('\xEA') in the C locale which seems to be broken to me, but we have
1469 // to live with this by checking that the character is a 7 bit one - even if
1470 // this may fail to detect some spaces (I don't know if Unicode doesn't have
1471 // space-like symbols somewhere except in the first 128 chars), it is arguably
1472 // still better than trimming away accented letters
1473 inline int wxSafeIsspace(wxChar ch
) { return (ch
< 127) && wxIsspace(ch
); }
1475 // trims spaces (in the sense of isspace) from left or right side
// bFromRight selects the side: true erases trailing whitespace, false erases
// leading whitespace. Whitespace is decided by wxSafeIsspace().
// NOTE(review): the enclosing `if`, the iterator increments inside the two
// `while` loops and the return statement are not visible in this listing.
1476 wxString
& wxString::Trim(bool bFromRight
)
1478 // first check if we're going to modify the string at all
// (only the single character at the side being trimmed is examined here)
1481 (bFromRight
&& wxSafeIsspace(GetChar(length() - 1))) ||
1482 (!bFromRight
&& wxSafeIsspace(GetChar(0u)))
1488 // find last non-space character
1489 reverse_iterator psz
= rbegin();
1490 while ( (psz
!= rend()) && wxSafeIsspace(*psz
) )
1493 // truncate at trailing space start
// base() converts the reverse iterator into the forward iterator that marks
// the beginning of the range to erase
1494 erase(psz
.base(), end());
1498 // find first non-space character
1499 iterator psz
= begin();
1500 while ( (psz
!= end()) && wxSafeIsspace(*psz
) )
1503 // fix up data and length
1504 erase(begin(), psz
);
1511 // adds nCount characters chPad to the string from either side
// NOTE(review): only the construction of the padding string `s` is visible
// in this listing; how `s` is attached for each value of bFromRight and what
// is returned must be checked against the full source.
1512 wxString
& wxString::Pad(size_t nCount
, wxUniChar chPad
, bool bFromRight
)
// the padding text, built from chPad and nCount
1514 wxString
s(chPad
, nCount
);
1527 // truncate the string
// If the string is longer than uiLen, everything from position uiLen up to
// end() is erased; otherwise the string is left untouched.
// NOTE(review): the return statement is not visible in this listing.
1528 wxString
& wxString::Truncate(size_t uiLen
)
1530 if ( uiLen
< length() )
1532 erase(begin() + uiLen
, end());
1534 //else: nothing to do, string is already short enough
1539 // ---------------------------------------------------------------------------
1540 // finding (return wxNOT_FOUND if not found and index otherwise)
1541 // ---------------------------------------------------------------------------
// Returns the index of ch in this string or wxNOT_FOUND if it does not occur.
// bFromEnd selects the search function: find_last_of() when true,
// find_first_of() otherwise.
1544 int wxString::Find(wxUniChar ch
, bool bFromEnd
) const
1546 size_type idx
= bFromEnd
? find_last_of(ch
) : find_first_of(ch
);
// npos is mapped to wxNOT_FOUND; any other index is converted to int
1548 return (idx
== npos
) ? wxNOT_FOUND
: (int)idx
;
1551 // ----------------------------------------------------------------------------
1552 // conversion to numbers
1553 // ----------------------------------------------------------------------------
1555 // The implementation of all the functions below is exactly the same so factor
1556 // it out. Note that number extraction works correctly on UTF-8 strings, so
1557 // we can use wxStringCharType and wx_str() for maximum efficiency.
// DO_IF_NOT_WINCE(x) has two alternative definitions: one expands to its
// argument, the other to nothing.
// NOTE(review): the preprocessor conditional that selects between them is not
// visible in this listing.
1560 #define DO_IF_NOT_WINCE(x) x
1562 #define DO_IF_NOT_WINCE(x)
// WX_STRING_TO_INT_TYPE(out, base, func, T):
//   out  - output pointer, must not be NULL (wxCHECK_MSG returns false)
//   base - asserted to be 0 or in the range 2..36
//   func - strtol()-like function called as func(start, &end, base)
//   T    - type of the value returned by func
// The final test fails if parsing stopped before the terminating NUL, if
// nothing was parsed at all or, where DO_IF_NOT_WINCE keeps its argument, if
// errno is ERANGE.
// NOTE(review): the statements following the final `if` (including the store
// through `out`) are not visible in this listing.
1565 #define WX_STRING_TO_INT_TYPE(out, base, func, T) \
1566 wxCHECK_MSG( out, false, _T("NULL output pointer") ); \
1567 wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") ); \
1569 DO_IF_NOT_WINCE( errno = 0; ) \
1571 const wxStringCharType *start = wx_str(); \
1572 wxStringCharType *end; \
1573 T val = func(start, &end, base); \
1575 /* return true only if scan was stopped by the terminating NUL and */ \
1576 /* if the string was not empty to start with and no under/overflow */ \
1578 if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) ) \
// Converts the string to a long in the given base: expands
// WX_STRING_TO_INT_TYPE with wxStrtol as the parsing function and pVal as
// the output pointer.
1583 bool wxString::ToLong(long *pVal
, int base
) const
1585 WX_STRING_TO_INT_TYPE(pVal
, base
, wxStrtol
, long);
// Converts the string to an unsigned long in the given base: expands
// WX_STRING_TO_INT_TYPE with wxStrtoul as the parsing function and pVal as
// the output pointer.
1588 bool wxString::ToULong(unsigned long *pVal
, int base
) const
1590 WX_STRING_TO_INT_TYPE(pVal
, base
, wxStrtoul
, unsigned long);
// Converts the string to a wxLongLong_t in the given base: expands
// WX_STRING_TO_INT_TYPE with wxStrtoll as the parsing function and pVal as
// the output pointer.
1593 bool wxString::ToLongLong(wxLongLong_t
*pVal
, int base
) const
1595 WX_STRING_TO_INT_TYPE(pVal
, base
, wxStrtoll
, wxLongLong_t
);
// Converts the string to a wxULongLong_t in the given base: expands
// WX_STRING_TO_INT_TYPE with wxStrtoull as the parsing function and pVal as
// the output pointer.
1598 bool wxString::ToULongLong(wxULongLong_t
*pVal
, int base
) const
1600 WX_STRING_TO_INT_TYPE(pVal
, base
, wxStrtoull
, wxULongLong_t
);
// Converts the string to a double using wxStrtod().
// pVal is the output pointer and must not be NULL (wxCHECK_MSG returns false).
// NOTE(review): the declaration of `end`, the store through pVal and the
// return statements are not visible in this listing.
1603 bool wxString::ToDouble(double *pVal
) const
1605 wxCHECK_MSG( pVal
, false, _T("NULL output pointer") );
// reset errno so that the ERANGE test below reflects this call only
1607 DO_IF_NOT_WINCE( errno
= 0; )
1609 const wxChar
*start
= c_str();
1611 double val
= wxStrtod(start
, &end
);
1613 // return true only if scan was stopped by the terminating NUL and if the
1614 // string was not empty to start with and no under/overflow occurred
1615 if ( *end
|| end
== start
DO_IF_NOT_WINCE(|| errno
== ERANGE
) )
1623 // ---------------------------------------------------------------------------
1625 // ---------------------------------------------------------------------------
// printf()-like formatting for a wxChar format string: the variadic
// arguments are collected with va_start() and forwarded to PrintfV() called
// on the string `s`.
// Two signatures are listed: a member of wxStringPrintfMixinBase under
// wxNEEDS_WXSTRING_PRINTF_MIXIN and a member of wxString.
// NOTE(review): the `#else`/`#endif` separating the two signatures, the
// declarations of `argptr` and `s`, va_end() and the return statement are not
// visible in this listing.
1627 #if !wxUSE_UTF8_LOCALE_ONLY
1629 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1630 wxString
wxStringPrintfMixinBase::DoFormatWchar(const wxChar
*format
, ...)
1632 wxString
wxString::DoFormatWchar(const wxChar
*format
, ...)
1636 va_start(argptr
, format
);
1639 s
.PrintfV(format
, argptr
);
1645 #endif // !wxUSE_UTF8_LOCALE_ONLY
// printf()-like formatting for a `const char *` format string, compiled only
// when wxUSE_UNICODE_UTF8 is set: the variadic arguments are collected with
// va_start() and forwarded to PrintfV() called on the string `s`.
// NOTE(review): the declarations of `argptr` and `s`, va_end() and the return
// statement are not visible in this listing.
1647 #if wxUSE_UNICODE_UTF8
1649 wxString
wxString::DoFormatUtf8(const char *format
, ...)
1652 va_start(argptr
, format
);
1655 s
.PrintfV(format
, argptr
);
1661 #endif // wxUSE_UNICODE_UTF8
// va_list variant of the formatting functions: forwards `format` and
// `argptr` to PrintfV() called on the string `s`.
// NOTE(review): the declaration of `s` and the return statement are not
// visible in this listing.
1664 wxString
wxString::FormatV(const wxString
& format
, va_list argptr
)
1667 s
.PrintfV(format
, argptr
);
// printf()-like formatting into this string for a wxChar format string: the
// variadic arguments are collected with va_start() and forwarded to
// PrintfV(), whose result is stored in iLen.
// Two signatures are listed: a member of wxStringPrintfMixinBase under
// wxNEEDS_WXSTRING_PRINTF_MIXIN and a member of wxString.
// NOTE(review): the `#else`/`#endif` lines, the declaration of `argptr`,
// va_end() and the return statement are not visible in this listing.
1671 #if !wxUSE_UTF8_LOCALE_ONLY
1672 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1673 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar
*format
, ...)
1675 int wxString::DoPrintfWchar(const wxChar
*format
, ...)
1679 va_start(argptr
, format
);
1681 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1682 // get a pointer to the wxString instance: in this configuration the
1683 // function is a member of the mixin base class, so `this` has to be
1684 // downcast to wxString (the cast used below is static_cast<>):
1685 wxString
*str
= static_cast<wxString
*>(this);
// otherwise the function is a wxString member and `this` can be used directly
1687 wxString
*str
= this;
1690 int iLen
= str
->PrintfV(format
, argptr
);
1696 #endif // !wxUSE_UTF8_LOCALE_ONLY
// printf()-like formatting into this string for a `const char *` format
// string, compiled only when wxUSE_UNICODE_UTF8 is set: the variadic
// arguments are collected with va_start() and forwarded to PrintfV(), whose
// result is stored in iLen.
// NOTE(review): the declaration of `argptr`, va_end() and the return
// statement are not visible in this listing.
1698 #if wxUSE_UNICODE_UTF8
1699 int wxString::DoPrintfUtf8(const char *format
, ...)
1702 va_start(argptr
, format
);
1704 int iLen
= PrintfV(format
, argptr
);
1710 #endif // wxUSE_UNICODE_UTF8
1713 Uses wxVsnprintf and places the result into this string.
1715 In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1716 it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in
1717 the ISO C99 (and thus SUSv3) standard the return value for the case of
1718 an undersized buffer is inconsistent. For conforming vsnprintf
1719 implementations the function must return the number of characters that
1720 would have been printed had the buffer been large enough. For conforming
1721 vswprintf implementations the function must return a negative number
1724 What vswprintf sets errno to is undefined but Darwin seems to set it to
1725 EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of
1726 those are defined in the standard and backed up by several conformance
1727 statements. Note that ENOMEM mentioned in the manual page does not
1728 apply to swprintf, only wprintf and fwprintf.
1730 Official manual page:
1731 http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1733 Some conformance statements (AIX, Solaris):
1734 http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1735 http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1737 Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1738 EILSEQ and EINVAL are specifically defined to mean the error is other than
1739 an undersized buffer and no other errno are defined we treat those two
1740 as meaning hard errors and everything else gets the old behavior which
1741 is to keep looping and increasing buffer size until the function succeeds.
1743 In practice it's impossible to determine before compilation which behavior
1744 may be used. The vswprintf function may have vsnprintf-like behavior or
1745 vice-versa. Behavior detected on one release can theoretically change
1746 with an updated release. Not to mention that configure testing for it
1747 would require the test to be run on the host system, not the build system
1748 which makes cross compilation difficult. Therefore, we make no assumptions
1749 about behavior and try our best to handle every known case, including the
1750 case where wxVsnprintf returns a negative number and fails to set errno.
1752 There is yet one more non-standard implementation and that is our own.
1753 Fortunately, that can be detected at compile-time.
1755 On top of all that, ISO C99 explicitly defines snprintf to write a null
1756 character to the last position of the specified buffer. That would be at
1757 the given buffer size minus 1. It is supposed to do this even if it
1758 turns out that the buffer is sized too small.
1760 Darwin (tested on 10.5) follows the C99 behavior exactly.
1762 Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1763 errno even when it fails. However, it only seems to ever fail due
1764 to an undersized buffer.
// Formats `format` with the arguments in `argptr` into `str` and returns
// str.length().
// The output is written by wxVsnprintf() into a temporary buffer object
// (`tmp`) of size + 1 elements attached to `str`; a copy of `argptr` made
// with wxVaCopy() is passed to each call. The comments below describe how a
// negative result and a result >= size are handled (hard error versus
// growing the buffer).
// In UTF-8 builds this is a template parametrized by the buffer type.
// NOTE(review): the declaration and initial value of `size`, the enclosing
// retry loop, the branch heads for the error cases and most statements that
// change `size` are not visible in this listing; confirm the control flow
// against the full source before relying on these notes.
1766 #if wxUSE_UNICODE_UTF8
1767 template<typename BufferType
>
1769 // we only need one version in non-UTF8 builds and at least two Windows
1770 // compilers have problems with this function template, so use just one
1771 // normal function here
1773 static int DoStringPrintfV(wxString
& str
,
1774 const wxString
& format
, va_list argptr
)
1780 #if wxUSE_UNICODE_UTF8
1781 BufferType
tmp(str
, size
+ 1);
1782 typename
BufferType::CharType
*buf
= tmp
;
1784 wxStringBuffer
tmp(str
, size
+ 1);
1792 // in UTF-8 build, leaving uninitialized junk in the buffer
1793 // could result in invalid non-empty UTF-8 string, so just
1794 // reset the string to empty on failure:
1799 // wxVsnprintf() may modify the original arg pointer, so pass it
1802 wxVaCopy(argptrcopy
, argptr
);
1805 // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1808 int len
= wxVsnprintf(buf
, size
, format
, argptrcopy
);
1811 // some implementations of vsnprintf() don't NUL terminate
1812 // the string if there is not enough space for it so
1813 // always do it manually
1814 // FIXME: This really seems to be wrong and would be an off-by-one
1815 // bug except the code above allocates an extra character.
1816 buf
[size
] = _T('\0');
1818 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1819 // total number of characters which would have been written if the
1820 // buffer were large enough (newer standards such as Unix98)
1823 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1824 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1825 // is true if *both* of them use our own implementation,
1826 // otherwise we can't be sure
1827 #if wxUSE_WXVSNPRINTF
1828 // we know that our own implementation of wxVsnprintf() returns -1
1829 // only for a format error - thus there's something wrong with
1830 // the user's format string
1833 #else // possibly using system version
1834 // assume it only returns error if there is not enough space, but
1835 // as we don't know how much we need, double the current size of
1838 if( (errno
== EILSEQ
) || (errno
== EINVAL
) )
1839 // If errno was set to one of the two well-known hard errors
1840 // then fail immediately to avoid an infinite loop.
1843 #endif // __WXWINCE__
1844 // still not enough, as we don't know how much we need, double the
1845 // current size of the buffer
1847 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1849 else if ( len
>= size
)
1851 #if wxUSE_WXVSNPRINTF
1852 // we know that our own implementation of wxVsnprintf() returns
1853 // size+1 when there's not enough space but that's not the size
1854 // of the required buffer!
1855 size
*= 2; // so we just double the current size of the buffer
1857 // some vsnprintf() implementations NUL-terminate the buffer and
1858 // some don't in len == size case, to be safe always add 1
1859 // FIXME: I don't quite understand this comment. The vsnprintf
1860 // function is specifically defined to return the number of
1861 // characters printed not including the null terminator.
1862 // So OF COURSE you need to add 1 to get the right buffer size.
1863 // The following line is definitely correct, no question.
1867 else // ok, there was enough space
1873 // we could have overshot
1876 return str
.length();
// Formats into this string by forwarding to DoStringPrintfV().
// In UTF-8 builds a char-based buffer type (Utf8Buffer) is chosen by a
// typedef that depends on wxUSE_STL_BASED_WXSTRING. The listing then shows
// four forwarding calls: with Utf8Buffer under wxUSE_UTF8_LOCALE_ONLY, with
// Utf8Buffer when wxLocaleIsUtf8 is true, with wxStringBuffer, and the
// non-template call.
// NOTE(review): the `#else`/`#endif` lines that delimit these alternatives
// are mostly not visible in this listing, so the exact condition guarding the
// last two calls cannot be read from here.
1879 int wxString::PrintfV(const wxString
& format
, va_list argptr
)
1881 #if wxUSE_UNICODE_UTF8
1882 #if wxUSE_STL_BASED_WXSTRING
1883 typedef wxStringTypeBuffer
<char> Utf8Buffer
;
1885 typedef wxStringInternalBuffer Utf8Buffer
;
1889 #if wxUSE_UTF8_LOCALE_ONLY
1890 return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argptr
);
1892 #if wxUSE_UNICODE_UTF8
1893 if ( wxLocaleIsUtf8
)
1894 return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argptr
);
1897 return DoStringPrintfV
<wxStringBuffer
>(*this, format
, argptr
);
1899 return DoStringPrintfV(*this, format
, argptr
);
1900 #endif // UTF8/WCHAR
1904 // ----------------------------------------------------------------------------
1905 // misc other operations
1906 // ----------------------------------------------------------------------------
1908 // returns true if the string matches the pattern which may contain '*' and
1909 // '?' metacharacters (as usual, '?' matches any character and '*' any number
// of characters)
//
// The regex-based implementation is compiled out (`#if 0`); the hand-written
// matcher after `#else` is the one in use. It walks the mask and the text in
// parallel through pszMask and pszTxt and remembers the position of the last
// '*' in both so that it can backtrack after a failed literal comparison.
// NOTE(review): the `case` labels, `return` statements, increments and
// closing braces are not visible in this listing; the notes below describe
// only the statements that are shown.
1911 bool wxString::Matches(const wxString
& mask
) const
1913 // I disable this code as it doesn't seem to be faster (in fact, it seems
1914 // to be much slower) than the old, hand-written code below and using it
1915 // here requires always linking with libregex even if the user code doesn't
1917 #if 0 // wxUSE_REGEX
1918 // first translate the shell-like mask into a regex
1920 pattern
.reserve(wxStrlen(pszMask
));
1932 pattern
+= _T(".*");
1943 // these characters are special in a RE, quote them
1944 // (however note that we don't quote '[' and ']' to allow
1945 // using them for Unix shell like matching)
1946 pattern
+= _T('\\');
1950 pattern
+= *pszMask
;
1958 return wxRegEx(pattern
, wxRE_NOSUB
| wxRE_EXTENDED
).Matches(c_str());
1959 #else // !wxUSE_REGEX
1960 // TODO: this is, of course, awfully inefficient...
1962 // FIXME-UTF8: implement using iterators, remove #if
// in UTF-8 builds both strings are first converted with wc_str(); the
// buffers keep the converted data alive while the raw pointers are in use
1963 #if wxUSE_UNICODE_UTF8
1964 wxWCharBuffer maskBuf
= mask
.wc_str();
1965 wxWCharBuffer txtBuf
= wc_str();
1966 const wxChar
*pszMask
= maskBuf
.data();
1967 const wxChar
*pszTxt
= txtBuf
.data();
1969 const wxChar
*pszMask
= mask
.wx_str();
1970 // the char currently being checked
1971 const wxChar
*pszTxt
= wx_str();
1974 // the last location where '*' matched
1975 const wxChar
*pszLastStarInText
= NULL
;
1976 const wxChar
*pszLastStarInMask
= NULL
;
// both pointers advance together at the end of every iteration
1979 for ( ; *pszMask
!= wxT('\0'); pszMask
++, pszTxt
++ ) {
1980 switch ( *pszMask
) {
1982 if ( *pszTxt
== wxT('\0') )
1985 // pszTxt and pszMask will be incremented in the loop statement
1991 // remember where we started to be able to backtrack later
1992 pszLastStarInText
= pszTxt
;
1993 pszLastStarInMask
= pszMask
;
1995 // ignore special chars immediately following this one
1996 // (should this be an error?)
1997 while ( *pszMask
== wxT('*') || *pszMask
== wxT('?') )
2000 // if there is nothing more, match
2001 if ( *pszMask
== wxT('\0') )
2004 // are there any other metacharacters in the mask?
2006 const wxChar
*pEndMask
= wxStrpbrk(pszMask
, wxT("*?"));
2008 if ( pEndMask
!= NULL
) {
2009 // we have to match the string between two metachars
2010 uiLenMask
= pEndMask
- pszMask
;
2013 // we have to match the remainder of the string
2014 uiLenMask
= wxStrlen(pszMask
);
// look for the literal segment of the mask anywhere in the remaining text
2017 wxString
strToMatch(pszMask
, uiLenMask
);
2018 const wxChar
* pMatch
= wxStrstr(pszTxt
, strToMatch
);
2019 if ( pMatch
== NULL
)
2022 // -1 to compensate "++" in the loop
2023 pszTxt
= pMatch
+ uiLenMask
- 1;
2024 pszMask
+= uiLenMask
- 1;
2029 if ( *pszMask
!= *pszTxt
)
2035 // match only if nothing left
2036 if ( *pszTxt
== wxT('\0') )
2039 // if we failed to match, backtrack if we can
// resume one character after the text position recorded at the last '*'
2040 if ( pszLastStarInText
) {
2041 pszTxt
= pszLastStarInText
+ 1;
2042 pszMask
= pszLastStarInMask
;
2044 pszLastStarInText
= NULL
;
2046 // don't bother resetting pszLastStarInMask, it's unnecessary
2052 #endif // wxUSE_REGEX/!wxUSE_REGEX
2055 // Count the number of chars
// Iterates over the whole string from begin() to end().
// NOTE(review): the counter, the comparison with ch and the return statement
// are not visible in this listing -- presumably the function returns how many
// characters are equal to ch.
2056 int wxString::Freq(wxUniChar ch
) const
2059 for ( const_iterator i
= begin(); i
!= end(); ++i
)
2067 // ----------------------------------------------------------------------------
2068 // wxUTF8StringBuffer
2069 // ----------------------------------------------------------------------------
2071 #if wxUSE_UNICODE_WCHAR
// Converts the contents of m_buf from UTF-8 to wide characters and writes
// them into m_str.
// The conversion is done in two passes: the first ToWChar() call (NULL
// destination, size 0) only computes the required length, the second one
// writes into a buffer of that length attached to m_str.
2072 wxUTF8StringBuffer::~wxUTF8StringBuffer()
2074 wxMBConvStrictUTF8 conv
;
2075 size_t wlen
= conv
.ToWChar(NULL
, 0, m_buf
);
// nothing is written to m_str if m_buf cannot be converted
2076 wxCHECK_RET( wlen
!= wxCONV_FAILED
, "invalid UTF-8 data in string buffer?" );
2078 wxStringInternalBuffer
wbuf(m_str
, wlen
);
2079 conv
.ToWChar(wbuf
, wlen
, m_buf
);
// Converts the first m_len elements of m_buf from UTF-8 to wide characters
// and writes them into m_str.
// The conversion is done in two passes: the first ToWChar() call (NULL
// destination, size 0) only computes the required length, the second one
// writes into a buffer of that length attached to m_str.
2082 wxUTF8StringBufferLength::~wxUTF8StringBufferLength()
// the length has to be set before the buffer is destroyed
2084 wxCHECK_RET(m_lenSet
, "length not set");
2086 wxMBConvStrictUTF8 conv
;
2087 size_t wlen
= conv
.ToWChar(NULL
, 0, m_buf
, m_len
);
// nothing is written to m_str if m_buf cannot be converted
2088 wxCHECK_RET( wlen
!= wxCONV_FAILED
, "invalid UTF-8 data in string buffer?" );
2090 wxStringInternalBufferLength
wbuf(m_str
, wlen
);
2091 conv
.ToWChar(wbuf
, wlen
, m_buf
, m_len
);
// the length-aware buffer is told explicitly how many characters were written
2092 wbuf
.SetLength(wlen
);
2094 #endif // wxUSE_UNICODE_WCHAR
2096 // ----------------------------------------------------------------------------
2097 // wxCharTypeBuffer<T>
2098 // ----------------------------------------------------------------------------
// Definitions of the static NullData member of wxCharTypeBuffer<char> and
// wxCharTypeBuffer<wchar_t>; each is a Data object constructed from NULL.
// NOTE(review): any `template<>` line preceding these definitions is not
// visible in this listing.
2101 wxCharTypeBuffer
<char>::Data
2102 wxCharTypeBuffer
<char>::NullData(NULL
);
2105 wxCharTypeBuffer
<wchar_t>::Data
2106 wxCharTypeBuffer
<wchar_t>::NullData(NULL
);