1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
13 // ===========================================================================
14 // headers, declarations, constants
15 // ===========================================================================
17 // For compilers that support precompilation, includes "wx.h".
#include "wx/wxprec.h"

#include "wx/string.h"
#include "wx/wxcrtvararg.h"

#include <stdlib.h>

#include "wx/hashmap.h"
40 // string handling functions used by wxString:
41 #if wxUSE_UNICODE_UTF8
42 #define wxStringMemcpy memcpy
43 #define wxStringMemcmp memcmp
44 #define wxStringMemchr memchr
45 #define wxStringStrlen strlen
47 #define wxStringMemcpy wxTmemcpy
48 #define wxStringMemcmp wxTmemcmp
49 #define wxStringMemchr wxTmemchr
50 #define wxStringStrlen wxStrlen
54 // ---------------------------------------------------------------------------
55 // static class variables definition
56 // ---------------------------------------------------------------------------
58 //According to STL _must_ be a -1 size_t
59 const size_t wxString::npos
= (size_t) -1;
61 #if wxUSE_STRING_POS_CACHE
63 #ifdef wxHAS_COMPILER_TLS
65 wxTLS_TYPE(wxString::Cache
) wxString::ms_cache
;
67 #else // !wxHAS_COMPILER_TLS
69 struct wxStrCacheInitializer
71 wxStrCacheInitializer()
73 // calling this function triggers s_cache initialization in it, and
74 // from now on it becomes safe to call from multiple threads
80 wxString::Cache& wxString::GetCache()
82 static wxTLS_TYPE(Cache) s_cache;
84 return wxTLS_VALUE(s_cache);
88 static wxStrCacheInitializer gs_stringCacheInit
;
90 #endif // wxHAS_COMPILER_TLS/!wxHAS_COMPILER_TLS
92 // gdb seems to be unable to display thread-local variables correctly, at least
93 // not my 6.4.98 version under amd64, so provide this debugging helper to do it
96 struct wxStrCacheDumper
100 puts("*** wxString cache dump:");
101 for ( unsigned n
= 0; n
< wxString::Cache::SIZE
; n
++ )
103 const wxString::Cache::Element
&
104 c
= wxString::GetCacheBegin()[n
];
106 printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
108 n
== wxString::LastUsedCacheElement() ? " [*]" : "",
110 (unsigned long)c
.pos
,
111 (unsigned long)c
.impl
,
117 void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
119 #endif // __WXDEBUG__
121 #ifdef wxPROFILE_STRING_CACHE
123 wxString::CacheStats
wxString::ms_cacheStats
;
125 struct wxStrCacheStatsDumper
127 ~wxStrCacheStatsDumper()
129 const wxString::CacheStats
& stats
= wxString::ms_cacheStats
;
133 puts("*** wxString cache statistics:");
134 printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
136 printf("\tHits %u (of which %u not used) or %.2f%%\n",
139 100.*float(stats
.poshits
- stats
.mishits
)/stats
.postot
);
140 printf("\tAverage position requested: %.2f\n",
141 float(stats
.sumpos
) / stats
.postot
);
142 printf("\tAverage offset after cached hint: %.2f\n",
143 float(stats
.sumofs
) / stats
.postot
);
148 printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
149 stats
.lentot
, 100.*float(stats
.lenhits
)/stats
.lentot
);
154 static wxStrCacheStatsDumper s_showCacheStats
;
156 #endif // wxPROFILE_STRING_CACHE
158 #endif // wxUSE_STRING_POS_CACHE
160 // ----------------------------------------------------------------------------
162 // ----------------------------------------------------------------------------
164 #if wxUSE_STD_IOSTREAM
168 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxCStrData
& str
)
170 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
171 return os
<< (const char *)str
.AsCharBuf();
173 return os
<< str
.AsInternal();
177 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxString
& str
)
179 return os
<< str
.c_str();
182 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxCharBuffer
& str
)
184 return os
<< str
.data();
188 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxWCharBuffer
& str
)
190 return os
<< str
.data();
194 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
196 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxString
& str
)
198 return wos
<< str
.wc_str();
201 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxCStrData
& str
)
203 return wos
<< str
.AsWChar();
206 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxWCharBuffer
& str
)
208 return wos
<< str
.data();
211 #endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
213 #endif // wxUSE_STD_IOSTREAM
215 // ===========================================================================
216 // wxString class core
217 // ===========================================================================
219 #if wxUSE_UNICODE_UTF8
221 void wxString::PosLenToImpl(size_t pos
, size_t len
,
222 size_t *implPos
, size_t *implLen
) const
228 else // have valid start position
230 const const_iterator b
= GetIterForNthChar(pos
);
231 *implPos
= wxStringImpl::const_iterator(b
.impl()) - m_impl
.begin();
236 else // have valid length too
238 // we need to handle the case of length specifying a substring
239 // going beyond the end of the string, just as std::string does
240 const const_iterator
e(end());
242 while ( len
&& i
<= e
)
248 *implLen
= i
.impl() - b
.impl();
253 #endif // wxUSE_UNICODE_UTF8
255 // ----------------------------------------------------------------------------
256 // wxCStrData converted strings caching
257 // ----------------------------------------------------------------------------
259 // FIXME-UTF8: temporarily disabled because it doesn't work with global
260 // string objects; re-enable after fixing this bug and benchmarking
261 // performance to see if using a hash is a good idea at all
264 // For backward compatibility reasons, it must be possible to assign the value
265 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
266 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
267 // because the memory would be freed immediately, but it has to be valid as long
268 // as the string is not modified, so that code like this still works:
270 // const wxChar *s = str.c_str();
271 // while ( s ) { ... }
273 // FIXME-UTF8: not thread safe!
274 // FIXME-UTF8: we currently clear the cached conversion only when the string is
275 // destroyed, but we should do it when the string is modified, to
276 // keep memory usage down
277 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
278 // invalidated the cache on every change, we could keep the previous
280 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
281 // to use mb_str() or wc_str() instead of (const [w]char*)c_str()
284 static inline void DeleteStringFromConversionCache(T
& hash
, const wxString
*s
)
286 typename
T::iterator i
= hash
.find(wxConstCast(s
, wxString
));
287 if ( i
!= hash
.end() )
295 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
296 // so we have to use wxString* here and const-cast when used
297 WX_DECLARE_HASH_MAP(wxString
*, char*, wxPointerHash
, wxPointerEqual
,
298 wxStringCharConversionCache
);
299 static wxStringCharConversionCache gs_stringsCharCache
;
301 const char* wxCStrData::AsChar() const
303 // remove previously cached value, if any (see FIXMEs above):
304 DeleteStringFromConversionCache(gs_stringsCharCache
, m_str
);
306 // convert the string and keep it:
307 const char *s
= gs_stringsCharCache
[wxConstCast(m_str
, wxString
)] =
308 m_str
->mb_str().release();
312 #endif // wxUSE_UNICODE
314 #if !wxUSE_UNICODE_WCHAR
315 WX_DECLARE_HASH_MAP(wxString
*, wchar_t*, wxPointerHash
, wxPointerEqual
,
316 wxStringWCharConversionCache
);
317 static wxStringWCharConversionCache gs_stringsWCharCache
;
319 const wchar_t* wxCStrData::AsWChar() const
321 // remove previously cached value, if any (see FIXMEs above):
322 DeleteStringFromConversionCache(gs_stringsWCharCache
, m_str
);
324 // convert the string and keep it:
325 const wchar_t *s
= gs_stringsWCharCache
[wxConstCast(m_str
, wxString
)] =
326 m_str
->wc_str().release();
330 #endif // !wxUSE_UNICODE_WCHAR
332 wxString::~wxString()
335 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
336 DeleteStringFromConversionCache(gs_stringsCharCache
, this);
338 #if !wxUSE_UNICODE_WCHAR
339 DeleteStringFromConversionCache(gs_stringsWCharCache
, this);
344 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
345 const char* wxCStrData::AsChar() const
347 #if wxUSE_UNICODE_UTF8
348 if ( wxLocaleIsUtf8
)
351 // under non-UTF8 locales, we have to convert the internal UTF-8
352 // representation using wxConvLibc and cache the result
354 wxString
*str
= wxConstCast(m_str
, wxString
);
356 // convert the string:
358 // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
359 // have it) but it's unfortunately not obvious to implement
360 // because we don't know how big buffer do we need for the
361 // given string length (in case of multibyte encodings, e.g.
362 // ISO-2022-JP or UTF-8 when internal representation is wchar_t)
364 // One idea would be to store more than just m_convertedToChar
365 // in wxString: then we could record the length of the string
366 // which was converted the last time and try to reuse the same
367 // buffer if the current length is not greater than it (this
368 // could still fail because string could have been modified in
369 // place but it would work most of the time, so we'd do it and
370 // only allocate the new buffer if in-place conversion returned
371 // an error). We could also store a bit saying if the string
372 // was modified since the last conversion (and update it in all
373 // operation modifying the string, of course) to avoid unneeded
374 // consequential conversions. But both of these ideas require
375 // adding more fields to wxString and require profiling results
376 // to be sure that we really gain enough from them to justify
378 wxCharBuffer
buf(str
->mb_str());
380 // if it failed, return empty string and not NULL to avoid crashes in code
381 // written with either wxWidgets 2 wxString or std::string behaviour in
382 // mind: neither of them ever returns NULL and so we shouldn't neither
386 if ( str
->m_convertedToChar
&&
387 strlen(buf
) == strlen(str
->m_convertedToChar
) )
389 // keep the same buffer for as long as possible, so that several calls
390 // to c_str() in a row still work:
391 strcpy(str
->m_convertedToChar
, buf
);
395 str
->m_convertedToChar
= buf
.release();
399 return str
->m_convertedToChar
+ m_offset
;
401 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
403 #if !wxUSE_UNICODE_WCHAR
404 const wchar_t* wxCStrData::AsWChar() const
406 wxString
*str
= wxConstCast(m_str
, wxString
);
408 // convert the string:
409 wxWCharBuffer
buf(str
->wc_str());
411 // notice that here, unlike above in AsChar(), conversion can't fail as our
412 // internal UTF-8 is always well-formed -- or the string was corrupted and
413 // all bets are off anyhow
415 // FIXME-UTF8: do the conversion in-place in the existing buffer
416 if ( str
->m_convertedToWChar
&&
417 wxWcslen(buf
) == wxWcslen(str
->m_convertedToWChar
) )
419 // keep the same buffer for as long as possible, so that several calls
420 // to c_str() in a row still work:
421 memcpy(str
->m_convertedToWChar
, buf
, sizeof(wchar_t) * wxWcslen(buf
));
425 str
->m_convertedToWChar
= buf
.release();
429 return str
->m_convertedToWChar
+ m_offset
;
431 #endif // !wxUSE_UNICODE_WCHAR
433 // ===========================================================================
434 // wxString class core
435 // ===========================================================================
437 // ---------------------------------------------------------------------------
438 // construction and conversion
439 // ---------------------------------------------------------------------------
441 #if wxUSE_UNICODE_WCHAR
443 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
444 const wxMBConv
& conv
)
447 if ( !psz
|| nLength
== 0 )
448 return SubstrBufFromMB(L
"", 0);
450 if ( nLength
== npos
)
454 wxWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
456 return SubstrBufFromMB(_T(""), 0);
458 return SubstrBufFromMB(wcBuf
, wcLen
);
460 #endif // wxUSE_UNICODE_WCHAR
462 #if wxUSE_UNICODE_UTF8
464 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
465 const wxMBConv
& conv
)
468 if ( !psz
|| nLength
== 0 )
469 return SubstrBufFromMB("", 0);
471 // if psz is already in UTF-8, we don't have to do the roundtrip to
472 // wchar_t* and back:
475 // we need to validate the input because UTF8 iterators assume valid
476 // UTF-8 sequence and psz may be invalid:
477 if ( wxStringOperations::IsValidUtf8String(psz
, nLength
) )
479 // we must pass the real string length to SubstrBufFromMB ctor
480 if ( nLength
== npos
)
481 nLength
= psz
? strlen(psz
) : 0;
482 return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz
), nLength
);
484 // else: do the roundtrip through wchar_t*
487 if ( nLength
== npos
)
490 // first convert to wide string:
492 wxWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
494 return SubstrBufFromMB("", 0);
496 // and then to UTF-8:
497 SubstrBufFromMB
buf(ConvertStr(wcBuf
, wcLen
, wxMBConvStrictUTF8()));
498 // widechar -> UTF-8 conversion isn't supposed to ever fail:
499 wxASSERT_MSG( buf
.data
, _T("conversion to UTF-8 failed") );
503 #endif // wxUSE_UNICODE_UTF8
505 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
507 wxString::SubstrBufFromWC
wxString::ConvertStr(const wchar_t *pwz
, size_t nLength
,
508 const wxMBConv
& conv
)
511 if ( !pwz
|| nLength
== 0 )
512 return SubstrBufFromWC("", 0);
514 if ( nLength
== npos
)
518 wxCharBuffer
mbBuf(conv
.cWC2MB(pwz
, nLength
, &mbLen
));
520 return SubstrBufFromWC("", 0);
522 return SubstrBufFromWC(mbBuf
, mbLen
);
524 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
527 #if wxUSE_UNICODE_WCHAR
529 //Convert wxString in Unicode mode to a multi-byte string
530 const wxCharBuffer
wxString::mb_str(const wxMBConv
& conv
) const
532 return conv
.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL
);
535 #elif wxUSE_UNICODE_UTF8
537 const wxWCharBuffer
wxString::wc_str() const
539 return wxMBConvStrictUTF8().cMB2WC
542 m_impl
.length() + 1, // size, not length
547 const wxCharBuffer
wxString::mb_str(const wxMBConv
& conv
) const
550 return wxCharBuffer::CreateNonOwned(m_impl
.c_str());
552 // FIXME-UTF8: use wc_str() here once we have buffers with length
555 wxWCharBuffer
wcBuf(wxMBConvStrictUTF8().cMB2WC
558 m_impl
.length() + 1, // size
562 return wxCharBuffer("");
564 return conv
.cWC2MB(wcBuf
, wcLen
+1, NULL
);
569 //Converts this string to a wide character string if unicode
570 //mode is not enabled and wxUSE_WCHAR_T is enabled
571 const wxWCharBuffer
wxString::wc_str(const wxMBConv
& conv
) const
573 return conv
.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL
);
576 #endif // Unicode/ANSI
578 // shrink to minimal size (releasing extra memory)
579 bool wxString::Shrink()
581 wxString
tmp(begin(), end());
583 return tmp
.length() == length();
586 // deprecated compatibility code:
587 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
588 wxStringCharType
*wxString::GetWriteBuf(size_t nLen
)
590 return DoGetWriteBuf(nLen
);
593 void wxString::UngetWriteBuf()
598 void wxString::UngetWriteBuf(size_t nLen
)
600 DoUngetWriteBuf(nLen
);
602 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
605 // ---------------------------------------------------------------------------
607 // ---------------------------------------------------------------------------
609 // all functions are inline in string.h
611 // ---------------------------------------------------------------------------
612 // concatenation operators
613 // ---------------------------------------------------------------------------
616 * concatenation functions come in 5 flavours:
618 * char + string and string + char
619 * C str + string and string + C str
622 wxString
operator+(const wxString
& str1
, const wxString
& str2
)
624 #if !wxUSE_STL_BASED_WXSTRING
625 wxASSERT( str1
.IsValid() );
626 wxASSERT( str2
.IsValid() );
635 wxString
operator+(const wxString
& str
, wxUniChar ch
)
637 #if !wxUSE_STL_BASED_WXSTRING
638 wxASSERT( str
.IsValid() );
647 wxString
operator+(wxUniChar ch
, const wxString
& str
)
649 #if !wxUSE_STL_BASED_WXSTRING
650 wxASSERT( str
.IsValid() );
659 wxString
operator+(const wxString
& str
, const char *psz
)
661 #if !wxUSE_STL_BASED_WXSTRING
662 wxASSERT( str
.IsValid() );
666 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
667 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
675 wxString
operator+(const wxString
& str
, const wchar_t *pwz
)
677 #if !wxUSE_STL_BASED_WXSTRING
678 wxASSERT( str
.IsValid() );
682 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
683 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
691 wxString
operator+(const char *psz
, const wxString
& str
)
693 #if !wxUSE_STL_BASED_WXSTRING
694 wxASSERT( str
.IsValid() );
698 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
699 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
707 wxString
operator+(const wchar_t *pwz
, const wxString
& str
)
709 #if !wxUSE_STL_BASED_WXSTRING
710 wxASSERT( str
.IsValid() );
714 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
715 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
723 // ---------------------------------------------------------------------------
725 // ---------------------------------------------------------------------------
727 bool wxString::IsSameAs(wxUniChar c
, bool compareWithCase
) const
729 return (length() == 1) && (compareWithCase
? GetChar(0u) == c
730 : wxToupper(GetChar(0u)) == wxToupper(c
));
733 #ifdef HAVE_STD_STRING_COMPARE
735 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
736 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
737 // sort strings in characters code point order by sorting the byte sequence
738 // in byte values order (i.e. what strcmp() and memcmp() do).
740 int wxString::compare(const wxString
& str
) const
742 return m_impl
.compare(str
.m_impl
);
745 int wxString::compare(size_t nStart
, size_t nLen
,
746 const wxString
& str
) const
749 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
750 return m_impl
.compare(pos
, len
, str
.m_impl
);
753 int wxString::compare(size_t nStart
, size_t nLen
,
755 size_t nStart2
, size_t nLen2
) const
758 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
761 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
763 return m_impl
.compare(pos
, len
, str
.m_impl
, pos2
, len2
);
766 int wxString::compare(const char* sz
) const
768 return m_impl
.compare(ImplStr(sz
));
771 int wxString::compare(const wchar_t* sz
) const
773 return m_impl
.compare(ImplStr(sz
));
776 int wxString::compare(size_t nStart
, size_t nLen
,
777 const char* sz
, size_t nCount
) const
780 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
782 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
784 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
787 int wxString::compare(size_t nStart
, size_t nLen
,
788 const wchar_t* sz
, size_t nCount
) const
791 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
793 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
795 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
798 #else // !HAVE_STD_STRING_COMPARE
800 static inline int wxDoCmp(const wxStringCharType
* s1
, size_t l1
,
801 const wxStringCharType
* s2
, size_t l2
)
804 return wxStringMemcmp(s1
, s2
, l1
);
807 int ret
= wxStringMemcmp(s1
, s2
, l1
);
808 return ret
== 0 ? -1 : ret
;
812 int ret
= wxStringMemcmp(s1
, s2
, l2
);
813 return ret
== 0 ? +1 : ret
;
817 int wxString::compare(const wxString
& str
) const
819 return ::wxDoCmp(m_impl
.data(), m_impl
.length(),
820 str
.m_impl
.data(), str
.m_impl
.length());
823 int wxString::compare(size_t nStart
, size_t nLen
,
824 const wxString
& str
) const
826 wxASSERT(nStart
<= length());
827 size_type strLen
= length() - nStart
;
828 nLen
= strLen
< nLen
? strLen
: nLen
;
831 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
833 return ::wxDoCmp(m_impl
.data() + pos
, len
,
834 str
.m_impl
.data(), str
.m_impl
.length());
837 int wxString::compare(size_t nStart
, size_t nLen
,
839 size_t nStart2
, size_t nLen2
) const
841 wxASSERT(nStart
<= length());
842 wxASSERT(nStart2
<= str
.length());
843 size_type strLen
= length() - nStart
,
844 strLen2
= str
.length() - nStart2
;
845 nLen
= strLen
< nLen
? strLen
: nLen
;
846 nLen2
= strLen2
< nLen2
? strLen2
: nLen2
;
849 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
851 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
853 return ::wxDoCmp(m_impl
.data() + pos
, len
,
854 str
.m_impl
.data() + pos2
, len2
);
857 int wxString::compare(const char* sz
) const
859 SubstrBufFromMB
str(ImplStr(sz
, npos
));
860 if ( str
.len
== npos
)
861 str
.len
= wxStringStrlen(str
.data
);
862 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
865 int wxString::compare(const wchar_t* sz
) const
867 SubstrBufFromWC
str(ImplStr(sz
, npos
));
868 if ( str
.len
== npos
)
869 str
.len
= wxStringStrlen(str
.data
);
870 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
873 int wxString::compare(size_t nStart
, size_t nLen
,
874 const char* sz
, size_t nCount
) const
876 wxASSERT(nStart
<= length());
877 size_type strLen
= length() - nStart
;
878 nLen
= strLen
< nLen
? strLen
: nLen
;
881 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
883 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
884 if ( str
.len
== npos
)
885 str
.len
= wxStringStrlen(str
.data
);
887 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
890 int wxString::compare(size_t nStart
, size_t nLen
,
891 const wchar_t* sz
, size_t nCount
) const
893 wxASSERT(nStart
<= length());
894 size_type strLen
= length() - nStart
;
895 nLen
= strLen
< nLen
? strLen
: nLen
;
898 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
900 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
901 if ( str
.len
== npos
)
902 str
.len
= wxStringStrlen(str
.data
);
904 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
907 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
910 // ---------------------------------------------------------------------------
911 // find_{first,last}_[not]_of functions
912 // ---------------------------------------------------------------------------
914 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
916 // NB: All these functions are implemented with the argument being wxChar*,
917 // i.e. widechar string in any Unicode build, even though native string
918 // representation is char* in the UTF-8 build. This is because we couldn't
919 // use memchr() to determine if a character is in a set encoded as UTF-8.
921 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
) const
923 return find_first_of(sz
, nStart
, wxStrlen(sz
));
926 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
) const
928 return find_first_not_of(sz
, nStart
, wxStrlen(sz
));
931 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
933 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
936 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
938 if ( wxTmemchr(sz
, *i
, n
) )
945 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
947 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
950 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
952 if ( !wxTmemchr(sz
, *i
, n
) )
960 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
) const
962 return find_last_of(sz
, nStart
, wxStrlen(sz
));
965 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
) const
967 return find_last_not_of(sz
, nStart
, wxStrlen(sz
));
970 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
972 size_t len
= length();
974 if ( nStart
== npos
)
980 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
984 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
985 i
!= rend(); --idx
, ++i
)
987 if ( wxTmemchr(sz
, *i
, n
) )
994 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
996 size_t len
= length();
998 if ( nStart
== npos
)
1004 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
1007 size_t idx
= nStart
;
1008 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
1009 i
!= rend(); --idx
, ++i
)
1011 if ( !wxTmemchr(sz
, *i
, n
) )
1018 size_t wxString::find_first_not_of(wxUniChar ch
, size_t nStart
) const
1020 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
1022 size_t idx
= nStart
;
1023 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
1032 size_t wxString::find_last_not_of(wxUniChar ch
, size_t nStart
) const
1034 size_t len
= length();
1036 if ( nStart
== npos
)
1042 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
1045 size_t idx
= nStart
;
1046 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
1047 i
!= rend(); --idx
, ++i
)
1056 // the functions above were implemented for wchar_t* arguments in Unicode
1057 // build and char* in ANSI build; below are implementations for the other
1060 #define wxOtherCharType char
1061 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
1063 #define wxOtherCharType wchar_t
1064 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
1067 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
) const
1068 { return find_first_of(STRCONV(sz
), nStart
); }
1070 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
,
1072 { return find_first_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1073 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
) const
1074 { return find_last_of(STRCONV(sz
), nStart
); }
1075 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
,
1077 { return find_last_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1078 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
1079 { return find_first_not_of(STRCONV(sz
), nStart
); }
1080 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
,
1082 { return find_first_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1083 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
1084 { return find_last_not_of(STRCONV(sz
), nStart
); }
1085 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
,
1087 { return find_last_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1089 #undef wxOtherCharType
1092 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1094 // ===========================================================================
1095 // other common string functions
1096 // ===========================================================================
1098 int wxString::CmpNoCase(const wxString
& s
) const
1100 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
1102 const_iterator i1
= begin();
1103 const_iterator end1
= end();
1104 const_iterator i2
= s
.begin();
1105 const_iterator end2
= s
.end();
1107 for ( ; i1
!= end1
&& i2
!= end2
; ++i1
, ++i2
)
1109 wxUniChar lower1
= (wxChar
)wxTolower(*i1
);
1110 wxUniChar lower2
= (wxChar
)wxTolower(*i2
);
1111 if ( lower1
!= lower2
)
1112 return lower1
< lower2
? -1 : 1;
1115 size_t len1
= length();
1116 size_t len2
= s
.length();
1120 else if ( len1
> len2
)
1129 #ifndef __SCHAR_MAX__
1130 #define __SCHAR_MAX__ 127
1134 wxString
wxString::FromAscii(const char *ascii
, size_t len
)
1136 if (!ascii
|| len
== 0)
1137 return wxEmptyString
;
1142 wxStringInternalBuffer
buf(res
, len
);
1143 wxStringCharType
*dest
= buf
;
1145 for ( ; len
> 0; --len
)
1147 unsigned char c
= (unsigned char)*ascii
++;
1148 wxASSERT_MSG( c
< 0x80,
1149 _T("Non-ASCII value passed to FromAscii().") );
1151 *dest
++ = (wchar_t)c
;
1158 wxString
wxString::FromAscii(const char *ascii
)
1160 return FromAscii(ascii
, wxStrlen(ascii
));
1163 wxString
wxString::FromAscii(char ascii
)
1165 // What do we do with '\0' ?
1167 unsigned char c
= (unsigned char)ascii
;
1169 wxASSERT_MSG( c
< 0x80, _T("Non-ASCII value passed to FromAscii().") );
1171 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1172 return wxString(wxUniChar((wchar_t)c
));
1175 const wxCharBuffer
wxString::ToAscii() const
1177 // this will allocate enough space for the terminating NUL too
1178 wxCharBuffer
buffer(length());
1179 char *dest
= buffer
.data();
1181 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1184 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1185 *dest
++ = c
.IsAscii() ? (char)c
: '_';
1187 // the output string can't have embedded NULs anyhow, so we can safely
1188 // stop at first of them even if we do have any
1196 #endif // wxUSE_UNICODE
1198 // extract string of length nCount starting at nFirst
1199 wxString
wxString::Mid(size_t nFirst
, size_t nCount
) const
1201 size_t nLen
= length();
1203 // default value of nCount is npos and means "till the end"
1204 if ( nCount
== npos
)
1206 nCount
= nLen
- nFirst
;
1209 // out-of-bounds requests return sensible things
1210 if ( nFirst
+ nCount
> nLen
)
1212 nCount
= nLen
- nFirst
;
1215 if ( nFirst
> nLen
)
1217 // AllocCopy() will return empty string
1218 return wxEmptyString
;
1221 wxString
dest(*this, nFirst
, nCount
);
1222 if ( dest
.length() != nCount
)
1224 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1230 // check that the string starts with prefix and return the rest of the string
1231 // in the provided pointer if it is not NULL, otherwise return false
1232 bool wxString::StartsWith(const wxString
& prefix
, wxString
*rest
) const
1234 if ( compare(0, prefix
.length(), prefix
) != 0 )
1239 // put the rest of the string into provided pointer
1240 rest
->assign(*this, prefix
.length(), npos
);
1247 // check that the string ends with suffix and return the rest of it in the
1248 // provided pointer if it is not NULL, otherwise return false
1249 bool wxString::EndsWith(const wxString
& suffix
, wxString
*rest
) const
1251 int start
= length() - suffix
.length();
1253 if ( start
< 0 || compare(start
, npos
, suffix
) != 0 )
1258 // put the rest of the string into provided pointer
1259 rest
->assign(*this, 0, start
);
1266 // extract nCount last (rightmost) characters
1267 wxString
wxString::Right(size_t nCount
) const
1269 if ( nCount
> length() )
1272 wxString
dest(*this, length() - nCount
, nCount
);
1273 if ( dest
.length() != nCount
) {
1274 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1279 // get all characters after the last occurence of ch
1280 // (returns the whole string if ch not found)
1281 wxString
wxString::AfterLast(wxUniChar ch
) const
1284 int iPos
= Find(ch
, true);
1285 if ( iPos
== wxNOT_FOUND
)
1288 str
= wx_str() + iPos
+ 1;
1293 // extract nCount first (leftmost) characters
1294 wxString
wxString::Left(size_t nCount
) const
1296 if ( nCount
> length() )
1299 wxString
dest(*this, 0, nCount
);
1300 if ( dest
.length() != nCount
) {
1301 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1306 // get all characters before the first occurence of ch
1307 // (returns the whole string if ch not found)
1308 wxString
wxString::BeforeFirst(wxUniChar ch
) const
1310 int iPos
= Find(ch
);
1311 if ( iPos
== wxNOT_FOUND
) iPos
= length();
1312 return wxString(*this, 0, iPos
);
1315 /// get all characters before the last occurence of ch
1316 /// (returns empty string if ch not found)
1317 wxString
wxString::BeforeLast(wxUniChar ch
) const
1320 int iPos
= Find(ch
, true);
1321 if ( iPos
!= wxNOT_FOUND
&& iPos
!= 0 )
1322 str
= wxString(c_str(), iPos
);
1327 /// get all characters after the first occurrence of ch
1328 /// (returns empty string if ch not found)
1329 wxString
wxString::AfterFirst(wxUniChar ch
) const
1332 int iPos
= Find(ch
);
1333 if ( iPos
!= wxNOT_FOUND
)
1334 str
= wx_str() + iPos
+ 1;
1339 // replace first (or all) occurrences of some substring with another one
1340 size_t wxString::Replace(const wxString
& strOld
,
1341 const wxString
& strNew
, bool bReplaceAll
)
1343 // if we tried to replace an empty string we'd enter an infinite loop below
1344 wxCHECK_MSG( !strOld
.empty(), 0,
1345 _T("wxString::Replace(): invalid parameter") );
1347 wxSTRING_INVALIDATE_CACHE();
1349 size_t uiCount
= 0; // count of replacements made
1351 // optimize the special common case: replacement of one character by
1352 // another one (in UTF-8 case we can only do this for ASCII characters)
1354 // benchmarks show that this special version is around 3 times faster
1355 // (depending on the proportion of matching characters and UTF-8/wchar_t
1357 if ( strOld
.m_impl
.length() == 1 && strNew
.m_impl
.length() == 1 )
1359 const wxStringCharType chOld
= strOld
.m_impl
[0],
1360 chNew
= strNew
.m_impl
[0];
1362 // this loop is the simplified version of the one below
1363 for ( size_t pos
= 0; ; )
1365 pos
= m_impl
.find(chOld
, pos
);
1369 m_impl
[pos
++] = chNew
;
1377 else // general case
1379 const size_t uiOldLen
= strOld
.m_impl
.length();
1380 const size_t uiNewLen
= strNew
.m_impl
.length();
1382 for ( size_t pos
= 0; ; )
1384 pos
= m_impl
.find(strOld
.m_impl
, pos
);
1388 // replace this occurrence of the old string with the new one
1389 m_impl
.replace(pos
, uiOldLen
, strNew
.m_impl
);
1391 // move up pos past the string that was replaced
1394 // increase replace count
1397 // stop after the first one?
1406 bool wxString::IsAscii() const
1408 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1410 if ( !(*i
).IsAscii() )
1417 bool wxString::IsWord() const
1419 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1421 if ( !wxIsalpha(*i
) )
1428 bool wxString::IsNumber() const
1433 const_iterator i
= begin();
1435 if ( *i
== _T('-') || *i
== _T('+') )
1438 for ( ; i
!= end(); ++i
)
1440 if ( !wxIsdigit(*i
) )
1447 wxString
wxString::Strip(stripType w
) const
1450 if ( w
& leading
) s
.Trim(false);
1451 if ( w
& trailing
) s
.Trim(true);
1455 // ---------------------------------------------------------------------------
1457 // ---------------------------------------------------------------------------
1459 wxString
& wxString::MakeUpper()
1461 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1462 *it
= (wxChar
)wxToupper(*it
);
1467 wxString
& wxString::MakeLower()
1469 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1470 *it
= (wxChar
)wxTolower(*it
);
1475 wxString
& wxString::MakeCapitalized()
1477 const iterator en
= end();
1478 iterator it
= begin();
1481 *it
= (wxChar
)wxToupper(*it
);
1482 for ( ++it
; it
!= en
; ++it
)
1483 *it
= (wxChar
)wxTolower(*it
);
1489 // ---------------------------------------------------------------------------
1490 // trimming and padding
1491 // ---------------------------------------------------------------------------
1493 // some compilers (VC++ 6.0 not to name them) return true for a call to
1494 // isspace('\xEA') in the C locale which seems to be broken to me, but we have
1495 // to live with this by checking that the character is a 7 bit one - even if
1496 // this may fail to detect some spaces (I don't know if Unicode doesn't have
1497 // space-like symbols somewhere except in the first 128 chars), it is arguably
1498 // still better than trimming away accented letters
1499 inline int wxSafeIsspace(wxChar ch
) { return (ch
< 127) && wxIsspace(ch
); }
1501 // trims spaces (in the sense of isspace) from left or right side
1502 wxString
& wxString::Trim(bool bFromRight
)
1504 // first check if we're going to modify the string at all
1507 (bFromRight
&& wxSafeIsspace(GetChar(length() - 1))) ||
1508 (!bFromRight
&& wxSafeIsspace(GetChar(0u)))
1514 // find last non-space character
1515 reverse_iterator psz
= rbegin();
1516 while ( (psz
!= rend()) && wxSafeIsspace(*psz
) )
1519 // truncate at trailing space start
1520 erase(psz
.base(), end());
1524 // find first non-space character
1525 iterator psz
= begin();
1526 while ( (psz
!= end()) && wxSafeIsspace(*psz
) )
1529 // fix up data and length
1530 erase(begin(), psz
);
1537 // adds nCount characters chPad to the string from either side
1538 wxString
& wxString::Pad(size_t nCount
, wxUniChar chPad
, bool bFromRight
)
1540 wxString
s(chPad
, nCount
);
1553 // truncate the string
1554 wxString
& wxString::Truncate(size_t uiLen
)
1556 if ( uiLen
< length() )
1558 erase(begin() + uiLen
, end());
1560 //else: nothing to do, string is already short enough
1565 // ---------------------------------------------------------------------------
1566 // finding (return wxNOT_FOUND if not found and index otherwise)
1567 // ---------------------------------------------------------------------------
1570 int wxString::Find(wxUniChar ch
, bool bFromEnd
) const
1572 size_type idx
= bFromEnd
? find_last_of(ch
) : find_first_of(ch
);
1574 return (idx
== npos
) ? wxNOT_FOUND
: (int)idx
;
1577 // ----------------------------------------------------------------------------
1578 // conversion to numbers
1579 // ----------------------------------------------------------------------------
1581 // The implementation of all the functions below is exactly the same so factor
1582 // it out. Note that number extraction works correctly on UTF-8 strings, so
1583 // we can use wxStringCharType and wx_str() for maximum efficiency.
1586 #define DO_IF_NOT_WINCE(x) x
1588 #define DO_IF_NOT_WINCE(x)
1591 #define WX_STRING_TO_INT_TYPE(out, base, func, T) \
1592 wxCHECK_MSG( out, false, _T("NULL output pointer") ); \
1593 wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") ); \
1595 DO_IF_NOT_WINCE( errno = 0; ) \
1597 const wxStringCharType *start = wx_str(); \
1598 wxStringCharType *end; \
1599 T val = func(start, &end, base); \
1601 /* return true only if scan was stopped by the terminating NUL and */ \
1602 /* if the string was not empty to start with and no under/overflow */ \
1604 if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) ) \
// Converts the string to a long: expands WX_STRING_TO_INT_TYPE with pVal as
// the output pointer, base as the numeric base and wxStrtol() as the parsing
// function. The macro checks that pVal is not NULL and that base is either 0
// or in the 2..36 range; according to its own comments it returns true only
// if the entire non-empty string was parsed without under/overflow.
1609 bool wxString::ToLong(long *pVal
, int base
) const
1611 WX_STRING_TO_INT_TYPE(pVal
, base
, wxStrtol
, long);
// Converts the string to an unsigned long: expands WX_STRING_TO_INT_TYPE with
// pVal as the output pointer, base as the numeric base and wxStrtoul() as the
// parsing function. The macro checks that pVal is not NULL and that base is
// either 0 or in the 2..36 range; according to its own comments it returns
// true only if the entire non-empty string was parsed without under/overflow.
1614 bool wxString::ToULong(unsigned long *pVal
, int base
) const
1616 WX_STRING_TO_INT_TYPE(pVal
, base
, wxStrtoul
, unsigned long);
// Converts the string to a wxLongLong_t: expands WX_STRING_TO_INT_TYPE with
// pVal as the output pointer, base as the numeric base and wxStrtoll() as the
// parsing function. The macro checks that pVal is not NULL and that base is
// either 0 or in the 2..36 range; according to its own comments it returns
// true only if the entire non-empty string was parsed without under/overflow.
1619 bool wxString::ToLongLong(wxLongLong_t
*pVal
, int base
) const
1621 WX_STRING_TO_INT_TYPE(pVal
, base
, wxStrtoll
, wxLongLong_t
);
// Converts the string to a wxULongLong_t: expands WX_STRING_TO_INT_TYPE with
// pVal as the output pointer, base as the numeric base and wxStrtoull() as the
// parsing function. The macro checks that pVal is not NULL and that base is
// either 0 or in the 2..36 range; according to its own comments it returns
// true only if the entire non-empty string was parsed without under/overflow.
1624 bool wxString::ToULongLong(wxULongLong_t
*pVal
, int base
) const
1626 WX_STRING_TO_INT_TYPE(pVal
, base
, wxStrtoull
, wxULongLong_t
);
1629 bool wxString::ToDouble(double *pVal
) const
1631 wxCHECK_MSG( pVal
, false, _T("NULL output pointer") );
1633 DO_IF_NOT_WINCE( errno
= 0; )
1635 const wxChar
*start
= c_str();
1637 double val
= wxStrtod(start
, &end
);
1639 // return true only if scan was stopped by the terminating NUL and if the
1640 // string was not empty to start with and no under/overflow occurred
1641 if ( *end
|| end
== start
DO_IF_NOT_WINCE(|| errno
== ERANGE
) )
1649 // ---------------------------------------------------------------------------
1651 // ---------------------------------------------------------------------------
1653 #if !wxUSE_UTF8_LOCALE_ONLY
1655 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1656 wxString
wxStringPrintfMixinBase::DoFormatWchar(const wxChar
*format
, ...)
1658 wxString
wxString::DoFormatWchar(const wxChar
*format
, ...)
1662 va_start(argptr
, format
);
1665 s
.PrintfV(format
, argptr
);
1671 #endif // !wxUSE_UTF8_LOCALE_ONLY
1673 #if wxUSE_UNICODE_UTF8
1675 wxString
wxString::DoFormatUtf8(const char *format
, ...)
1678 va_start(argptr
, format
);
1681 s
.PrintfV(format
, argptr
);
1687 #endif // wxUSE_UNICODE_UTF8
1690 wxString
wxString::FormatV(const wxString
& format
, va_list argptr
)
1693 s
.PrintfV(format
, argptr
);
1697 #if !wxUSE_UTF8_LOCALE_ONLY
1698 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1699 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar
*format
, ...)
1701 int wxString::DoPrintfWchar(const wxChar
*format
, ...)
1705 va_start(argptr
, format
);
1707 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1708 // get a pointer to the wxString instance; this is a downcast from the
1709 // mixin base class and static_cast<> performs the pointer adjustment
1710 // needed when multiple inheritance is used:
1711 wxString
*str
= static_cast<wxString
*>(this);
1713 wxString
*str
= this;
1716 int iLen
= str
->PrintfV(format
, argptr
);
1722 #endif // !wxUSE_UTF8_LOCALE_ONLY
1724 #if wxUSE_UNICODE_UTF8
1725 int wxString::DoPrintfUtf8(const char *format
, ...)
1728 va_start(argptr
, format
);
1730 int iLen
= PrintfV(format
, argptr
);
1736 #endif // wxUSE_UNICODE_UTF8
1739 Uses wxVsnprintf and places the result into this string.
1741 In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1742 it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in
1743 the ISO C99 (and thus SUSv3) standard the return value for the case of
1744 an undersized buffer is inconsistent. For conforming vsnprintf
1745 implementations the function must return the number of characters that
1746 would have been printed had the buffer been large enough. For conforming
1747 vswprintf implementations the function must return a negative number
1750 What vswprintf sets errno to is undefined but Darwin seems to set it to
1751 EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of
1752 those are defined in the standard and backed up by several conformance
1753 statements. Note that ENOMEM mentioned in the manual page does not
1754 apply to swprintf, only wprintf and fwprintf.
1756 Official manual page:
1757 http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1759 Some conformance statements (AIX, Solaris):
1760 http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1761 http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1763 Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1764 EILSEQ and EINVAL are specifically defined to mean the error is other than
1765 an undersized buffer and no other errno are defined we treat those two
1766 as meaning hard errors and everything else gets the old behavior which
1767 is to keep looping and increasing buffer size until the function succeeds.
1769 In practice it's impossible to determine before compilation which behavior
1770 may be used. The vswprintf function may have vsnprintf-like behavior or
1771 vice-versa. Behavior detected on one release can theoretically change
1772 with an updated release. Not to mention that configure testing for it
1773 would require the test to be run on the host system, not the build system
1774 which makes cross compilation difficult. Therefore, we make no assumptions
1775 about behavior and try our best to handle every known case, including the
1776 case where wxVsnprintf returns a negative number and fails to set errno.
1778 There is yet one more non-standard implementation and that is our own.
1779 Fortunately, that can be detected at compile-time.
1781 On top of all that, ISO C99 explicitly defines snprintf to write a null
1782 character to the last position of the specified buffer. That would be at
1783 the given buffer size minus 1. It is supposed to do this even if it
1784 turns out that the buffer is sized too small.
1786 Darwin (tested on 10.5) follows the C99 behavior exactly.
1788 Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1789 errno even when it fails. However, it only seems to ever fail due
1790 to an undersized buffer.
1792 #if wxUSE_UNICODE_UTF8
1793 template<typename BufferType
>
1795 // we only need one version in non-UTF8 builds and at least two Windows
1796 // compilers have problems with this function template, so use just one
1797 // normal function here
1799 static int DoStringPrintfV(wxString
& str
,
1800 const wxString
& format
, va_list argptr
)
1806 #if wxUSE_UNICODE_UTF8
1807 BufferType
tmp(str
, size
+ 1);
1808 typename
BufferType::CharType
*buf
= tmp
;
1810 wxStringBuffer
tmp(str
, size
+ 1);
1818 // in UTF-8 build, leaving uninitialized junk in the buffer
1819 // could result in invalid non-empty UTF-8 string, so just
1820 // reset the string to empty on failure:
1825 // wxVsnprintf() may modify the original arg pointer, so pass it
1828 wxVaCopy(argptrcopy
, argptr
);
1831 // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1834 int len
= wxVsnprintf(buf
, size
, format
, argptrcopy
);
1837 // some implementations of vsnprintf() don't NUL terminate
1838 // the string if there is not enough space for it so
1839 // always do it manually
1840 // FIXME: This really seems to be wrong and would be an off-by-one
1841 // bug except the code above allocates an extra character.
1842 buf
[size
] = _T('\0');
1844 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1845 // total number of characters which would have been written if the
1846 // buffer were large enough (newer standards such as Unix98)
1849 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1850 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1851 // is true if *both* of them use our own implementation,
1852 // otherwise we can't be sure
1853 #if wxUSE_WXVSNPRINTF
1854 // we know that our own implementation of wxVsnprintf() returns -1
1855 // only for a format error - thus there's something wrong with
1856 // the user's format string
1859 #else // possibly using system version
1860 // assume it only returns error if there is not enough space, but
1861 // as we don't know how much we need, double the current size of
1864 if( (errno
== EILSEQ
) || (errno
== EINVAL
) )
1865 // If errno was set to one of the two well-known hard errors
1866 // then fail immediately to avoid an infinite loop.
1869 #endif // __WXWINCE__
1870 // still not enough, as we don't know how much we need, double the
1871 // current size of the buffer
1873 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1875 else if ( len
>= size
)
1877 #if wxUSE_WXVSNPRINTF
1878 // we know that our own implementation of wxVsnprintf() returns
1879 // size+1 when there's not enough space but that's not the size
1880 // of the required buffer!
1881 size
*= 2; // so we just double the current size of the buffer
1883 // some vsnprintf() implementations NUL-terminate the buffer and
1884 // some don't in len == size case, to be safe always add 1
1885 // FIXME: I don't quite understand this comment. The vsnprintf
1886 // function is specifically defined to return the number of
1887 // characters printed not including the null terminator.
1888 // So OF COURSE you need to add 1 to get the right buffer size.
1889 // The following line is definitely correct, no question.
1893 else // ok, there was enough space
1899 // we could have overshot
1902 return str
.length();
1905 int wxString::PrintfV(const wxString
& format
, va_list argptr
)
1907 #if wxUSE_UNICODE_UTF8
1908 #if wxUSE_STL_BASED_WXSTRING
1909 typedef wxStringTypeBuffer
<char> Utf8Buffer
;
1911 typedef wxStringInternalBuffer Utf8Buffer
;
1915 #if wxUSE_UTF8_LOCALE_ONLY
1916 return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argptr
);
1918 #if wxUSE_UNICODE_UTF8
1919 if ( wxLocaleIsUtf8
)
1920 return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argptr
);
1923 return DoStringPrintfV
<wxStringBuffer
>(*this, format
, argptr
);
1925 return DoStringPrintfV(*this, format
, argptr
);
1926 #endif // UTF8/WCHAR
1930 // ----------------------------------------------------------------------------
1931 // misc other operations
1932 // ----------------------------------------------------------------------------
1934 // returns true if the string matches the pattern which may contain '*' and
1935 // '?' metacharacters (as usual, '?' matches any character and '*' any number of them)
1937 bool wxString::Matches(const wxString
& mask
) const
1939 // I disable this code as it doesn't seem to be faster (in fact, it seems
1940 // to be much slower) than the old, hand-written code below and using it
1941 // here requires always linking with libregex even if the user code doesn't
1943 #if 0 // wxUSE_REGEX
1944 // first translate the shell-like mask into a regex
1946 pattern
.reserve(wxStrlen(pszMask
));
1958 pattern
+= _T(".*");
1969 // these characters are special in a RE, quote them
1970 // (however note that we don't quote '[' and ']' to allow
1971 // using them for Unix shell like matching)
1972 pattern
+= _T('\\');
1976 pattern
+= *pszMask
;
1984 return wxRegEx(pattern
, wxRE_NOSUB
| wxRE_EXTENDED
).Matches(c_str());
1985 #else // !wxUSE_REGEX
1986 // TODO: this is, of course, awfully inefficient...
1988 // FIXME-UTF8: implement using iterators, remove #if
1989 #if wxUSE_UNICODE_UTF8
1990 wxWCharBuffer maskBuf
= mask
.wc_str();
1991 wxWCharBuffer txtBuf
= wc_str();
1992 const wxChar
*pszMask
= maskBuf
.data();
1993 const wxChar
*pszTxt
= txtBuf
.data();
1995 const wxChar
*pszMask
= mask
.wx_str();
1996 // the char currently being checked
1997 const wxChar
*pszTxt
= wx_str();
2000 // the last location where '*' matched
2001 const wxChar
*pszLastStarInText
= NULL
;
2002 const wxChar
*pszLastStarInMask
= NULL
;
2005 for ( ; *pszMask
!= wxT('\0'); pszMask
++, pszTxt
++ ) {
2006 switch ( *pszMask
) {
2008 if ( *pszTxt
== wxT('\0') )
2011 // pszTxt and pszMask will be incremented in the loop statement
2017 // remember where we started to be able to backtrack later
2018 pszLastStarInText
= pszTxt
;
2019 pszLastStarInMask
= pszMask
;
2021 // ignore special chars immediately following this one
2022 // (should this be an error?)
2023 while ( *pszMask
== wxT('*') || *pszMask
== wxT('?') )
2026 // if there is nothing more, match
2027 if ( *pszMask
== wxT('\0') )
2030 // are there any other metacharacters in the mask?
2032 const wxChar
*pEndMask
= wxStrpbrk(pszMask
, wxT("*?"));
2034 if ( pEndMask
!= NULL
) {
2035 // we have to match the string between two metachars
2036 uiLenMask
= pEndMask
- pszMask
;
2039 // we have to match the remainder of the string
2040 uiLenMask
= wxStrlen(pszMask
);
2043 wxString
strToMatch(pszMask
, uiLenMask
);
2044 const wxChar
* pMatch
= wxStrstr(pszTxt
, strToMatch
);
2045 if ( pMatch
== NULL
)
2048 // -1 to compensate "++" in the loop
2049 pszTxt
= pMatch
+ uiLenMask
- 1;
2050 pszMask
+= uiLenMask
- 1;
2055 if ( *pszMask
!= *pszTxt
)
2061 // match only if nothing left
2062 if ( *pszTxt
== wxT('\0') )
2065 // if we failed to match, backtrack if we can
2066 if ( pszLastStarInText
) {
2067 pszTxt
= pszLastStarInText
+ 1;
2068 pszMask
= pszLastStarInMask
;
2070 pszLastStarInText
= NULL
;
2072 // don't bother resetting pszLastStarInMask, it's unnecessary
2078 #endif // wxUSE_REGEX/!wxUSE_REGEX
2081 // Count the number of chars
2082 int wxString::Freq(wxUniChar ch
) const
2085 for ( const_iterator i
= begin(); i
!= end(); ++i
)
2093 // ----------------------------------------------------------------------------
2094 // wxUTF8StringBuffer
2095 // ----------------------------------------------------------------------------
2097 #if wxUSE_UNICODE_WCHAR
// Destructor: converts the contents of m_buf with wxMBConvStrictUTF8 and
// writes the resulting wide characters into m_str through a
// wxStringInternalBuffer.
2098 wxUTF8StringBuffer::~wxUTF8StringBuffer()
2100 wxMBConvStrictUTF8 conv
;
// first pass: no output buffer is given (NULL, size 0), only the returned
// length is used -- presumably the number of wide characters needed, confirm
// against the wxMBConv::ToWChar() documentation
2101 size_t wlen
= conv
.ToWChar(NULL
, 0, m_buf
);
// the conversion result is checked here, before m_str is accessed at all
2102 wxCHECK_RET( wlen
!= wxCONV_FAILED
, "invalid UTF-8 data in string buffer?" );
// second pass: convert into a buffer of wlen characters associated with m_str
2104 wxStringInternalBuffer
wbuf(m_str
, wlen
);
2105 conv
.ToWChar(wbuf
, wlen
, m_buf
);
// Destructor: converts the first m_len elements of m_buf with
// wxMBConvStrictUTF8 and writes the resulting wide characters into m_str
// through a wxStringInternalBufferLength.
2108 wxUTF8StringBufferLength::~wxUTF8StringBufferLength()
// m_lenSet must be true at this point, i.e. the length is expected to have
// been set before the buffer object is destroyed
2110 wxCHECK_RET(m_lenSet
, "length not set");
2112 wxMBConvStrictUTF8 conv
;
// first pass: no output buffer is given (NULL, size 0), only the returned
// length is used -- presumably the number of wide characters needed, confirm
// against the wxMBConv::ToWChar() documentation
2113 size_t wlen
= conv
.ToWChar(NULL
, 0, m_buf
, m_len
);
// the conversion result is checked here, before m_str is accessed at all
2114 wxCHECK_RET( wlen
!= wxCONV_FAILED
, "invalid UTF-8 data in string buffer?" );
// second pass: convert into a buffer of wlen characters associated with m_str
2116 wxStringInternalBufferLength
wbuf(m_str
, wlen
);
2117 conv
.ToWChar(wbuf
, wlen
, m_buf
, m_len
);
// explicitly record the length of the converted data in the buffer object
2118 wbuf
.SetLength(wlen
);
2120 #endif // wxUSE_UNICODE_WCHAR
2122 // ----------------------------------------------------------------------------
2123 // wxCharBufferType<T>
2124 // ----------------------------------------------------------------------------
2126 #ifndef __VMS_BROKEN_TEMPLATES
2129 wxCharTypeBuffer
<char>::Data
2130 wxCharTypeBuffer
<char>::NullData(NULL
);
2132 #ifndef __VMS_BROKEN_TEMPLATES
2135 wxCharTypeBuffer
<wchar_t>::Data
2136 wxCharTypeBuffer
<wchar_t>::NullData(NULL
);