1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
13 // ===========================================================================
14 // headers, declarations, constants
15 // ===========================================================================
17 // For compilers that support precompilation, includes "wx.h".
18 #include "wx/wxprec.h"
25 #include "wx/string.h"
26 #include "wx/wxcrtvararg.h"
38 #include "wx/hashmap.h"
40 // string handling functions used by wxString:
41 #if wxUSE_UNICODE_UTF8
42 #define wxStringMemcpy memcpy
43 #define wxStringMemcmp memcmp
44 #define wxStringMemchr memchr
45 #define wxStringStrlen strlen
47 #define wxStringMemcpy wxTmemcpy
48 #define wxStringMemcmp wxTmemcmp
49 #define wxStringMemchr wxTmemchr
50 #define wxStringStrlen wxStrlen
54 // ---------------------------------------------------------------------------
55 // static class variables definition
56 // ---------------------------------------------------------------------------
58 //According to STL _must_ be a -1 size_t
59 const size_t wxString::npos
= (size_t) -1;
61 #if wxUSE_STRING_POS_CACHE
63 #ifdef wxHAS_COMPILER_TLS
65 wxTLS_TYPE(wxString::Cache
) wxString::ms_cache
;
67 #else // !wxHAS_COMPILER_TLS
69 struct wxStrCacheInitializer
71 wxStrCacheInitializer()
73 // calling this function triggers s_cache initialization in it, and
74 // from now on it becomes safe to call from multiple threads
80 wxString::Cache& wxString::GetCache()
82 static wxTLS_TYPE(Cache) s_cache;
84 return wxTLS_VALUE(s_cache);
88 static wxStrCacheInitializer gs_stringCacheInit
;
90 #endif // wxHAS_COMPILER_TLS/!wxHAS_COMPILER_TLS
92 // gdb seems to be unable to display thread-local variables correctly, at least
93 // not my 6.4.98 version under amd64, so provide this debugging helper to do it
96 struct wxStrCacheDumper
100 puts("*** wxString cache dump:");
101 for ( unsigned n
= 0; n
< wxString::Cache::SIZE
; n
++ )
103 const wxString::Cache::Element
&
104 c
= wxString::GetCacheBegin()[n
];
106 printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
108 n
== wxString::LastUsedCacheElement() ? " [*]" : "",
110 (unsigned long)c
.pos
,
111 (unsigned long)c
.impl
,
117 void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
119 #endif // __WXDEBUG__
121 #ifdef wxPROFILE_STRING_CACHE
123 wxString::CacheStats
wxString::ms_cacheStats
;
125 struct wxStrCacheStatsDumper
127 ~wxStrCacheStatsDumper()
129 const wxString::CacheStats
& stats
= wxString::ms_cacheStats
;
133 puts("*** wxString cache statistics:");
134 printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
136 printf("\tHits %u (of which %u not used) or %.2f%%\n",
139 100.*float(stats
.poshits
- stats
.mishits
)/stats
.postot
);
140 printf("\tAverage position requested: %.2f\n",
141 float(stats
.sumpos
) / stats
.postot
);
142 printf("\tAverage offset after cached hint: %.2f\n",
143 float(stats
.sumofs
) / stats
.postot
);
148 printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
149 stats
.lentot
, 100.*float(stats
.lenhits
)/stats
.lentot
);
154 static wxStrCacheStatsDumper s_showCacheStats
;
156 #endif // wxPROFILE_STRING_CACHE
158 #endif // wxUSE_STRING_POS_CACHE
160 // ----------------------------------------------------------------------------
162 // ----------------------------------------------------------------------------
164 #if wxUSE_STD_IOSTREAM
168 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxCStrData
& str
)
170 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
171 const wxCharBuffer
buf(str
.AsCharBuf());
173 os
.clear(wxSTD
ios_base::failbit
);
179 return os
<< str
.AsInternal();
183 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxString
& str
)
185 return os
<< str
.c_str();
188 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxCharBuffer
& str
)
190 return os
<< str
.data();
194 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxWCharBuffer
& str
)
196 return os
<< str
.data();
200 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
202 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxString
& str
)
204 return wos
<< str
.wc_str();
207 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxCStrData
& str
)
209 return wos
<< str
.AsWChar();
212 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxWCharBuffer
& str
)
214 return wos
<< str
.data();
217 #endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
219 #endif // wxUSE_STD_IOSTREAM
221 // ===========================================================================
222 // wxString class core
223 // ===========================================================================
225 #if wxUSE_UNICODE_UTF8
// Translate a (pos, len) pair into the corresponding offset and length in the
// underlying m_impl storage, returning them via implPos and implLen.
//
// The offset is the distance from m_impl.begin() to the iterator that
// GetIterForNthChar(pos) returns; the length is the distance between the
// impl() positions of that iterator and of the one reached by advancing
// while len is non-zero.
//
// NOTE(review): the braces of this function and the branches preceding the
// two "else" clauses below are not present in this listing; presumably they
// handle pos == npos and len == npos -- confirm against the complete source.
227 void wxString::PosLenToImpl(size_t pos
, size_t len
,
228 size_t *implPos
, size_t *implLen
) const
234 else // have valid start position
236 const const_iterator b
= GetIterForNthChar(pos
);
237 *implPos
= wxStringImpl::const_iterator(b
.impl()) - m_impl
.begin();
242 else // have valid length too
244 // we need to handle the case of length specifying a substring
245 // going beyond the end of the string, just as std::string does
246 const const_iterator
e(end());
248 while ( len
&& i
<= e
)
254 *implLen
= i
.impl() - b
.impl();
259 #endif // wxUSE_UNICODE_UTF8
261 // ----------------------------------------------------------------------------
262 // wxCStrData converted strings caching
263 // ----------------------------------------------------------------------------
265 // FIXME-UTF8: temporarily disabled because it doesn't work with global
266 // string objects; re-enable after fixing this bug and benchmarking
267 // performance to see if using a hash is a good idea at all
270 // For backward compatibility reasons, it must be possible to assign the value
271 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
272 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
273 // because the memory would be freed immediately, but it has to be valid as long
274 // as the string is not modified, so that code like this still works:
276 // const wxChar *s = str.c_str();
277 // while ( s ) { ... }
279 // FIXME-UTF8: not thread safe!
280 // FIXME-UTF8: we currently clear the cached conversion only when the string is
281 // destroyed, but we should do it when the string is modified, to
282 // keep memory usage down
283 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
284 // invalidated the cache on every change, we could keep the previous
286 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
287 // to use mb_str() or wc_str() instead of (const [w]char*)c_str()
290 static inline void DeleteStringFromConversionCache(T
& hash
, const wxString
*s
)
292 typename
T::iterator i
= hash
.find(wxConstCast(s
, wxString
));
293 if ( i
!= hash
.end() )
301 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
302 // so we have to use wxString* here and const-cast when used
303 WX_DECLARE_HASH_MAP(wxString
*, char*, wxPointerHash
, wxPointerEqual
,
304 wxStringCharConversionCache
);
305 static wxStringCharConversionCache gs_stringsCharCache
;
307 const char* wxCStrData::AsChar() const
309 // remove previously cached value, if any (see FIXMEs above):
310 DeleteStringFromConversionCache(gs_stringsCharCache
, m_str
);
312 // convert the string and keep it:
313 const char *s
= gs_stringsCharCache
[wxConstCast(m_str
, wxString
)] =
314 m_str
->mb_str().release();
318 #endif // wxUSE_UNICODE
320 #if !wxUSE_UNICODE_WCHAR
321 WX_DECLARE_HASH_MAP(wxString
*, wchar_t*, wxPointerHash
, wxPointerEqual
,
322 wxStringWCharConversionCache
);
323 static wxStringWCharConversionCache gs_stringsWCharCache
;
325 const wchar_t* wxCStrData::AsWChar() const
327 // remove previously cached value, if any (see FIXMEs above):
328 DeleteStringFromConversionCache(gs_stringsWCharCache
, m_str
);
330 // convert the string and keep it:
331 const wchar_t *s
= gs_stringsWCharCache
[wxConstCast(m_str
, wxString
)] =
332 m_str
->wc_str().release();
336 #endif // !wxUSE_UNICODE_WCHAR
338 wxString::~wxString()
341 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
342 DeleteStringFromConversionCache(gs_stringsCharCache
, this);
344 #if !wxUSE_UNICODE_WCHAR
345 DeleteStringFromConversionCache(gs_stringsWCharCache
, this);
350 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
351 const char* wxCStrData::AsChar() const
353 #if wxUSE_UNICODE_UTF8
354 if ( wxLocaleIsUtf8
)
357 // under non-UTF8 locales, we have to convert the internal UTF-8
358 // representation using wxConvLibc and cache the result
360 wxString
*str
= wxConstCast(m_str
, wxString
);
362 // convert the string:
364 // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
365 // have it) but it's unfortunately not obvious to implement
366 // because we don't know how big a buffer we need for the
367 // given string length (in case of multibyte encodings, e.g.
368 // ISO-2022-JP or UTF-8 when internal representation is wchar_t)
370 // One idea would be to store more than just m_convertedToChar
371 // in wxString: then we could record the length of the string
372 // which was converted the last time and try to reuse the same
373 // buffer if the current length is not greater than it (this
374 // could still fail because string could have been modified in
375 // place but it would work most of the time, so we'd do it and
376 // only allocate the new buffer if in-place conversion returned
377 // an error). We could also store a bit saying if the string
378 // was modified since the last conversion (and update it in all
379 // operation modifying the string, of course) to avoid unneeded
380 // consequential conversions. But both of these ideas require
381 // adding more fields to wxString and require profiling results
382 // to be sure that we really gain enough from them to justify
384 wxCharBuffer
buf(str
->mb_str());
386 // if it failed, return empty string and not NULL to avoid crashes in code
387 // written with either wxWidgets 2 wxString or std::string behaviour in
388 // mind: neither of them ever returns NULL and so we shouldn't either
392 if ( str
->m_convertedToChar
&&
393 strlen(buf
) == strlen(str
->m_convertedToChar
) )
395 // keep the same buffer for as long as possible, so that several calls
396 // to c_str() in a row still work:
397 strcpy(str
->m_convertedToChar
, buf
);
401 str
->m_convertedToChar
= buf
.release();
405 return str
->m_convertedToChar
+ m_offset
;
407 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
409 #if !wxUSE_UNICODE_WCHAR
410 const wchar_t* wxCStrData::AsWChar() const
412 wxString
*str
= wxConstCast(m_str
, wxString
);
414 // convert the string:
415 wxWCharBuffer
buf(str
->wc_str());
417 // notice that here, unlike above in AsChar(), conversion can't fail as our
418 // internal UTF-8 is always well-formed -- or the string was corrupted and
419 // all bets are off anyhow
421 // FIXME-UTF8: do the conversion in-place in the existing buffer
422 if ( str
->m_convertedToWChar
&&
423 wxWcslen(buf
) == wxWcslen(str
->m_convertedToWChar
) )
425 // keep the same buffer for as long as possible, so that several calls
426 // to c_str() in a row still work:
427 memcpy(str
->m_convertedToWChar
, buf
, sizeof(wchar_t) * wxWcslen(buf
));
431 str
->m_convertedToWChar
= buf
.release();
435 return str
->m_convertedToWChar
+ m_offset
;
437 #endif // !wxUSE_UNICODE_WCHAR
439 // ===========================================================================
440 // wxString class core
441 // ===========================================================================
443 // ---------------------------------------------------------------------------
444 // construction and conversion
445 // ---------------------------------------------------------------------------
447 #if wxUSE_UNICODE_WCHAR
449 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
450 const wxMBConv
& conv
)
453 if ( !psz
|| nLength
== 0 )
454 return SubstrBufFromMB(L
"", 0);
456 if ( nLength
== npos
)
460 wxWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
462 return SubstrBufFromMB(_T(""), 0);
464 return SubstrBufFromMB(wcBuf
, wcLen
);
466 #endif // wxUSE_UNICODE_WCHAR
468 #if wxUSE_UNICODE_UTF8
470 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
471 const wxMBConv
& conv
)
474 if ( !psz
|| nLength
== 0 )
475 return SubstrBufFromMB("", 0);
477 // if psz is already in UTF-8, we don't have to do the roundtrip to
478 // wchar_t* and back:
481 // we need to validate the input because UTF8 iterators assume valid
482 // UTF-8 sequence and psz may be invalid:
483 if ( wxStringOperations::IsValidUtf8String(psz
, nLength
) )
485 // we must pass the real string length to SubstrBufFromMB ctor
486 if ( nLength
== npos
)
487 nLength
= psz
? strlen(psz
) : 0;
488 return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz
), nLength
);
490 // else: do the roundtrip through wchar_t*
493 if ( nLength
== npos
)
496 // first convert to wide string:
498 wxWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
500 return SubstrBufFromMB("", 0);
502 // and then to UTF-8:
503 SubstrBufFromMB
buf(ConvertStr(wcBuf
, wcLen
, wxMBConvStrictUTF8()));
504 // widechar -> UTF-8 conversion isn't supposed to ever fail:
505 wxASSERT_MSG( buf
.data
, _T("conversion to UTF-8 failed") );
509 #endif // wxUSE_UNICODE_UTF8
511 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
513 wxString::SubstrBufFromWC
wxString::ConvertStr(const wchar_t *pwz
, size_t nLength
,
514 const wxMBConv
& conv
)
517 if ( !pwz
|| nLength
== 0 )
518 return SubstrBufFromWC("", 0);
520 if ( nLength
== npos
)
524 wxCharBuffer
mbBuf(conv
.cWC2MB(pwz
, nLength
, &mbLen
));
526 return SubstrBufFromWC("", 0);
528 return SubstrBufFromWC(mbBuf
, mbLen
);
530 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
533 #if wxUSE_UNICODE_WCHAR
535 //Convert wxString in Unicode mode to a multi-byte string
536 const wxCharBuffer
wxString::mb_str(const wxMBConv
& conv
) const
538 return conv
.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL
);
541 #elif wxUSE_UNICODE_UTF8
543 const wxWCharBuffer
wxString::wc_str() const
545 return wxMBConvStrictUTF8().cMB2WC
548 m_impl
.length() + 1, // size, not length
553 const wxCharBuffer
wxString::mb_str(const wxMBConv
& conv
) const
556 return wxCharBuffer::CreateNonOwned(m_impl
.c_str());
558 // FIXME-UTF8: use wc_str() here once we have buffers with length
561 wxWCharBuffer
wcBuf(wxMBConvStrictUTF8().cMB2WC
564 m_impl
.length() + 1, // size
568 return wxCharBuffer("");
570 return conv
.cWC2MB(wcBuf
, wcLen
+1, NULL
);
575 //Converts this string to a wide character string if unicode
576 //mode is not enabled and wxUSE_WCHAR_T is enabled
577 const wxWCharBuffer
wxString::wc_str(const wxMBConv
& conv
) const
579 return conv
.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL
);
582 #endif // Unicode/ANSI
584 // shrink to minimal size (releasing extra memory)
585 bool wxString::Shrink()
587 wxString
tmp(begin(), end());
589 return tmp
.length() == length();
592 // deprecated compatibility code:
593 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
594 wxStringCharType
*wxString::GetWriteBuf(size_t nLen
)
596 return DoGetWriteBuf(nLen
);
599 void wxString::UngetWriteBuf()
604 void wxString::UngetWriteBuf(size_t nLen
)
606 DoUngetWriteBuf(nLen
);
608 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
611 // ---------------------------------------------------------------------------
613 // ---------------------------------------------------------------------------
615 // all functions are inline in string.h
617 // ---------------------------------------------------------------------------
618 // concatenation operators
619 // ---------------------------------------------------------------------------
622 * concatenation functions come in 5 flavours:
624 * char + string and string + char
625 * C str + string and string + C str
628 wxString
operator+(const wxString
& str1
, const wxString
& str2
)
630 #if !wxUSE_STL_BASED_WXSTRING
631 wxASSERT( str1
.IsValid() );
632 wxASSERT( str2
.IsValid() );
641 wxString
operator+(const wxString
& str
, wxUniChar ch
)
643 #if !wxUSE_STL_BASED_WXSTRING
644 wxASSERT( str
.IsValid() );
653 wxString
operator+(wxUniChar ch
, const wxString
& str
)
655 #if !wxUSE_STL_BASED_WXSTRING
656 wxASSERT( str
.IsValid() );
665 wxString
operator+(const wxString
& str
, const char *psz
)
667 #if !wxUSE_STL_BASED_WXSTRING
668 wxASSERT( str
.IsValid() );
672 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
673 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
681 wxString
operator+(const wxString
& str
, const wchar_t *pwz
)
683 #if !wxUSE_STL_BASED_WXSTRING
684 wxASSERT( str
.IsValid() );
688 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
689 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
697 wxString
operator+(const char *psz
, const wxString
& str
)
699 #if !wxUSE_STL_BASED_WXSTRING
700 wxASSERT( str
.IsValid() );
704 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
705 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
713 wxString
operator+(const wchar_t *pwz
, const wxString
& str
)
715 #if !wxUSE_STL_BASED_WXSTRING
716 wxASSERT( str
.IsValid() );
720 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
721 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
729 // ---------------------------------------------------------------------------
731 // ---------------------------------------------------------------------------
733 bool wxString::IsSameAs(wxUniChar c
, bool compareWithCase
) const
735 return (length() == 1) && (compareWithCase
? GetChar(0u) == c
736 : wxToupper(GetChar(0u)) == wxToupper(c
));
739 #ifdef HAVE_STD_STRING_COMPARE
741 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
742 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
743 // sort strings in characters code point order by sorting the byte sequence
744 // in byte values order (i.e. what strcmp() and memcmp() do).
746 int wxString::compare(const wxString
& str
) const
748 return m_impl
.compare(str
.m_impl
);
751 int wxString::compare(size_t nStart
, size_t nLen
,
752 const wxString
& str
) const
755 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
756 return m_impl
.compare(pos
, len
, str
.m_impl
);
759 int wxString::compare(size_t nStart
, size_t nLen
,
761 size_t nStart2
, size_t nLen2
) const
764 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
767 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
769 return m_impl
.compare(pos
, len
, str
.m_impl
, pos2
, len2
);
772 int wxString::compare(const char* sz
) const
774 return m_impl
.compare(ImplStr(sz
));
777 int wxString::compare(const wchar_t* sz
) const
779 return m_impl
.compare(ImplStr(sz
));
782 int wxString::compare(size_t nStart
, size_t nLen
,
783 const char* sz
, size_t nCount
) const
786 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
788 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
790 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
793 int wxString::compare(size_t nStart
, size_t nLen
,
794 const wchar_t* sz
, size_t nCount
) const
797 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
799 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
801 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
804 #else // !HAVE_STD_STRING_COMPARE
806 static inline int wxDoCmp(const wxStringCharType
* s1
, size_t l1
,
807 const wxStringCharType
* s2
, size_t l2
)
810 return wxStringMemcmp(s1
, s2
, l1
);
813 int ret
= wxStringMemcmp(s1
, s2
, l1
);
814 return ret
== 0 ? -1 : ret
;
818 int ret
= wxStringMemcmp(s1
, s2
, l2
);
819 return ret
== 0 ? +1 : ret
;
823 int wxString::compare(const wxString
& str
) const
825 return ::wxDoCmp(m_impl
.data(), m_impl
.length(),
826 str
.m_impl
.data(), str
.m_impl
.length());
829 int wxString::compare(size_t nStart
, size_t nLen
,
830 const wxString
& str
) const
832 wxASSERT(nStart
<= length());
833 size_type strLen
= length() - nStart
;
834 nLen
= strLen
< nLen
? strLen
: nLen
;
837 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
839 return ::wxDoCmp(m_impl
.data() + pos
, len
,
840 str
.m_impl
.data(), str
.m_impl
.length());
843 int wxString::compare(size_t nStart
, size_t nLen
,
845 size_t nStart2
, size_t nLen2
) const
847 wxASSERT(nStart
<= length());
848 wxASSERT(nStart2
<= str
.length());
849 size_type strLen
= length() - nStart
,
850 strLen2
= str
.length() - nStart2
;
851 nLen
= strLen
< nLen
? strLen
: nLen
;
852 nLen2
= strLen2
< nLen2
? strLen2
: nLen2
;
855 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
857 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
859 return ::wxDoCmp(m_impl
.data() + pos
, len
,
860 str
.m_impl
.data() + pos2
, len2
);
863 int wxString::compare(const char* sz
) const
865 SubstrBufFromMB
str(ImplStr(sz
, npos
));
866 if ( str
.len
== npos
)
867 str
.len
= wxStringStrlen(str
.data
);
868 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
871 int wxString::compare(const wchar_t* sz
) const
873 SubstrBufFromWC
str(ImplStr(sz
, npos
));
874 if ( str
.len
== npos
)
875 str
.len
= wxStringStrlen(str
.data
);
876 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
879 int wxString::compare(size_t nStart
, size_t nLen
,
880 const char* sz
, size_t nCount
) const
882 wxASSERT(nStart
<= length());
883 size_type strLen
= length() - nStart
;
884 nLen
= strLen
< nLen
? strLen
: nLen
;
887 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
889 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
890 if ( str
.len
== npos
)
891 str
.len
= wxStringStrlen(str
.data
);
893 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
896 int wxString::compare(size_t nStart
, size_t nLen
,
897 const wchar_t* sz
, size_t nCount
) const
899 wxASSERT(nStart
<= length());
900 size_type strLen
= length() - nStart
;
901 nLen
= strLen
< nLen
? strLen
: nLen
;
904 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
906 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
907 if ( str
.len
== npos
)
908 str
.len
= wxStringStrlen(str
.data
);
910 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
913 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
916 // ---------------------------------------------------------------------------
917 // find_{first,last}_[not]_of functions
918 // ---------------------------------------------------------------------------
920 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
922 // NB: All these functions are implemented with the argument being wxChar*,
923 // i.e. widechar string in any Unicode build, even though native string
924 // representation is char* in the UTF-8 build. This is because we couldn't
925 // use memchr() to determine if a character is in a set encoded as UTF-8.
927 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
) const
929 return find_first_of(sz
, nStart
, wxStrlen(sz
));
932 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
) const
934 return find_first_not_of(sz
, nStart
, wxStrlen(sz
));
937 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
939 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
942 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
944 if ( wxTmemchr(sz
, *i
, n
) )
951 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
953 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
956 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
958 if ( !wxTmemchr(sz
, *i
, n
) )
966 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
) const
968 return find_last_of(sz
, nStart
, wxStrlen(sz
));
971 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
) const
973 return find_last_not_of(sz
, nStart
, wxStrlen(sz
));
976 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
978 size_t len
= length();
980 if ( nStart
== npos
)
986 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
990 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
991 i
!= rend(); --idx
, ++i
)
993 if ( wxTmemchr(sz
, *i
, n
) )
1000 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
1002 size_t len
= length();
1004 if ( nStart
== npos
)
1010 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
1013 size_t idx
= nStart
;
1014 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
1015 i
!= rend(); --idx
, ++i
)
1017 if ( !wxTmemchr(sz
, *i
, n
) )
1024 size_t wxString::find_first_not_of(wxUniChar ch
, size_t nStart
) const
1026 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
1028 size_t idx
= nStart
;
1029 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
1038 size_t wxString::find_last_not_of(wxUniChar ch
, size_t nStart
) const
1040 size_t len
= length();
1042 if ( nStart
== npos
)
1048 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
1051 size_t idx
= nStart
;
1052 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
1053 i
!= rend(); --idx
, ++i
)
1062 // the functions above were implemented for wchar_t* arguments in Unicode
1063 // build and char* in ANSI build; below are implementations for the other
1066 #define wxOtherCharType char
1067 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
1069 #define wxOtherCharType wchar_t
1070 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
1073 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
) const
1074 { return find_first_of(STRCONV(sz
), nStart
); }
1076 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
,
1078 { return find_first_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1079 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
) const
1080 { return find_last_of(STRCONV(sz
), nStart
); }
1081 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
,
1083 { return find_last_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1084 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
1085 { return find_first_not_of(STRCONV(sz
), nStart
); }
1086 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
,
1088 { return find_first_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1089 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
1090 { return find_last_not_of(STRCONV(sz
), nStart
); }
1091 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
,
1093 { return find_last_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1095 #undef wxOtherCharType
1098 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1100 // ===========================================================================
1101 // other common string functions
1102 // ===========================================================================
1104 int wxString::CmpNoCase(const wxString
& s
) const
1106 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
1108 const_iterator i1
= begin();
1109 const_iterator end1
= end();
1110 const_iterator i2
= s
.begin();
1111 const_iterator end2
= s
.end();
1113 for ( ; i1
!= end1
&& i2
!= end2
; ++i1
, ++i2
)
1115 wxUniChar lower1
= (wxChar
)wxTolower(*i1
);
1116 wxUniChar lower2
= (wxChar
)wxTolower(*i2
);
1117 if ( lower1
!= lower2
)
1118 return lower1
< lower2
? -1 : 1;
1121 size_t len1
= length();
1122 size_t len2
= s
.length();
1126 else if ( len1
> len2
)
1135 #ifndef __SCHAR_MAX__
1136 #define __SCHAR_MAX__ 127
1140 wxString
wxString::FromAscii(const char *ascii
, size_t len
)
1142 if (!ascii
|| len
== 0)
1143 return wxEmptyString
;
1148 wxStringInternalBuffer
buf(res
, len
);
1149 wxStringCharType
*dest
= buf
;
1151 for ( ; len
> 0; --len
)
1153 unsigned char c
= (unsigned char)*ascii
++;
1154 wxASSERT_MSG( c
< 0x80,
1155 _T("Non-ASCII value passed to FromAscii().") );
1157 *dest
++ = (wchar_t)c
;
1164 wxString
wxString::FromAscii(const char *ascii
)
1166 return FromAscii(ascii
, wxStrlen(ascii
));
1169 wxString
wxString::FromAscii(char ascii
)
1171 // What do we do with '\0' ?
1173 unsigned char c
= (unsigned char)ascii
;
1175 wxASSERT_MSG( c
< 0x80, _T("Non-ASCII value passed to FromAscii().") );
1177 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1178 return wxString(wxUniChar((wchar_t)c
));
1181 const wxCharBuffer
wxString::ToAscii() const
1183 // this will allocate enough space for the terminating NUL too
1184 wxCharBuffer
buffer(length());
1185 char *dest
= buffer
.data();
1187 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1190 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1191 *dest
++ = c
.IsAscii() ? (char)c
: '_';
1193 // the output string can't have embedded NULs anyhow, so we can safely
1194 // stop at first of them even if we do have any
1202 #endif // wxUSE_UNICODE
1204 // extract string of length nCount starting at nFirst
1205 wxString
wxString::Mid(size_t nFirst
, size_t nCount
) const
1207 size_t nLen
= length();
1209 // default value of nCount is npos and means "till the end"
1210 if ( nCount
== npos
)
1212 nCount
= nLen
- nFirst
;
1215 // out-of-bounds requests return sensible things
1216 if ( nFirst
+ nCount
> nLen
)
1218 nCount
= nLen
- nFirst
;
1221 if ( nFirst
> nLen
)
1223 // AllocCopy() will return empty string
1224 return wxEmptyString
;
1227 wxString
dest(*this, nFirst
, nCount
);
1228 if ( dest
.length() != nCount
)
1230 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1236 // check that the string starts with prefix and return the rest of the string
1237 // in the provided pointer if it is not NULL, otherwise return false
1238 bool wxString::StartsWith(const wxString
& prefix
, wxString
*rest
) const
1240 if ( compare(0, prefix
.length(), prefix
) != 0 )
1245 // put the rest of the string into provided pointer
1246 rest
->assign(*this, prefix
.length(), npos
);
1253 // check that the string ends with suffix and return the rest of it in the
1254 // provided pointer if it is not NULL, otherwise return false
1255 bool wxString::EndsWith(const wxString
& suffix
, wxString
*rest
) const
1257 int start
= length() - suffix
.length();
1259 if ( start
< 0 || compare(start
, npos
, suffix
) != 0 )
1264 // put the rest of the string into provided pointer
1265 rest
->assign(*this, 0, start
);
1272 // extract nCount last (rightmost) characters
1273 wxString
wxString::Right(size_t nCount
) const
1275 if ( nCount
> length() )
1278 wxString
dest(*this, length() - nCount
, nCount
);
1279 if ( dest
.length() != nCount
) {
1280 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1285 // get all characters after the last occurrence of ch
1286 // (returns the whole string if ch not found)
1287 wxString
wxString::AfterLast(wxUniChar ch
) const
1290 int iPos
= Find(ch
, true);
1291 if ( iPos
== wxNOT_FOUND
)
1294 str
.assign(*this, iPos
+ 1, npos
);
1299 // extract nCount first (leftmost) characters
1300 wxString
wxString::Left(size_t nCount
) const
1302 if ( nCount
> length() )
1305 wxString
dest(*this, 0, nCount
);
1306 if ( dest
.length() != nCount
) {
1307 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1312 // get all characters before the first occurrence of ch
1313 // (returns the whole string if ch not found)
1314 wxString
wxString::BeforeFirst(wxUniChar ch
) const
1316 int iPos
= Find(ch
);
1317 if ( iPos
== wxNOT_FOUND
)
1319 return wxString(*this, 0, iPos
);
1322 /// get all characters before the last occurrence of ch
1323 /// (returns empty string if ch not found)
1324 wxString
wxString::BeforeLast(wxUniChar ch
) const
1327 int iPos
= Find(ch
, true);
1328 if ( iPos
!= wxNOT_FOUND
&& iPos
!= 0 )
1329 str
= wxString(c_str(), iPos
);
1334 /// get all characters after the first occurrence of ch
1335 /// (returns empty string if ch not found)
1336 wxString
wxString::AfterFirst(wxUniChar ch
) const
1339 int iPos
= Find(ch
);
1340 if ( iPos
!= wxNOT_FOUND
)
1341 str
.assign(*this, iPos
+ 1, npos
);
1346 // replace first (or all) occurrences of some substring with another one
1347 size_t wxString::Replace(const wxString
& strOld
,
1348 const wxString
& strNew
, bool bReplaceAll
)
1350 // if we tried to replace an empty string we'd enter an infinite loop below
1351 wxCHECK_MSG( !strOld
.empty(), 0,
1352 _T("wxString::Replace(): invalid parameter") );
1354 wxSTRING_INVALIDATE_CACHE();
1356 size_t uiCount
= 0; // count of replacements made
1358 // optimize the special common case: replacement of one character by
1359 // another one (in UTF-8 case we can only do this for ASCII characters)
1361 // benchmarks show that this special version is around 3 times faster
1362 // (depending on the proportion of matching characters and UTF-8/wchar_t
1364 if ( strOld
.m_impl
.length() == 1 && strNew
.m_impl
.length() == 1 )
1366 const wxStringCharType chOld
= strOld
.m_impl
[0],
1367 chNew
= strNew
.m_impl
[0];
1369 // this loop is the simplified version of the one below
1370 for ( size_t pos
= 0; ; )
1372 pos
= m_impl
.find(chOld
, pos
);
1376 m_impl
[pos
++] = chNew
;
1384 else // general case
1386 const size_t uiOldLen
= strOld
.m_impl
.length();
1387 const size_t uiNewLen
= strNew
.m_impl
.length();
1389 for ( size_t pos
= 0; ; )
1391 pos
= m_impl
.find(strOld
.m_impl
, pos
);
1395 // replace this occurrence of the old string with the new one
1396 m_impl
.replace(pos
, uiOldLen
, strNew
.m_impl
);
1398 // move up pos past the string that was replaced
1401 // increase replace count
1404 // stop after the first one?
1413 bool wxString::IsAscii() const
1415 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1417 if ( !(*i
).IsAscii() )
1424 bool wxString::IsWord() const
1426 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1428 if ( !wxIsalpha(*i
) )
1435 bool wxString::IsNumber() const
1440 const_iterator i
= begin();
1442 if ( *i
== _T('-') || *i
== _T('+') )
1445 for ( ; i
!= end(); ++i
)
1447 if ( !wxIsdigit(*i
) )
1454 wxString
wxString::Strip(stripType w
) const
1457 if ( w
& leading
) s
.Trim(false);
1458 if ( w
& trailing
) s
.Trim(true);
1462 // ---------------------------------------------------------------------------
1464 // ---------------------------------------------------------------------------
1466 wxString
& wxString::MakeUpper()
1468 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1469 *it
= (wxChar
)wxToupper(*it
);
1474 wxString
& wxString::MakeLower()
1476 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1477 *it
= (wxChar
)wxTolower(*it
);
1482 wxString
& wxString::MakeCapitalized()
1484 const iterator en
= end();
1485 iterator it
= begin();
1488 *it
= (wxChar
)wxToupper(*it
);
1489 for ( ++it
; it
!= en
; ++it
)
1490 *it
= (wxChar
)wxTolower(*it
);
1496 // ---------------------------------------------------------------------------
1497 // trimming and padding
1498 // ---------------------------------------------------------------------------
1500 // some compilers (VC++ 6.0 not to name them) return true for a call to
1501 // isspace('\xEA') in the C locale which seems to be broken to me, but we have
1502 // to live with this by checking that the character is a 7 bit one - even if
1503 // this may fail to detect some spaces (I don't know if Unicode doesn't have
1504 // space-like symbols somewhere except in the first 128 chars), it is arguably
1505 // still better than trimming away accented letters
1506 inline int wxSafeIsspace(wxChar ch
) { return (ch
< 127) && wxIsspace(ch
); }
1508 // trims spaces (in the sense of isspace) from left or right side
1509 wxString
& wxString::Trim(bool bFromRight
)
1511 // first check if we're going to modify the string at all
1514 (bFromRight
&& wxSafeIsspace(GetChar(length() - 1))) ||
1515 (!bFromRight
&& wxSafeIsspace(GetChar(0u)))
1521 // find last non-space character
1522 reverse_iterator psz
= rbegin();
1523 while ( (psz
!= rend()) && wxSafeIsspace(*psz
) )
1526 // truncate at trailing space start
1527 erase(psz
.base(), end());
1531 // find first non-space character
1532 iterator psz
= begin();
1533 while ( (psz
!= end()) && wxSafeIsspace(*psz
) )
1536 // fix up data and length
1537 erase(begin(), psz
);
1544 // adds nCount characters chPad to the string from either side
1545 wxString
& wxString::Pad(size_t nCount
, wxUniChar chPad
, bool bFromRight
)
1547 wxString
s(chPad
, nCount
);
1560 // truncate the string
1561 wxString
& wxString::Truncate(size_t uiLen
)
1563 if ( uiLen
< length() )
1565 erase(begin() + uiLen
, end());
1567 //else: nothing to do, string is already short enough
1572 // ---------------------------------------------------------------------------
1573 // finding (return wxNOT_FOUND if not found and index otherwise)
1574 // ---------------------------------------------------------------------------
1577 int wxString::Find(wxUniChar ch
, bool bFromEnd
) const
1579 size_type idx
= bFromEnd
? find_last_of(ch
) : find_first_of(ch
);
1581 return (idx
== npos
) ? wxNOT_FOUND
: (int)idx
;
1584 // ----------------------------------------------------------------------------
1585 // conversion to numbers
1586 // ----------------------------------------------------------------------------
// All the integer conversion functions below share exactly the same
// implementation, so it lives in a macro. Number extraction works correctly
// on UTF-8 strings, hence wxStringCharType and wx_str() can be used for
// maximum efficiency.

// DO_IF_NOT_WINCE(x) expands to x everywhere except under Windows CE, where
// it expands to nothing (it is used to wrap the errno accesses).
#ifndef __WXWINCE__
    #define DO_IF_NOT_WINCE(x) x
#else
    #define DO_IF_NOT_WINCE(x)
#endif

// Body of a "bool ToXXX(T *out, int base) const" member: converts the string
// with func() and returns true, storing the result in *out, only if the
// whole non-empty string was consumed and no range error was reported.
#define WX_STRING_TO_INT_TYPE(out, base, func, T)                            \
    wxCHECK_MSG( out, false, _T("NULL output pointer") );                    \
    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );   \
                                                                             \
    DO_IF_NOT_WINCE( errno = 0; )                                            \
                                                                             \
    const wxStringCharType *start = wx_str();                                \
    wxStringCharType *end;                                                   \
    T val = func(start, &end, base);                                         \
                                                                             \
    /* succeed only if the scan stopped at the terminating NUL, the */       \
    /* string was not empty to begin with and no under/overflow */           \
    /* occurred */                                                           \
    if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )          \
        return false;                                                        \
    *out = val;                                                              \
    return true
1616 bool wxString::ToLong(long *pVal
, int base
) const
1618 WX_STRING_TO_INT_TYPE(pVal
, base
, wxStrtol
, long);
1621 bool wxString::ToULong(unsigned long *pVal
, int base
) const
1623 WX_STRING_TO_INT_TYPE(pVal
, base
, wxStrtoul
, unsigned long);
1626 bool wxString::ToLongLong(wxLongLong_t
*pVal
, int base
) const
1628 WX_STRING_TO_INT_TYPE(pVal
, base
, wxStrtoll
, wxLongLong_t
);
1631 bool wxString::ToULongLong(wxULongLong_t
*pVal
, int base
) const
1633 WX_STRING_TO_INT_TYPE(pVal
, base
, wxStrtoull
, wxULongLong_t
);
1636 bool wxString::ToDouble(double *pVal
) const
1638 wxCHECK_MSG( pVal
, false, _T("NULL output pointer") );
1640 DO_IF_NOT_WINCE( errno
= 0; )
1642 const wxChar
*start
= c_str();
1644 double val
= wxStrtod(start
, &end
);
1646 // return true only if scan was stopped by the terminating NUL and if the
1647 // string was not empty to start with and no under/overflow occurred
1648 if ( *end
|| end
== start
DO_IF_NOT_WINCE(|| errno
== ERANGE
) )
1656 // ---------------------------------------------------------------------------
1658 // ---------------------------------------------------------------------------
1660 #if !wxUSE_UTF8_LOCALE_ONLY
1662 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1663 wxString
wxStringPrintfMixinBase::DoFormatWchar(const wxChar
*format
, ...)
1665 wxString
wxString::DoFormatWchar(const wxChar
*format
, ...)
1669 va_start(argptr
, format
);
1672 s
.PrintfV(format
, argptr
);
1678 #endif // !wxUSE_UTF8_LOCALE_ONLY
#if wxUSE_UNICODE_UTF8
// Build and return a new string from a char printf()-like format and its
// variable arguments by forwarding them to PrintfV().
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
1697 wxString
wxString::FormatV(const wxString
& format
, va_list argptr
)
1700 s
.PrintfV(format
, argptr
);
1704 #if !wxUSE_UTF8_LOCALE_ONLY
1705 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1706 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar
*format
, ...)
1708 int wxString::DoPrintfWchar(const wxChar
*format
, ...)
1712 va_start(argptr
, format
);
1714 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1715 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1716 // because it's the only cast that works safely for downcasting when
1717 // multiple inheritance is used:
1718 wxString
*str
= static_cast<wxString
*>(this);
1720 wxString
*str
= this;
1723 int iLen
= str
->PrintfV(format
, argptr
);
1729 #endif // !wxUSE_UTF8_LOCALE_ONLY
#if wxUSE_UNICODE_UTF8
// Replace the contents of the string with the result of formatting the char
// printf()-like format and its variable arguments; returns whatever
// PrintfV() returns.
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);
    const int iLen = PrintfV(format, args);
    va_end(args);

    return iLen;
}
#endif // wxUSE_UNICODE_UTF8
1746 Uses wxVsnprintf and places the result into this string.
1748 In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1749 it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in
1750 the ISO C99 (and thus SUSv3) standard the return value for the case of
1751 an undersized buffer is inconsistent. For conforming vsnprintf
1752 implementations the function must return the number of characters that
1753 would have been printed had the buffer been large enough. For conforming
1754 vswprintf implementations the function must return a negative number
1757 What vswprintf sets errno to is undefined but Darwin seems to set it to
1758 EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of
1759 those are defined in the standard and backed up by several conformance
1760 statements. Note that ENOMEM mentioned in the manual page does not
1761 apply to swprintf, only wprintf and fwprintf.
1763 Official manual page:
1764 http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1766 Some conformance statements (AIX, Solaris):
1767 http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1768 http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1770 Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1771 EILSEQ and EINVAL are specifically defined to mean the error is other than
1772 an undersized buffer and no other errno are defined we treat those two
1773 as meaning hard errors and everything else gets the old behavior which
1774 is to keep looping and increasing buffer size until the function succeeds.
1776 In practice it's impossible to determine before compilation which behavior
1777 may be used. The vswprintf function may have vsnprintf-like behavior or
1778 vice-versa. Behavior detected on one release can theoretically change
1779 with an updated release. Not to mention that configure testing for it
1780 would require the test to be run on the host system, not the build system
1781 which makes cross compilation difficult. Therefore, we make no assumptions
1782 about behavior and try our best to handle every known case, including the
1783 case where wxVsnprintf returns a negative number and fails to set errno.
1785 There is yet one more non-standard implementation and that is our own.
1786 Fortunately, that can be detected at compile-time.
1788 On top of all that, ISO C99 explicitly defines snprintf to write a null
1789 character to the last position of the specified buffer. That would be at
1790 the given buffer size minus 1. It is supposed to do this even if it
1791 turns out that the buffer is sized too small.
1793 Darwin (tested on 10.5) follows the C99 behavior exactly.
1795 Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1796 errno even when it fails. However, it only seems to ever fail due
1797 to an undersized buffer.
// Formats `format` with the arguments from `argptr` into `str` using
// wxVsnprintf() and returns the resulting str.length().
//
// The output is written into a temporary string buffer created with room for
// `size + 1` characters; when the result does not fit, `size` is enlarged and
// the call is repeated. A negative result together with errno EILSEQ or
// EINVAL is treated as a hard error instead of an undersized buffer.
1799 #if wxUSE_UNICODE_UTF8
1800 template<typename BufferType
>
1802 // we only need one version in non-UTF8 builds and at least two Windows
1803 // compilers have problems with this function template, so use just one
1804 // normal function here
1806 static int DoStringPrintfV(wxString
& str
,
1807 const wxString
& format
, va_list argptr
)
1813 #if wxUSE_UNICODE_UTF8
1814 BufferType
tmp(str
, size
+ 1);
1815 typename
BufferType::CharType
*buf
= tmp
;
1817 wxStringBuffer
tmp(str
, size
+ 1);
1825 // in UTF-8 build, leaving uninitialized junk in the buffer
1826 // could result in invalid non-empty UTF-8 string, so just
1827 // reset the string to empty on failure:
1832 // wxVsnprintf() may modify the original arg pointer, so pass it
// only a copy
1835 wxVaCopy(argptrcopy
, argptr
);
1838 // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1841 int len
= wxVsnprintf(buf
, size
, format
, argptrcopy
);
1844 // some implementations of vsnprintf() don't NUL terminate
1845 // the string if there is not enough space for it so
1846 // always do it manually
1847 // FIXME: This really seems to be wrong and would be an off-by-one
1848 // bug except the code above allocates an extra character.
1849 buf
[size
] = _T('\0');
1851 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1852 // total number of characters which would have been written if the
1853 // buffer were large enough (newer standards such as Unix98)
1856 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1857 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1858 // is true if *both* of them use our own implementation,
1859 // otherwise we can't be sure
1860 #if wxUSE_WXVSNPRINTF
1861 // we know that our own implementation of wxVsnprintf() returns -1
1862 // only for a format error - thus there's something wrong with
1863 // the user's format string
1866 #else // possibly using system version
1867 // assume it only returns error if there is not enough space, but
1868 // as we don't know how much we need, double the current size of
// the buffer
1871 if( (errno
== EILSEQ
) || (errno
== EINVAL
) )
1872 // If errno was set to one of the two well-known hard errors
1873 // then fail immediately to avoid an infinite loop.
1876 #endif // __WXWINCE__
1877 // still not enough, as we don't know how much we need, double the
1878 // current size of the buffer
1880 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1882 else if ( len
>= size
)
1884 #if wxUSE_WXVSNPRINTF
1885 // we know that our own implementation of wxVsnprintf() returns
1886 // size+1 when there's not enough space but that's not the size
1887 // of the required buffer!
1888 size
*= 2; // so we just double the current size of the buffer
1890 // some vsnprintf() implementations NUL-terminate the buffer and
1891 // some don't in len == size case, to be safe always add 1
1892 // FIXME: I don't quite understand this comment. The vsnprintf
1893 // function is specifically defined to return the number of
1894 // characters printed not including the null terminator.
1895 // So OF COURSE you need to add 1 to get the right buffer size.
1896 // The following line is definitely correct, no question.
1900 else // ok, there was enough space
1906 // we could have overshot
1909 return str
.length();
1912 int wxString::PrintfV(const wxString
& format
, va_list argptr
)
1914 #if wxUSE_UNICODE_UTF8
1915 #if wxUSE_STL_BASED_WXSTRING
1916 typedef wxStringTypeBuffer
<char> Utf8Buffer
;
1918 typedef wxStringInternalBuffer Utf8Buffer
;
1922 #if wxUSE_UTF8_LOCALE_ONLY
1923 return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argptr
);
1925 #if wxUSE_UNICODE_UTF8
1926 if ( wxLocaleIsUtf8
)
1927 return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argptr
);
1930 return DoStringPrintfV
<wxStringBuffer
>(*this, format
, argptr
);
1932 return DoStringPrintfV(*this, format
, argptr
);
1933 #endif // UTF8/WCHAR
1937 // ----------------------------------------------------------------------------
1938 // misc other operations
1939 // ----------------------------------------------------------------------------
1941 // returns true if the string matches the pattern which may contain '*' and
1942 // '?' metacharacters (as usual, '?' matches any character and '*' any number
// of characters)
//
// The regex-based implementation is compiled out (#if 0); the hand-written
// matcher walks the mask and the text in parallel and remembers the position
// of the last '*' so that it can backtrack when a later part fails to match.
1944 bool wxString::Matches(const wxString
& mask
) const
1946 // I disable this code as it doesn't seem to be faster (in fact, it seems
1947 // to be much slower) than the old, hand-written code below and using it
1948 // here requires always linking with libregex even if the user code doesn't
// use it
1950 #if 0 // wxUSE_REGEX
1951 // first translate the shell-like mask into a regex
1953 pattern
.reserve(wxStrlen(pszMask
));
1965 pattern
+= _T(".*");
1976 // these characters are special in a RE, quote them
1977 // (however note that we don't quote '[' and ']' to allow
1978 // using them for Unix shell like matching)
1979 pattern
+= _T('\\');
1983 pattern
+= *pszMask
;
1991 return wxRegEx(pattern
, wxRE_NOSUB
| wxRE_EXTENDED
).Matches(c_str());
1992 #else // !wxUSE_REGEX
1993 // TODO: this is, of course, awfully inefficient...
1995 // FIXME-UTF8: implement using iterators, remove #if
// in UTF-8 build both the mask and the text are first converted to wide
// character buffers, which are then scanned instead of the strings themselves
1996 #if wxUSE_UNICODE_UTF8
1997 wxWCharBuffer maskBuf
= mask
.wc_str();
1998 wxWCharBuffer txtBuf
= wc_str();
1999 const wxChar
*pszMask
= maskBuf
.data();
2000 const wxChar
*pszTxt
= txtBuf
.data();
2002 const wxChar
*pszMask
= mask
.wx_str();
2003 // the char currently being checked
2004 const wxChar
*pszTxt
= wx_str();
2007 // the last location where '*' matched
2008 const wxChar
*pszLastStarInText
= NULL
;
2009 const wxChar
*pszLastStarInMask
= NULL
;
2012 for ( ; *pszMask
!= wxT('\0'); pszMask
++, pszTxt
++ ) {
2013 switch ( *pszMask
) {
2015 if ( *pszTxt
== wxT('\0') )
2018 // pszTxt and pszMask will be incremented in the loop statement
2024 // remember where we started to be able to backtrack later
2025 pszLastStarInText
= pszTxt
;
2026 pszLastStarInMask
= pszMask
;
2028 // ignore special chars immediately following this one
2029 // (should this be an error?)
2030 while ( *pszMask
== wxT('*') || *pszMask
== wxT('?') )
2033 // if there is nothing more, match
2034 if ( *pszMask
== wxT('\0') )
2037 // are there any other metacharacters in the mask?
2039 const wxChar
*pEndMask
= wxStrpbrk(pszMask
, wxT("*?"));
2041 if ( pEndMask
!= NULL
) {
2042 // we have to match the string between two metachars
2043 uiLenMask
= pEndMask
- pszMask
;
2046 // we have to match the remainder of the string
2047 uiLenMask
= wxStrlen(pszMask
);
// look for the literal part of the mask in the remaining text
2050 wxString
strToMatch(pszMask
, uiLenMask
);
2051 const wxChar
* pMatch
= wxStrstr(pszTxt
, strToMatch
);
2052 if ( pMatch
== NULL
)
2055 // -1 to compensate "++" in the loop
2056 pszTxt
= pMatch
+ uiLenMask
- 1;
2057 pszMask
+= uiLenMask
- 1;
// any other mask character must be equal to the current text character
2062 if ( *pszMask
!= *pszTxt
)
2068 // match only if nothing left
2069 if ( *pszTxt
== wxT('\0') )
2072 // if we failed to match, backtrack if we can
2073 if ( pszLastStarInText
) {
2074 pszTxt
= pszLastStarInText
+ 1;
2075 pszMask
= pszLastStarInMask
;
2077 pszLastStarInText
= NULL
;
2079 // don't bother resetting pszLastStarInMask, it's unnecessary
2085 #endif // wxUSE_REGEX/!wxUSE_REGEX
2088 // Count the number of chars
2089 int wxString::Freq(wxUniChar ch
) const
2092 for ( const_iterator i
= begin(); i
!= end(); ++i
)
2100 // ----------------------------------------------------------------------------
2101 // wxUTF8StringBuffer
2102 // ----------------------------------------------------------------------------
2104 #if wxUSE_UNICODE_WCHAR
2105 wxUTF8StringBuffer::~wxUTF8StringBuffer()
2107 wxMBConvStrictUTF8 conv
;
2108 size_t wlen
= conv
.ToWChar(NULL
, 0, m_buf
);
2109 wxCHECK_RET( wlen
!= wxCONV_FAILED
, "invalid UTF-8 data in string buffer?" );
2111 wxStringInternalBuffer
wbuf(m_str
, wlen
);
2112 conv
.ToWChar(wbuf
, wlen
, m_buf
);
2115 wxUTF8StringBufferLength::~wxUTF8StringBufferLength()
2117 wxCHECK_RET(m_lenSet
, "length not set");
2119 wxMBConvStrictUTF8 conv
;
2120 size_t wlen
= conv
.ToWChar(NULL
, 0, m_buf
, m_len
);
2121 wxCHECK_RET( wlen
!= wxCONV_FAILED
, "invalid UTF-8 data in string buffer?" );
2123 wxStringInternalBufferLength
wbuf(m_str
, wlen
);
2124 conv
.ToWChar(wbuf
, wlen
, m_buf
, m_len
);
2125 wbuf
.SetLength(wlen
);
2127 #endif // wxUSE_UNICODE_WCHAR
2129 // ----------------------------------------------------------------------------
2130 // wxCharBufferType<T>
2131 // ----------------------------------------------------------------------------
2133 #ifndef __VMS_BROKEN_TEMPLATES
2136 wxCharTypeBuffer
<char>::Data
2137 wxCharTypeBuffer
<char>::NullData(NULL
);
2139 #ifndef __VMS_BROKEN_TEMPLATES
2142 wxCharTypeBuffer
<wchar_t>::Data
2143 wxCharTypeBuffer
<wchar_t>::NullData(NULL
);