1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
13 // ===========================================================================
14 // headers, declarations, constants
15 // ===========================================================================
17 // For compilers that support precompilation, includes "wx.h".
18 #include "wx/wxprec.h"
25 #include "wx/string.h"
26 #include "wx/wxcrtvararg.h"
38 #include "wx/hashmap.h"
40 // string handling functions used by wxString:
41 #if wxUSE_UNICODE_UTF8
42 #define wxStringMemcpy memcpy
43 #define wxStringMemcmp memcmp
44 #define wxStringMemchr memchr
45 #define wxStringStrlen strlen
47 #define wxStringMemcpy wxTmemcpy
48 #define wxStringMemcmp wxTmemcmp
49 #define wxStringMemchr wxTmemchr
50 #define wxStringStrlen wxStrlen
54 // ---------------------------------------------------------------------------
55 // static class variables definition
56 // ---------------------------------------------------------------------------
58 //According to STL _must_ be a -1 size_t
59 const size_t wxString::npos
= (size_t) -1;
61 #if wxUSE_STRING_POS_CACHE
62 wxTLS_TYPE(wxString::Cache
) wxString::ms_cache
;
64 // gdb seems to be unable to display thread-local variables correctly, at least
65 // not my 6.4.98 version under amd64, so provide this debugging helper to do it
68 struct wxStrCacheDumper
72 puts("*** wxString cache dump:");
73 for ( unsigned n
= 0; n
< wxString::Cache::SIZE
; n
++ )
75 const wxString::Cache::Element
&
76 c
= wxString::ms_cache
.cached
[n
];
78 printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
80 n
== wxString::ms_cache
.lastUsed
? " [*]" : "",
83 (unsigned long)c
.impl
,
89 void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
93 #ifdef wxPROFILE_STRING_CACHE
95 wxString::CacheStats
wxString::ms_cacheStats
;
100 struct ShowCacheStats
104 const wxString::CacheStats
& stats
= wxString::ms_cacheStats
;
108 puts("*** wxString cache statistics:");
109 printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
111 printf("\tHits %u (of which %u not used) or %.2f%%\n",
114 100.*float(stats
.poshits
- stats
.mishits
)/stats
.postot
);
115 printf("\tAverage position requested: %.2f\n",
116 float(stats
.sumpos
) / stats
.postot
);
117 printf("\tAverage offset after cached hint: %.2f\n",
118 float(stats
.sumofs
) / stats
.postot
);
123 printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
124 stats
.lentot
, 100.*float(stats
.lenhits
)/stats
.lentot
);
129 } // anonymous namespace
131 #endif // wxPROFILE_STRING_CACHE
133 #endif // wxUSE_STRING_POS_CACHE
135 // ----------------------------------------------------------------------------
137 // ----------------------------------------------------------------------------
139 #if wxUSE_STD_IOSTREAM
143 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxCStrData
& str
)
145 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
146 return os
<< (const char *)str
.AsCharBuf();
148 return os
<< str
.AsInternal();
152 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxString
& str
)
154 return os
<< str
.c_str();
157 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxCharBuffer
& str
)
159 return os
<< str
.data();
163 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxWCharBuffer
& str
)
165 return os
<< str
.data();
169 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
171 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxString
& str
)
173 return wos
<< str
.wc_str();
176 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxCStrData
& str
)
178 return wos
<< str
.AsWChar();
181 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxWCharBuffer
& str
)
183 return wos
<< str
.data();
186 #endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
188 #endif // wxUSE_STD_IOSTREAM
190 // ===========================================================================
191 // wxString class core
192 // ===========================================================================
194 #if wxUSE_UNICODE_UTF8
196 void wxString::PosLenToImpl(size_t pos
, size_t len
,
197 size_t *implPos
, size_t *implLen
) const
203 else // have valid start position
205 const const_iterator b
= GetIterForNthChar(pos
);
206 *implPos
= wxStringImpl::const_iterator(b
.impl()) - m_impl
.begin();
211 else // have valid length too
213 // we need to handle the case of length specifying a substring
214 // going beyond the end of the string, just as std::string does
215 const const_iterator
e(end());
217 while ( len
&& i
<= e
)
223 *implLen
= i
.impl() - b
.impl();
228 #endif // wxUSE_UNICODE_UTF8
230 // ----------------------------------------------------------------------------
231 // wxCStrData converted strings caching
232 // ----------------------------------------------------------------------------
234 // FIXME-UTF8: temporarily disabled because it doesn't work with global
235 // string objects; re-enable after fixing this bug and benchmarking
236 // performance to see if using a hash is a good idea at all
239 // For backward compatibility reasons, it must be possible to assign the value
240 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
241 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
242 // because the memory would be freed immediately, but it has to be valid as long
243 // as the string is not modified, so that code like this still works:
245 // const wxChar *s = str.c_str();
246 // while ( s ) { ... }
248 // FIXME-UTF8: not thread safe!
249 // FIXME-UTF8: we currently clear the cached conversion only when the string is
250 // destroyed, but we should do it when the string is modified, to
251 // keep memory usage down
252 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
253 // invalidated the cache on every change, we could keep the previous
255 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
256 // to use mb_str() or wc_str() instead of (const [w]char*)c_str()
259 static inline void DeleteStringFromConversionCache(T
& hash
, const wxString
*s
)
261 typename
T::iterator i
= hash
.find(wxConstCast(s
, wxString
));
262 if ( i
!= hash
.end() )
270 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
271 // so we have to use wxString* here and const-cast when used
272 WX_DECLARE_HASH_MAP(wxString
*, char*, wxPointerHash
, wxPointerEqual
,
273 wxStringCharConversionCache
);
274 static wxStringCharConversionCache gs_stringsCharCache
;
276 const char* wxCStrData::AsChar() const
278 // remove previously cached value, if any (see FIXMEs above):
279 DeleteStringFromConversionCache(gs_stringsCharCache
, m_str
);
281 // convert the string and keep it:
282 const char *s
= gs_stringsCharCache
[wxConstCast(m_str
, wxString
)] =
283 m_str
->mb_str().release();
287 #endif // wxUSE_UNICODE
289 #if !wxUSE_UNICODE_WCHAR
290 WX_DECLARE_HASH_MAP(wxString
*, wchar_t*, wxPointerHash
, wxPointerEqual
,
291 wxStringWCharConversionCache
);
292 static wxStringWCharConversionCache gs_stringsWCharCache
;
294 const wchar_t* wxCStrData::AsWChar() const
296 // remove previously cached value, if any (see FIXMEs above):
297 DeleteStringFromConversionCache(gs_stringsWCharCache
, m_str
);
299 // convert the string and keep it:
300 const wchar_t *s
= gs_stringsWCharCache
[wxConstCast(m_str
, wxString
)] =
301 m_str
->wc_str().release();
305 #endif // !wxUSE_UNICODE_WCHAR
307 wxString::~wxString()
310 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
311 DeleteStringFromConversionCache(gs_stringsCharCache
, this);
313 #if !wxUSE_UNICODE_WCHAR
314 DeleteStringFromConversionCache(gs_stringsWCharCache
, this);
319 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
320 const char* wxCStrData::AsChar() const
322 #if wxUSE_UNICODE_UTF8
323 if ( wxLocaleIsUtf8
)
326 // under non-UTF8 locales, we have to convert the internal UTF-8
327 // representation using wxConvLibc and cache the result
329 wxString
*str
= wxConstCast(m_str
, wxString
);
331 // convert the string:
333 // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
334 // have it) but it's unfortunately not obvious to implement
335 // because we don't know how big a buffer we need for the
336 // given string length (in case of multibyte encodings, e.g.
337 // ISO-2022-JP or UTF-8 when internal representation is wchar_t)
339 // One idea would be to store more than just m_convertedToChar
340 // in wxString: then we could record the length of the string
341 // which was converted the last time and try to reuse the same
342 // buffer if the current length is not greater than it (this
343 // could still fail because string could have been modified in
344 // place but it would work most of the time, so we'd do it and
345 // only allocate the new buffer if in-place conversion returned
346 // an error). We could also store a bit saying if the string
347 // was modified since the last conversion (and update it in all
348 // operation modifying the string, of course) to avoid unneeded
349 // consequential conversions. But both of these ideas require
350 // adding more fields to wxString and require profiling results
351 // to be sure that we really gain enough from them to justify
353 wxCharBuffer
buf(str
->mb_str());
355 // if it failed, return empty string and not NULL to avoid crashes in code
356 // written with either wxWidgets 2 wxString or std::string behaviour in
357 // mind: neither of them ever returns NULL and so neither should we
361 if ( str
->m_convertedToChar
&&
362 strlen(buf
) == strlen(str
->m_convertedToChar
) )
364 // keep the same buffer for as long as possible, so that several calls
365 // to c_str() in a row still work:
366 strcpy(str
->m_convertedToChar
, buf
);
370 str
->m_convertedToChar
= buf
.release();
374 return str
->m_convertedToChar
+ m_offset
;
376 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
378 #if !wxUSE_UNICODE_WCHAR
379 const wchar_t* wxCStrData::AsWChar() const
381 wxString
*str
= wxConstCast(m_str
, wxString
);
383 // convert the string:
384 wxWCharBuffer
buf(str
->wc_str());
386 // notice that here, unlike above in AsChar(), conversion can't fail as our
387 // internal UTF-8 is always well-formed -- or the string was corrupted and
388 // all bets are off anyhow
390 // FIXME-UTF8: do the conversion in-place in the existing buffer
391 if ( str
->m_convertedToWChar
&&
392 wxWcslen(buf
) == wxWcslen(str
->m_convertedToWChar
) )
394 // keep the same buffer for as long as possible, so that several calls
395 // to c_str() in a row still work:
396 memcpy(str
->m_convertedToWChar
, buf
, sizeof(wchar_t) * wxWcslen(buf
));
400 str
->m_convertedToWChar
= buf
.release();
404 return str
->m_convertedToWChar
+ m_offset
;
406 #endif // !wxUSE_UNICODE_WCHAR
408 // ===========================================================================
409 // wxString class core
410 // ===========================================================================
412 // ---------------------------------------------------------------------------
413 // construction and conversion
414 // ---------------------------------------------------------------------------
416 #if wxUSE_UNICODE_WCHAR
418 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
419 const wxMBConv
& conv
)
422 if ( !psz
|| nLength
== 0 )
423 return SubstrBufFromMB(L
"", 0);
425 if ( nLength
== npos
)
429 wxWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
431 return SubstrBufFromMB(_T(""), 0);
433 return SubstrBufFromMB(wcBuf
, wcLen
);
435 #endif // wxUSE_UNICODE_WCHAR
437 #if wxUSE_UNICODE_UTF8
439 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
440 const wxMBConv
& conv
)
443 if ( !psz
|| nLength
== 0 )
444 return SubstrBufFromMB("", 0);
446 // if psz is already in UTF-8, we don't have to do the roundtrip to
447 // wchar_t* and back:
450 // we need to validate the input because UTF8 iterators assume valid
451 // UTF-8 sequence and psz may be invalid:
452 if ( wxStringOperations::IsValidUtf8String(psz
, nLength
) )
454 // we must pass the real string length to SubstrBufFromMB ctor
455 if ( nLength
== npos
)
456 nLength
= psz
? strlen(psz
) : 0;
457 return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz
), nLength
);
459 // else: do the roundtrip through wchar_t*
462 if ( nLength
== npos
)
465 // first convert to wide string:
467 wxWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
469 return SubstrBufFromMB("", 0);
471 // and then to UTF-8:
472 SubstrBufFromMB
buf(ConvertStr(wcBuf
, wcLen
, wxMBConvStrictUTF8()));
473 // widechar -> UTF-8 conversion isn't supposed to ever fail:
474 wxASSERT_MSG( buf
.data
, _T("conversion to UTF-8 failed") );
478 #endif // wxUSE_UNICODE_UTF8
480 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
482 wxString::SubstrBufFromWC
wxString::ConvertStr(const wchar_t *pwz
, size_t nLength
,
483 const wxMBConv
& conv
)
486 if ( !pwz
|| nLength
== 0 )
487 return SubstrBufFromWC("", 0);
489 if ( nLength
== npos
)
493 wxCharBuffer
mbBuf(conv
.cWC2MB(pwz
, nLength
, &mbLen
));
495 return SubstrBufFromWC("", 0);
497 return SubstrBufFromWC(mbBuf
, mbLen
);
499 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
502 #if wxUSE_UNICODE_WCHAR
504 //Convert wxString in Unicode mode to a multi-byte string
505 const wxCharBuffer
wxString::mb_str(const wxMBConv
& conv
) const
507 return conv
.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL
);
510 #elif wxUSE_UNICODE_UTF8
512 const wxWCharBuffer
wxString::wc_str() const
514 return wxMBConvStrictUTF8().cMB2WC
517 m_impl
.length() + 1, // size, not length
522 const wxCharBuffer
wxString::mb_str(const wxMBConv
& conv
) const
525 return wxCharBuffer::CreateNonOwned(m_impl
.c_str());
527 // FIXME-UTF8: use wc_str() here once we have buffers with length
530 wxWCharBuffer
wcBuf(wxMBConvStrictUTF8().cMB2WC
533 m_impl
.length() + 1, // size
537 return wxCharBuffer("");
539 return conv
.cWC2MB(wcBuf
, wcLen
+1, NULL
);
544 //Converts this string to a wide character string if unicode
545 //mode is not enabled and wxUSE_WCHAR_T is enabled
546 const wxWCharBuffer
wxString::wc_str(const wxMBConv
& conv
) const
548 return conv
.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL
);
551 #endif // Unicode/ANSI
553 // shrink to minimal size (releasing extra memory)
554 bool wxString::Shrink()
556 wxString
tmp(begin(), end());
558 return tmp
.length() == length();
561 // deprecated compatibility code:
562 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
563 wxStringCharType
*wxString::GetWriteBuf(size_t nLen
)
565 return DoGetWriteBuf(nLen
);
568 void wxString::UngetWriteBuf()
573 void wxString::UngetWriteBuf(size_t nLen
)
575 DoUngetWriteBuf(nLen
);
577 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
580 // ---------------------------------------------------------------------------
582 // ---------------------------------------------------------------------------
584 // all functions are inline in string.h
586 // ---------------------------------------------------------------------------
587 // concatenation operators
588 // ---------------------------------------------------------------------------
591 * concatenation functions come in 5 flavours:
593 * char + string and string + char
594 * C str + string and string + C str
597 wxString
operator+(const wxString
& str1
, const wxString
& str2
)
599 #if !wxUSE_STL_BASED_WXSTRING
600 wxASSERT( str1
.IsValid() );
601 wxASSERT( str2
.IsValid() );
610 wxString
operator+(const wxString
& str
, wxUniChar ch
)
612 #if !wxUSE_STL_BASED_WXSTRING
613 wxASSERT( str
.IsValid() );
622 wxString
operator+(wxUniChar ch
, const wxString
& str
)
624 #if !wxUSE_STL_BASED_WXSTRING
625 wxASSERT( str
.IsValid() );
634 wxString
operator+(const wxString
& str
, const char *psz
)
636 #if !wxUSE_STL_BASED_WXSTRING
637 wxASSERT( str
.IsValid() );
641 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
642 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
650 wxString
operator+(const wxString
& str
, const wchar_t *pwz
)
652 #if !wxUSE_STL_BASED_WXSTRING
653 wxASSERT( str
.IsValid() );
657 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
658 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
666 wxString
operator+(const char *psz
, const wxString
& str
)
668 #if !wxUSE_STL_BASED_WXSTRING
669 wxASSERT( str
.IsValid() );
673 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
674 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
682 wxString
operator+(const wchar_t *pwz
, const wxString
& str
)
684 #if !wxUSE_STL_BASED_WXSTRING
685 wxASSERT( str
.IsValid() );
689 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
690 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
698 // ---------------------------------------------------------------------------
700 // ---------------------------------------------------------------------------
702 bool wxString::IsSameAs(wxUniChar c
, bool compareWithCase
) const
704 return (length() == 1) && (compareWithCase
? GetChar(0u) == c
705 : wxToupper(GetChar(0u)) == wxToupper(c
));
708 #ifdef HAVE_STD_STRING_COMPARE
710 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
711 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
712 // sort strings in characters code point order by sorting the byte sequence
713 // in byte values order (i.e. what strcmp() and memcmp() do).
715 int wxString::compare(const wxString
& str
) const
717 return m_impl
.compare(str
.m_impl
);
720 int wxString::compare(size_t nStart
, size_t nLen
,
721 const wxString
& str
) const
724 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
725 return m_impl
.compare(pos
, len
, str
.m_impl
);
728 int wxString::compare(size_t nStart
, size_t nLen
,
730 size_t nStart2
, size_t nLen2
) const
733 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
736 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
738 return m_impl
.compare(pos
, len
, str
.m_impl
, pos2
, len2
);
741 int wxString::compare(const char* sz
) const
743 return m_impl
.compare(ImplStr(sz
));
746 int wxString::compare(const wchar_t* sz
) const
748 return m_impl
.compare(ImplStr(sz
));
751 int wxString::compare(size_t nStart
, size_t nLen
,
752 const char* sz
, size_t nCount
) const
755 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
757 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
759 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
762 int wxString::compare(size_t nStart
, size_t nLen
,
763 const wchar_t* sz
, size_t nCount
) const
766 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
768 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
770 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
773 #else // !HAVE_STD_STRING_COMPARE
775 static inline int wxDoCmp(const wxStringCharType
* s1
, size_t l1
,
776 const wxStringCharType
* s2
, size_t l2
)
779 return wxStringMemcmp(s1
, s2
, l1
);
782 int ret
= wxStringMemcmp(s1
, s2
, l1
);
783 return ret
== 0 ? -1 : ret
;
787 int ret
= wxStringMemcmp(s1
, s2
, l2
);
788 return ret
== 0 ? +1 : ret
;
792 int wxString::compare(const wxString
& str
) const
794 return ::wxDoCmp(m_impl
.data(), m_impl
.length(),
795 str
.m_impl
.data(), str
.m_impl
.length());
798 int wxString::compare(size_t nStart
, size_t nLen
,
799 const wxString
& str
) const
801 wxASSERT(nStart
<= length());
802 size_type strLen
= length() - nStart
;
803 nLen
= strLen
< nLen
? strLen
: nLen
;
806 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
808 return ::wxDoCmp(m_impl
.data() + pos
, len
,
809 str
.m_impl
.data(), str
.m_impl
.length());
812 int wxString::compare(size_t nStart
, size_t nLen
,
814 size_t nStart2
, size_t nLen2
) const
816 wxASSERT(nStart
<= length());
817 wxASSERT(nStart2
<= str
.length());
818 size_type strLen
= length() - nStart
,
819 strLen2
= str
.length() - nStart2
;
820 nLen
= strLen
< nLen
? strLen
: nLen
;
821 nLen2
= strLen2
< nLen2
? strLen2
: nLen2
;
824 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
826 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
828 return ::wxDoCmp(m_impl
.data() + pos
, len
,
829 str
.m_impl
.data() + pos2
, len2
);
// Compare this string with the narrow C string sz.
//
// Returns whatever wxDoCmp() returns for this string's m_impl buffer and the
// buffer obtained from sz via ImplStr().
832 int wxString::compare(const char* sz
) const
// NOTE(review): ImplStr() presumably converts sz to the internal string
// representation of this build -- confirm in string.h
834 SubstrBufFromMB
str(ImplStr(sz
, npos
));
// npos was passed as the length above, so the buffer may still carry npos
// instead of a real length: compute it from the data in that case
835 if ( str
.len
== npos
)
836 str
.len
= wxStringStrlen(str
.data
);
837 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
// Compare this string with the wide C string sz.
//
// Returns whatever wxDoCmp() returns for this string's m_impl buffer and the
// buffer obtained from sz via ImplStr().
840 int wxString::compare(const wchar_t* sz
) const
// NOTE(review): ImplStr() presumably converts sz to the internal string
// representation of this build -- confirm in string.h
842 SubstrBufFromWC
str(ImplStr(sz
, npos
));
// npos was passed as the length above, so the buffer may still carry npos
// instead of a real length: compute it from the data in that case
843 if ( str
.len
== npos
)
844 str
.len
= wxStringStrlen(str
.data
);
845 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
848 int wxString::compare(size_t nStart
, size_t nLen
,
849 const char* sz
, size_t nCount
) const
851 wxASSERT(nStart
<= length());
852 size_type strLen
= length() - nStart
;
853 nLen
= strLen
< nLen
? strLen
: nLen
;
856 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
858 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
859 if ( str
.len
== npos
)
860 str
.len
= wxStringStrlen(str
.data
);
862 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
865 int wxString::compare(size_t nStart
, size_t nLen
,
866 const wchar_t* sz
, size_t nCount
) const
868 wxASSERT(nStart
<= length());
869 size_type strLen
= length() - nStart
;
870 nLen
= strLen
< nLen
? strLen
: nLen
;
873 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
875 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
876 if ( str
.len
== npos
)
877 str
.len
= wxStringStrlen(str
.data
);
879 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
882 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
885 // ---------------------------------------------------------------------------
886 // find_{first,last}_[not]_of functions
887 // ---------------------------------------------------------------------------
889 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
891 // NB: All these functions are implemented with the argument being wxChar*,
892 // i.e. widechar string in any Unicode build, even though native string
893 // representation is char* in the UTF-8 build. This is because we couldn't
894 // use memchr() to determine if a character is in a set encoded as UTF-8.
896 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
) const
898 return find_first_of(sz
, nStart
, wxStrlen(sz
));
901 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
) const
903 return find_first_not_of(sz
, nStart
, wxStrlen(sz
));
906 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
908 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
911 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
913 if ( wxTmemchr(sz
, *i
, n
) )
920 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
922 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
925 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
927 if ( !wxTmemchr(sz
, *i
, n
) )
935 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
) const
937 return find_last_of(sz
, nStart
, wxStrlen(sz
));
940 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
) const
942 return find_last_not_of(sz
, nStart
, wxStrlen(sz
));
945 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
947 size_t len
= length();
949 if ( nStart
== npos
)
955 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
959 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
960 i
!= rend(); --idx
, ++i
)
962 if ( wxTmemchr(sz
, *i
, n
) )
969 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
971 size_t len
= length();
973 if ( nStart
== npos
)
979 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
983 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
984 i
!= rend(); --idx
, ++i
)
986 if ( !wxTmemchr(sz
, *i
, n
) )
993 size_t wxString::find_first_not_of(wxUniChar ch
, size_t nStart
) const
995 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
998 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
1007 size_t wxString::find_last_not_of(wxUniChar ch
, size_t nStart
) const
1009 size_t len
= length();
1011 if ( nStart
== npos
)
1017 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
1020 size_t idx
= nStart
;
1021 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
1022 i
!= rend(); --idx
, ++i
)
1031 // the functions above were implemented for wchar_t* arguments in Unicode
1032 // build and char* in ANSI build; below are implementations for the other
1035 #define wxOtherCharType char
1036 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
1038 #define wxOtherCharType wchar_t
1039 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
1042 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
) const
1043 { return find_first_of(STRCONV(sz
), nStart
); }
1045 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
,
1047 { return find_first_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1048 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
) const
1049 { return find_last_of(STRCONV(sz
), nStart
); }
1050 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
,
1052 { return find_last_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1053 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
1054 { return find_first_not_of(STRCONV(sz
), nStart
); }
1055 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
,
1057 { return find_first_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1058 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
1059 { return find_last_not_of(STRCONV(sz
), nStart
); }
1060 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
,
1062 { return find_last_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1064 #undef wxOtherCharType
1067 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1069 // ===========================================================================
1070 // other common string functions
1071 // ===========================================================================
1073 int wxString::CmpNoCase(const wxString
& s
) const
1075 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
1077 const_iterator i1
= begin();
1078 const_iterator end1
= end();
1079 const_iterator i2
= s
.begin();
1080 const_iterator end2
= s
.end();
1082 for ( ; i1
!= end1
&& i2
!= end2
; ++i1
, ++i2
)
1084 wxUniChar lower1
= (wxChar
)wxTolower(*i1
);
1085 wxUniChar lower2
= (wxChar
)wxTolower(*i2
);
1086 if ( lower1
!= lower2
)
1087 return lower1
< lower2
? -1 : 1;
1090 size_t len1
= length();
1091 size_t len2
= s
.length();
1095 else if ( len1
> len2
)
1104 #ifndef __SCHAR_MAX__
1105 #define __SCHAR_MAX__ 127
1109 wxString
wxString::FromAscii(const char *ascii
, size_t len
)
1111 if (!ascii
|| len
== 0)
1112 return wxEmptyString
;
1117 wxStringInternalBuffer
buf(res
, len
);
1118 wxStringCharType
*dest
= buf
;
1120 for ( ; len
> 0; --len
)
1122 unsigned char c
= (unsigned char)*ascii
++;
1123 wxASSERT_MSG( c
< 0x80,
1124 _T("Non-ASCII value passed to FromAscii().") );
1126 *dest
++ = (wchar_t)c
;
1133 wxString
wxString::FromAscii(const char *ascii
)
1135 return FromAscii(ascii
, wxStrlen(ascii
));
1138 wxString
wxString::FromAscii(char ascii
)
1140 // What do we do with '\0' ?
1142 unsigned char c
= (unsigned char)ascii
;
1144 wxASSERT_MSG( c
< 0x80, _T("Non-ASCII value passed to FromAscii().") );
1146 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1147 return wxString(wxUniChar((wchar_t)c
));
1150 const wxCharBuffer
wxString::ToAscii() const
1152 // this will allocate enough space for the terminating NUL too
1153 wxCharBuffer
buffer(length());
1154 char *dest
= buffer
.data();
1156 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1159 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1160 *dest
++ = c
.IsAscii() ? (char)c
: '_';
1162 // the output string can't have embedded NULs anyhow, so we can safely
1163 // stop at first of them even if we do have any
1171 #endif // wxUSE_UNICODE
1173 // extract string of length nCount starting at nFirst
1174 wxString
wxString::Mid(size_t nFirst
, size_t nCount
) const
1176 size_t nLen
= length();
1178 // default value of nCount is npos and means "till the end"
1179 if ( nCount
== npos
)
1181 nCount
= nLen
- nFirst
;
1184 // out-of-bounds requests return sensible things
1185 if ( nFirst
+ nCount
> nLen
)
1187 nCount
= nLen
- nFirst
;
1190 if ( nFirst
> nLen
)
1192 // AllocCopy() will return empty string
1193 return wxEmptyString
;
1196 wxString
dest(*this, nFirst
, nCount
);
1197 if ( dest
.length() != nCount
)
1199 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1205 // check that the string starts with prefix and return the rest of the string
1206 // in the provided pointer if it is not NULL, otherwise return false
1207 bool wxString::StartsWith(const wxString
& prefix
, wxString
*rest
) const
1209 if ( compare(0, prefix
.length(), prefix
) != 0 )
1214 // put the rest of the string into provided pointer
1215 rest
->assign(*this, prefix
.length(), npos
);
1222 // check that the string ends with suffix and return the rest of it in the
1223 // provided pointer if it is not NULL, otherwise return false
1224 bool wxString::EndsWith(const wxString
& suffix
, wxString
*rest
) const
1226 int start
= length() - suffix
.length();
1228 if ( start
< 0 || compare(start
, npos
, suffix
) != 0 )
1233 // put the rest of the string into provided pointer
1234 rest
->assign(*this, 0, start
);
1241 // extract nCount last (rightmost) characters
1242 wxString
wxString::Right(size_t nCount
) const
1244 if ( nCount
> length() )
1247 wxString
dest(*this, length() - nCount
, nCount
);
1248 if ( dest
.length() != nCount
) {
1249 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1254 // get all characters after the last occurence of ch
1255 // (returns the whole string if ch not found)
1256 wxString
wxString::AfterLast(wxUniChar ch
) const
1259 int iPos
= Find(ch
, true);
1260 if ( iPos
== wxNOT_FOUND
)
1263 str
= wx_str() + iPos
+ 1;
1268 // extract nCount first (leftmost) characters
1269 wxString
wxString::Left(size_t nCount
) const
1271 if ( nCount
> length() )
1274 wxString
dest(*this, 0, nCount
);
1275 if ( dest
.length() != nCount
) {
1276 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1281 // get all characters before the first occurence of ch
1282 // (returns the whole string if ch not found)
1283 wxString
wxString::BeforeFirst(wxUniChar ch
) const
1285 int iPos
= Find(ch
);
1286 if ( iPos
== wxNOT_FOUND
) iPos
= length();
1287 return wxString(*this, 0, iPos
);
1290 /// get all characters before the last occurence of ch
1291 /// (returns empty string if ch not found)
1292 wxString
wxString::BeforeLast(wxUniChar ch
) const
1295 int iPos
= Find(ch
, true);
1296 if ( iPos
!= wxNOT_FOUND
&& iPos
!= 0 )
1297 str
= wxString(c_str(), iPos
);
1302 /// get all characters after the first occurence of ch
1303 /// (returns empty string if ch not found)
1304 wxString
wxString::AfterFirst(wxUniChar ch
) const
1307 int iPos
= Find(ch
);
1308 if ( iPos
!= wxNOT_FOUND
)
1309 str
= wx_str() + iPos
+ 1;
1314 // replace first (or all) occurrences of some substring with another one
1315 size_t wxString::Replace(const wxString
& strOld
,
1316 const wxString
& strNew
, bool bReplaceAll
)
1318 // if we tried to replace an empty string we'd enter an infinite loop below
1319 wxCHECK_MSG( !strOld
.empty(), 0,
1320 _T("wxString::Replace(): invalid parameter") );
1322 wxSTRING_INVALIDATE_CACHE();
1324 size_t uiCount
= 0; // count of replacements made
1326 // optimize the special common case: replacement of one character by
1327 // another one (in UTF-8 case we can only do this for ASCII characters)
1329 // benchmarks show that this special version is around 3 times faster
1330 // (depending on the proportion of matching characters and UTF-8/wchar_t
1332 if ( strOld
.m_impl
.length() == 1 && strNew
.m_impl
.length() == 1 )
1334 const wxStringCharType chOld
= strOld
.m_impl
[0],
1335 chNew
= strNew
.m_impl
[0];
1337 // this loop is the simplified version of the one below
1338 for ( size_t pos
= 0; ; )
1340 pos
= m_impl
.find(chOld
, pos
);
1344 m_impl
[pos
++] = chNew
;
1352 else // general case
1354 const size_t uiOldLen
= strOld
.m_impl
.length();
1355 const size_t uiNewLen
= strNew
.m_impl
.length();
1357 for ( size_t pos
= 0; ; )
1359 pos
= m_impl
.find(strOld
.m_impl
, pos
);
1363 // replace this occurrence of the old string with the new one
1364 m_impl
.replace(pos
, uiOldLen
, strNew
.m_impl
);
1366 // move up pos past the string that was replaced
1369 // increase replace count
1372 // stop after the first one?
1381 bool wxString::IsAscii() const
1383 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1385 if ( !(*i
).IsAscii() )
1392 bool wxString::IsWord() const
1394 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1396 if ( !wxIsalpha(*i
) )
1403 bool wxString::IsNumber() const
1408 const_iterator i
= begin();
1410 if ( *i
== _T('-') || *i
== _T('+') )
1413 for ( ; i
!= end(); ++i
)
1415 if ( !wxIsdigit(*i
) )
1422 wxString
wxString::Strip(stripType w
) const
1425 if ( w
& leading
) s
.Trim(false);
1426 if ( w
& trailing
) s
.Trim(true);
1430 // ---------------------------------------------------------------------------
1432 // ---------------------------------------------------------------------------
1434 wxString
& wxString::MakeUpper()
1436 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1437 *it
= (wxChar
)wxToupper(*it
);
1442 wxString
& wxString::MakeLower()
1444 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1445 *it
= (wxChar
)wxTolower(*it
);
1450 wxString
& wxString::MakeCapitalized()
1452 const iterator en
= end();
1453 iterator it
= begin();
1456 *it
= (wxChar
)wxToupper(*it
);
1457 for ( ++it
; it
!= en
; ++it
)
1458 *it
= (wxChar
)wxTolower(*it
);
1464 // ---------------------------------------------------------------------------
1465 // trimming and padding
1466 // ---------------------------------------------------------------------------
1468 // some compilers (VC++ 6.0 not to name them) return true for a call to
1469 // isspace('\xEA') in the C locale which seems to be broken to me, but we have
1470 // to live with this by checking that the character is a 7 bit one - even if
1471 // this may fail to detect some spaces (I don't know if Unicode doesn't have
1472 // space-like symbols somewhere except in the first 128 chars), it is arguably
1473 // still better than trimming away accented letters
1474 inline int wxSafeIsspace(wxChar ch
) { return (ch
< 127) && wxIsspace(ch
); }
1476 // trims spaces (in the sense of isspace) from left or right side
1477 wxString
& wxString::Trim(bool bFromRight
)
1479 // first check if we're going to modify the string at all
1482 (bFromRight
&& wxSafeIsspace(GetChar(length() - 1))) ||
1483 (!bFromRight
&& wxSafeIsspace(GetChar(0u)))
1489 // find last non-space character
1490 reverse_iterator psz
= rbegin();
1491 while ( (psz
!= rend()) && wxSafeIsspace(*psz
) )
1494 // truncate at trailing space start
1495 erase(psz
.base(), end());
1499 // find first non-space character
1500 iterator psz
= begin();
1501 while ( (psz
!= end()) && wxSafeIsspace(*psz
) )
1504 // fix up data and length
1505 erase(begin(), psz
);
1512 // adds nCount characters chPad to the string from either side
1513 wxString
& wxString::Pad(size_t nCount
, wxUniChar chPad
, bool bFromRight
)
1515 wxString
s(chPad
, nCount
);
1528 // truncate the string
1529 wxString
& wxString::Truncate(size_t uiLen
)
1531 if ( uiLen
< length() )
1533 erase(begin() + uiLen
, end());
1535 //else: nothing to do, string is already short enough
1540 // ---------------------------------------------------------------------------
1541 // finding (return wxNOT_FOUND if not found and index otherwise)
1542 // ---------------------------------------------------------------------------
1545 int wxString::Find(wxUniChar ch
, bool bFromEnd
) const
1547 size_type idx
= bFromEnd
? find_last_of(ch
) : find_first_of(ch
);
1549 return (idx
== npos
) ? wxNOT_FOUND
: (int)idx
;
1552 // ----------------------------------------------------------------------------
1553 // conversion to numbers
1554 // ----------------------------------------------------------------------------
// All the integer conversion functions below share exactly the same
// implementation, so it is factored out into a macro. Number extraction
// works correctly on UTF-8 strings, so wxStringCharType and wx_str() can be
// used directly for maximum efficiency.

// DO_IF_NOT_WINCE(x) expands to x everywhere except under Windows CE, where
// it expands to nothing; it wraps the uses of errno below.
#ifndef __WXWINCE__
    #define DO_IF_NOT_WINCE(x) x
#else
    #define DO_IF_NOT_WINCE(x)
#endif

// Body of a "bool ToXXX(T *out, int base) const" conversion function:
//  - out:  pointer receiving the result, must not be NULL
//  - base: numeric base, either 0 or in the 2..36 range
//  - func: strtol()-like function doing the actual parsing
//  - T:    type returned by func and stored in *out
// The macro deliberately ends without a semicolon: the caller supplies it.
#define WX_STRING_TO_INT_TYPE(out, base, func, T) \
    wxCHECK_MSG( out, false, _T("NULL output pointer") ); \
    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") ); \
    \
    DO_IF_NOT_WINCE( errno = 0; ) \
    \
    const wxStringCharType *wxScanBegin = wx_str(); \
    wxStringCharType *wxScanEnd; \
    T wxScanValue = func(wxScanBegin, &wxScanEnd, base); \
    \
    /* succeed only if the scan consumed the whole, non-empty string */ \
    /* (i.e. stopped at the terminating NUL) and no under/overflow */ \
    /* was reported */ \
    if ( *wxScanEnd || wxScanEnd == wxScanBegin \
            DO_IF_NOT_WINCE(|| errno == ERANGE) ) \
        return false; \
    *out = wxScanValue; \
    return true
1584 bool wxString::ToLong(long *pVal
, int base
) const
1586 WX_STRING_TO_INT_TYPE(pVal
, base
, wxStrtol
, long);
1589 bool wxString::ToULong(unsigned long *pVal
, int base
) const
1591 WX_STRING_TO_INT_TYPE(pVal
, base
, wxStrtoul
, unsigned long);
1594 bool wxString::ToLongLong(wxLongLong_t
*pVal
, int base
) const
1596 WX_STRING_TO_INT_TYPE(pVal
, base
, wxStrtoll
, wxLongLong_t
);
1599 bool wxString::ToULongLong(wxULongLong_t
*pVal
, int base
) const
1601 WX_STRING_TO_INT_TYPE(pVal
, base
, wxStrtoull
, wxULongLong_t
);
1604 bool wxString::ToDouble(double *pVal
) const
1606 wxCHECK_MSG( pVal
, false, _T("NULL output pointer") );
1608 DO_IF_NOT_WINCE( errno
= 0; )
1610 const wxChar
*start
= c_str();
1612 double val
= wxStrtod(start
, &end
);
1614 // return true only if scan was stopped by the terminating NUL and if the
1615 // string was not empty to start with and no under/overflow occurred
1616 if ( *end
|| end
== start
DO_IF_NOT_WINCE(|| errno
== ERANGE
) )
1624 // ---------------------------------------------------------------------------
1626 // ---------------------------------------------------------------------------
1628 #if !wxUSE_UTF8_LOCALE_ONLY
1630 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1631 wxString
wxStringPrintfMixinBase::DoFormatWchar(const wxChar
*format
, ...)
1633 wxString
wxString::DoFormatWchar(const wxChar
*format
, ...)
1637 va_start(argptr
, format
);
1640 s
.PrintfV(format
, argptr
);
1646 #endif // !wxUSE_UTF8_LOCALE_ONLY
#if wxUSE_UNICODE_UTF8
// printf()-like factory taking a char format string: forwards the variable
// arguments to PrintfV() on a fresh string and returns that string.
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString formatted;

    va_list args;
    va_start(args, format);
    formatted.PrintfV(format, args);
    va_end(args);

    return formatted;
}
#endif // wxUSE_UNICODE_UTF8
1665 wxString
wxString::FormatV(const wxString
& format
, va_list argptr
)
1668 s
.PrintfV(format
, argptr
);
1672 #if !wxUSE_UTF8_LOCALE_ONLY
1673 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1674 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar
*format
, ...)
1676 int wxString::DoPrintfWchar(const wxChar
*format
, ...)
1680 va_start(argptr
, format
);
1682 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1683 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1684 // because it's the only cast that works safely for downcasting when
1685 // multiple inheritance is used:
1686 wxString
*str
= static_cast<wxString
*>(this);
1688 wxString
*str
= this;
1691 int iLen
= str
->PrintfV(format
, argptr
);
1697 #endif // !wxUSE_UTF8_LOCALE_ONLY
#if wxUSE_UNICODE_UTF8
// printf()-like formatting into this string from a char format string:
// forwards the variable arguments to PrintfV() and returns its result.
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);
    const int written = PrintfV(format, args);
    va_end(args);

    return written;
}
#endif // wxUSE_UNICODE_UTF8
1714 Uses wxVsnprintf and places the result into this string.
1716 In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1717 it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in
1718 the ISO C99 (and thus SUSv3) standard the return value for the case of
1719 an undersized buffer is inconsistent. For conforming vsnprintf
1720 implementations the function must return the number of characters that
1721 would have been printed had the buffer been large enough. For conforming
1722 vswprintf implementations the function must return a negative number
1725 What vswprintf sets errno to is undefined but Darwin seems to set it to
1726 EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of
1727 those are defined in the standard and backed up by several conformance
1728 statements. Note that ENOMEM mentioned in the manual page does not
1729 apply to swprintf, only wprintf and fwprintf.
1731 Official manual page:
1732 http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1734 Some conformance statements (AIX, Solaris):
1735 http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1736 http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1738 Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1739 EILSEQ and EINVAL are specifically defined to mean the error is other than
1740 an undersized buffer and no other errno are defined we treat those two
1741 as meaning hard errors and everything else gets the old behavior which
1742 is to keep looping and increasing buffer size until the function succeeds.
1744 In practice it's impossible to determine before compilation which behavior
1745 may be used. The vswprintf function may have vsnprintf-like behavior or
1746 vice-versa. Behavior detected on one release can theoretically change
1747 with an updated release. Not to mention that configure testing for it
1748 would require the test to be run on the host system, not the build system
1749 which makes cross compilation difficult. Therefore, we make no assumptions
1750 about behavior and try our best to handle every known case, including the
1751 case where wxVsnprintf returns a negative number and fails to set errno.
1753 There is yet one more non-standard implementation and that is our own.
1754 Fortunately, that can be detected at compile-time.
1756 On top of all that, ISO C99 explicitly defines snprintf to write a null
1757 character to the last position of the specified buffer. That would be
1758 at the given buffer size minus 1. It is supposed to do this even if it
1759 turns out that the buffer is sized too small.
1761 Darwin (tested on 10.5) follows the C99 behavior exactly.
1763 Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1764 errno even when it fails. However, it only seems to ever fail due
1765 to an undersized buffer.
// DoStringPrintfV(): formats `format` with the arguments in `argptr` into
// `str` and returns str.length().
//
// The output is produced by wxVsnprintf() writing into a string buffer
// object created over `str` with room for `size + 1` characters; the
// argument list is copied with wxVaCopy() before each call because
// wxVsnprintf() may modify it. When the result does not fit, the buffer
// size is increased (doubled in the branches visible here) and the call is
// retried; errno values EILSEQ and EINVAL are treated as hard errors.
//
// In UTF-8 builds this is a template over the buffer type (BufferType),
// otherwise a plain function using wxStringBuffer.
1767 #if wxUSE_UNICODE_UTF8
1768 template<typename BufferType
>
1770 // we only need one version in non-UTF8 builds and at least two Windows
1771 // compilers have problems with this function template, so use just one
1772 // normal function here
1774 static int DoStringPrintfV(wxString
& str
,
1775 const wxString
& format
, va_list argptr
)
1781 #if wxUSE_UNICODE_UTF8
1782 BufferType
tmp(str
, size
+ 1);
1783 typename
BufferType::CharType
*buf
= tmp
;
1785 wxStringBuffer
tmp(str
, size
+ 1);
1793 // in UTF-8 build, leaving uninitialized junk in the buffer
1794 // could result in invalid non-empty UTF-8 string, so just
1795 // reset the string to empty on failure:
1800 // wxVsnprintf() may modify the original arg pointer, so pass it
1803 wxVaCopy(argptrcopy
, argptr
);
1806 // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1809 int len
= wxVsnprintf(buf
, size
, format
, argptrcopy
);
1812 // some implementations of vsnprintf() don't NUL terminate
1813 // the string if there is not enough space for it so
1814 // always do it manually
1815 // FIXME: This really seems to be wrong and would be an off-by-one
1816 // bug except the code above allocates an extra character.
1817 buf
[size
] = _T('\0');
1819 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1820 // total number of characters which would have been written if the
1821 // buffer were large enough (newer standards such as Unix98)
1824 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1825 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1826 // is true if *both* of them use our own implementation,
1827 // otherwise we can't be sure
1828 #if wxUSE_WXVSNPRINTF
1829 // we know that our own implementation of wxVsnprintf() returns -1
1830 // only for a format error - thus there's something wrong with
1831 // the user's format string
1834 #else // possibly using system version
1835 // assume it only returns error if there is not enough space, but
1836 // as we don't know how much we need, double the current size of
1839 if( (errno
== EILSEQ
) || (errno
== EINVAL
) )
1840 // If errno was set to one of the two well-known hard errors
1841 // then fail immediately to avoid an infinite loop.
1844 #endif // __WXWINCE__
1845 // still not enough, as we don't know how much we need, double the
1846 // current size of the buffer
1848 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1850 else if ( len
>= size
)
1852 #if wxUSE_WXVSNPRINTF
1853 // we know that our own implementation of wxVsnprintf() returns
1854 // size+1 when there's not enough space but that's not the size
1855 // of the required buffer!
1856 size
*= 2; // so we just double the current size of the buffer
1858 // some vsnprintf() implementations NUL-terminate the buffer and
1859 // some don't in len == size case, to be safe always add 1
1860 // FIXME: I don't quite understand this comment. The vsnprintf
1861 // function is specifically defined to return the number of
1862 // characters printed not including the null terminator.
1863 // So OF COURSE you need to add 1 to get the right buffer size.
1864 // The following line is definitely correct, no question.
1868 else // ok, there was enough space
1874 // we could have overshot
1877 return str
.length();
1880 int wxString::PrintfV(const wxString
& format
, va_list argptr
)
1882 #if wxUSE_UNICODE_UTF8
1883 #if wxUSE_STL_BASED_WXSTRING
1884 typedef wxStringTypeBuffer
<char> Utf8Buffer
;
1886 typedef wxStringInternalBuffer Utf8Buffer
;
1890 #if wxUSE_UTF8_LOCALE_ONLY
1891 return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argptr
);
1893 #if wxUSE_UNICODE_UTF8
1894 if ( wxLocaleIsUtf8
)
1895 return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argptr
);
1898 return DoStringPrintfV
<wxStringBuffer
>(*this, format
, argptr
);
1900 return DoStringPrintfV(*this, format
, argptr
);
1901 #endif // UTF8/WCHAR
1905 // ----------------------------------------------------------------------------
1906 // misc other operations
1907 // ----------------------------------------------------------------------------
// wxString::Matches(): shell-style wildcard matching of this string against
// `mask`, in which '?' and '*' are metacharacters.
//
// The regex-based implementation is compiled out ("#if 0"); the active code
// walks the mask (pszMask) and the text (pszTxt) in parallel. After a '*'
// it takes the literal run of the mask up to the next metacharacter and
// looks for it in the text with wxStrstr(). The position of the last '*' is
// remembered in pszLastStarInText/pszLastStarInMask so that, if the text is
// not exhausted when the mask is, matching can be retried one character
// further into the text.
//
// In UTF-8 builds both strings are first converted with wc_str() so that
// the pointers step over whole characters.
1909 // returns true if the string matches the pattern which may contain '*' and
1910 // '?' metacharacters (as usual, '?' matches any character and '*' any number
1912 bool wxString::Matches(const wxString
& mask
) const
1914 // I disable this code as it doesn't seem to be faster (in fact, it seems
1915 // to be much slower) than the old, hand-written code below and using it
1916 // here requires always linking with libregex even if the user code doesn't
1918 #if 0 // wxUSE_REGEX
1919 // first translate the shell-like mask into a regex
1921 pattern
.reserve(wxStrlen(pszMask
));
1933 pattern
+= _T(".*");
1944 // these characters are special in a RE, quote them
1945 // (however note that we don't quote '[' and ']' to allow
1946 // using them for Unix shell like matching)
1947 pattern
+= _T('\\');
1951 pattern
+= *pszMask
;
1959 return wxRegEx(pattern
, wxRE_NOSUB
| wxRE_EXTENDED
).Matches(c_str());
1960 #else // !wxUSE_REGEX
1961 // TODO: this is, of course, awfully inefficient...
1963 // FIXME-UTF8: implement using iterators, remove #if
1964 #if wxUSE_UNICODE_UTF8
1965 wxWCharBuffer maskBuf
= mask
.wc_str();
1966 wxWCharBuffer txtBuf
= wc_str();
1967 const wxChar
*pszMask
= maskBuf
.data();
1968 const wxChar
*pszTxt
= txtBuf
.data();
1970 const wxChar
*pszMask
= mask
.wx_str();
1971 // the char currently being checked
1972 const wxChar
*pszTxt
= wx_str();
1975 // the last location where '*' matched
1976 const wxChar
*pszLastStarInText
= NULL
;
1977 const wxChar
*pszLastStarInMask
= NULL
;
1980 for ( ; *pszMask
!= wxT('\0'); pszMask
++, pszTxt
++ ) {
1981 switch ( *pszMask
) {
1983 if ( *pszTxt
== wxT('\0') )
1986 // pszTxt and pszMask will be incremented in the loop statement
1992 // remember where we started to be able to backtrack later
1993 pszLastStarInText
= pszTxt
;
1994 pszLastStarInMask
= pszMask
;
1996 // ignore special chars immediately following this one
1997 // (should this be an error?)
1998 while ( *pszMask
== wxT('*') || *pszMask
== wxT('?') )
2001 // if there is nothing more, match
2002 if ( *pszMask
== wxT('\0') )
2005 // are there any other metacharacters in the mask?
2007 const wxChar
*pEndMask
= wxStrpbrk(pszMask
, wxT("*?"));
2009 if ( pEndMask
!= NULL
) {
2010 // we have to match the string between two metachars
2011 uiLenMask
= pEndMask
- pszMask
;
2014 // we have to match the remainder of the string
2015 uiLenMask
= wxStrlen(pszMask
);
2018 wxString
strToMatch(pszMask
, uiLenMask
);
2019 const wxChar
* pMatch
= wxStrstr(pszTxt
, strToMatch
);
2020 if ( pMatch
== NULL
)
2023 // -1 to compensate "++" in the loop
2024 pszTxt
= pMatch
+ uiLenMask
- 1;
2025 pszMask
+= uiLenMask
- 1;
2030 if ( *pszMask
!= *pszTxt
)
2036 // match only if nothing left
2037 if ( *pszTxt
== wxT('\0') )
2040 // if we failed to match, backtrack if we can
2041 if ( pszLastStarInText
) {
2042 pszTxt
= pszLastStarInText
+ 1;
2043 pszMask
= pszLastStarInMask
;
2045 pszLastStarInText
= NULL
;
2047 // don't bother resetting pszLastStarInMask, it's unnecessary
2053 #endif // wxUSE_REGEX/!wxUSE_REGEX
2056 // Count the number of chars
2057 int wxString::Freq(wxUniChar ch
) const
2060 for ( const_iterator i
= begin(); i
!= end(); ++i
)
2068 // ----------------------------------------------------------------------------
2069 // wxUTF8StringBuffer
2070 // ----------------------------------------------------------------------------
2072 #if wxUSE_UNICODE_WCHAR
2073 wxUTF8StringBuffer::~wxUTF8StringBuffer()
2075 wxMBConvStrictUTF8 conv
;
2076 size_t wlen
= conv
.ToWChar(NULL
, 0, m_buf
);
2077 wxCHECK_RET( wlen
!= wxCONV_FAILED
, "invalid UTF-8 data in string buffer?" );
2079 wxStringInternalBuffer
wbuf(m_str
, wlen
);
2080 conv
.ToWChar(wbuf
, wlen
, m_buf
);
2083 wxUTF8StringBufferLength::~wxUTF8StringBufferLength()
2085 wxCHECK_RET(m_lenSet
, "length not set");
2087 wxMBConvStrictUTF8 conv
;
2088 size_t wlen
= conv
.ToWChar(NULL
, 0, m_buf
, m_len
);
2089 wxCHECK_RET( wlen
!= wxCONV_FAILED
, "invalid UTF-8 data in string buffer?" );
2091 wxStringInternalBufferLength
wbuf(m_str
, wlen
);
2092 conv
.ToWChar(wbuf
, wlen
, m_buf
, m_len
);
2093 wbuf
.SetLength(wlen
);
2095 #endif // wxUSE_UNICODE_WCHAR
2097 // ----------------------------------------------------------------------------
2098 // wxCharBufferType<T>
2099 // ----------------------------------------------------------------------------
2102 wxCharTypeBuffer
<char>::Data
2103 wxCharTypeBuffer
<char>::NullData(NULL
);
2106 wxCharTypeBuffer
<wchar_t>::Data
2107 wxCharTypeBuffer
<wchar_t>::NullData(NULL
);