1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
13 // ===========================================================================
14 // headers, declarations, constants
15 // ===========================================================================
17 // For compilers that support precompilation, includes "wx.h".
18 #include "wx/wxprec.h"
25 #include "wx/string.h"
26 #include "wx/wxcrtvararg.h"
38 #include "wx/hashmap.h"
40 // string handling functions used by wxString:
41 #if wxUSE_UNICODE_UTF8
42 #define wxStringMemcpy memcpy
43 #define wxStringMemcmp memcmp
44 #define wxStringMemchr memchr
45 #define wxStringStrlen strlen
47 #define wxStringMemcpy wxTmemcpy
48 #define wxStringMemcmp wxTmemcmp
49 #define wxStringMemchr wxTmemchr
50 #define wxStringStrlen wxStrlen
54 // ---------------------------------------------------------------------------
55 // static class variables definition
56 // ---------------------------------------------------------------------------
58 //According to STL _must_ be a -1 size_t
59 const size_t wxString::npos
= (size_t) -1;
61 #if wxUSE_STRING_POS_CACHE
63 struct wxStrCacheInitializer
65 wxStrCacheInitializer()
67 // calling this function triggers s_cache initialization in it, and
68 // from now on it becomes safe to call from multiple threads
73 static wxStrCacheInitializer gs_stringCacheInit
;
75 // gdb seems to be unable to display thread-local variables correctly, at least
76 // not my 6.4.98 version under amd64, so provide this debugging helper to do it
79 struct wxStrCacheDumper
83 puts("*** wxString cache dump:");
84 for ( unsigned n
= 0; n
< wxString::Cache::SIZE
; n
++ )
86 const wxString::Cache::Element
&
87 c
= wxString::GetCacheBegin()[n
];
89 printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
91 n
== wxString::LastUsedCacheElement() ? " [*]" : "",
94 (unsigned long)c
.impl
,
100 void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
102 #endif // __WXDEBUG__
104 #ifdef wxPROFILE_STRING_CACHE
106 wxString::CacheStats
wxString::ms_cacheStats
;
108 struct wxStrCacheStatsDumper
110 ~wxStrCacheStatsDumper()
112 const wxString::CacheStats
& stats
= wxString::ms_cacheStats
;
116 puts("*** wxString cache statistics:");
117 printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
119 printf("\tHits %u (of which %u not used) or %.2f%%\n",
122 100.*float(stats
.poshits
- stats
.mishits
)/stats
.postot
);
123 printf("\tAverage position requested: %.2f\n",
124 float(stats
.sumpos
) / stats
.postot
);
125 printf("\tAverage offset after cached hint: %.2f\n",
126 float(stats
.sumofs
) / stats
.postot
);
131 printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
132 stats
.lentot
, 100.*float(stats
.lenhits
)/stats
.lentot
);
137 static wxStrCacheStatsDumper s_showCacheStats
;
139 #endif // wxPROFILE_STRING_CACHE
141 #endif // wxUSE_STRING_POS_CACHE
143 // ----------------------------------------------------------------------------
145 // ----------------------------------------------------------------------------
147 #if wxUSE_STD_IOSTREAM
151 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxCStrData
& str
)
153 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
154 return os
<< (const char *)str
.AsCharBuf();
156 return os
<< str
.AsInternal();
160 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxString
& str
)
162 return os
<< str
.c_str();
165 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxCharBuffer
& str
)
167 return os
<< str
.data();
171 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxWCharBuffer
& str
)
173 return os
<< str
.data();
177 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
179 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxString
& str
)
181 return wos
<< str
.wc_str();
184 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxCStrData
& str
)
186 return wos
<< str
.AsWChar();
189 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxWCharBuffer
& str
)
191 return wos
<< str
.data();
194 #endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
196 #endif // wxUSE_STD_IOSTREAM
198 // ===========================================================================
199 // wxString class core
200 // ===========================================================================
202 #if wxUSE_UNICODE_UTF8
204 void wxString::PosLenToImpl(size_t pos
, size_t len
,
205 size_t *implPos
, size_t *implLen
) const
211 else // have valid start position
213 const const_iterator b
= GetIterForNthChar(pos
);
214 *implPos
= wxStringImpl::const_iterator(b
.impl()) - m_impl
.begin();
219 else // have valid length too
221 // we need to handle the case of length specifying a substring
222 // going beyond the end of the string, just as std::string does
223 const const_iterator
e(end());
225 while ( len
&& i
<= e
)
231 *implLen
= i
.impl() - b
.impl();
236 #endif // wxUSE_UNICODE_UTF8
238 // ----------------------------------------------------------------------------
239 // wxCStrData converted strings caching
240 // ----------------------------------------------------------------------------
242 // FIXME-UTF8: temporarily disabled because it doesn't work with global
243 // string objects; re-enable after fixing this bug and benchmarking
244 // performance to see if using a hash is a good idea at all
247 // For backward compatibility reasons, it must be possible to assign the value
248 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
249 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
250 // because the memory would be freed immediately, but it has to be valid as long
251 // as the string is not modified, so that code like this still works:
253 // const wxChar *s = str.c_str();
254 // while ( s ) { ... }
256 // FIXME-UTF8: not thread safe!
257 // FIXME-UTF8: we currently clear the cached conversion only when the string is
258 // destroyed, but we should do it when the string is modified, to
259 // keep memory usage down
260 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
261 // invalidated the cache on every change, we could keep the previous
263 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
264 // to use mb_str() or wc_str() instead of (const [w]char*)c_str()
267 static inline void DeleteStringFromConversionCache(T
& hash
, const wxString
*s
)
269 typename
T::iterator i
= hash
.find(wxConstCast(s
, wxString
));
270 if ( i
!= hash
.end() )
278 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
279 // so we have to use wxString* here and const-cast when used
280 WX_DECLARE_HASH_MAP(wxString
*, char*, wxPointerHash
, wxPointerEqual
,
281 wxStringCharConversionCache
);
282 static wxStringCharConversionCache gs_stringsCharCache
;
284 const char* wxCStrData::AsChar() const
286 // remove previously cached value, if any (see FIXMEs above):
287 DeleteStringFromConversionCache(gs_stringsCharCache
, m_str
);
289 // convert the string and keep it:
290 const char *s
= gs_stringsCharCache
[wxConstCast(m_str
, wxString
)] =
291 m_str
->mb_str().release();
295 #endif // wxUSE_UNICODE
297 #if !wxUSE_UNICODE_WCHAR
298 WX_DECLARE_HASH_MAP(wxString
*, wchar_t*, wxPointerHash
, wxPointerEqual
,
299 wxStringWCharConversionCache
);
300 static wxStringWCharConversionCache gs_stringsWCharCache
;
302 const wchar_t* wxCStrData::AsWChar() const
304 // remove previously cached value, if any (see FIXMEs above):
305 DeleteStringFromConversionCache(gs_stringsWCharCache
, m_str
);
307 // convert the string and keep it:
308 const wchar_t *s
= gs_stringsWCharCache
[wxConstCast(m_str
, wxString
)] =
309 m_str
->wc_str().release();
313 #endif // !wxUSE_UNICODE_WCHAR
315 wxString::~wxString()
318 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
319 DeleteStringFromConversionCache(gs_stringsCharCache
, this);
321 #if !wxUSE_UNICODE_WCHAR
322 DeleteStringFromConversionCache(gs_stringsWCharCache
, this);
327 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
328 const char* wxCStrData::AsChar() const
330 #if wxUSE_UNICODE_UTF8
331 if ( wxLocaleIsUtf8
)
334 // under non-UTF8 locales, we have to convert the internal UTF-8
335 // representation using wxConvLibc and cache the result
337 wxString
*str
= wxConstCast(m_str
, wxString
);
339 // convert the string:
341 // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
342 // have it) but it's unfortunately not obvious to implement
343 // because we don't know how big a buffer we need for the
344 // given string length (in case of multibyte encodings, e.g.
345 // ISO-2022-JP or UTF-8 when internal representation is wchar_t)
347 // One idea would be to store more than just m_convertedToChar
348 // in wxString: then we could record the length of the string
349 // which was converted the last time and try to reuse the same
350 // buffer if the current length is not greater than it (this
351 // could still fail because string could have been modified in
352 // place but it would work most of the time, so we'd do it and
353 // only allocate the new buffer if in-place conversion returned
354 // an error). We could also store a bit saying if the string
355 // was modified since the last conversion (and update it in all
356 // operation modifying the string, of course) to avoid unneeded
357 // consequential conversions. But both of these ideas require
358 // adding more fields to wxString and require profiling results
359 // to be sure that we really gain enough from them to justify
361 wxCharBuffer
buf(str
->mb_str());
363 // if it failed, return empty string and not NULL to avoid crashes in code
364 // written with either wxWidgets 2 wxString or std::string behaviour in
365 // mind: neither of them ever returns NULL and so we shouldn't either
369 if ( str
->m_convertedToChar
&&
370 strlen(buf
) == strlen(str
->m_convertedToChar
) )
372 // keep the same buffer for as long as possible, so that several calls
373 // to c_str() in a row still work:
374 strcpy(str
->m_convertedToChar
, buf
);
378 str
->m_convertedToChar
= buf
.release();
382 return str
->m_convertedToChar
+ m_offset
;
384 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
386 #if !wxUSE_UNICODE_WCHAR
387 const wchar_t* wxCStrData::AsWChar() const
389 wxString
*str
= wxConstCast(m_str
, wxString
);
391 // convert the string:
392 wxWCharBuffer
buf(str
->wc_str());
394 // notice that here, unlike above in AsChar(), conversion can't fail as our
395 // internal UTF-8 is always well-formed -- or the string was corrupted and
396 // all bets are off anyhow
398 // FIXME-UTF8: do the conversion in-place in the existing buffer
399 if ( str
->m_convertedToWChar
&&
400 wxWcslen(buf
) == wxWcslen(str
->m_convertedToWChar
) )
402 // keep the same buffer for as long as possible, so that several calls
403 // to c_str() in a row still work:
404 memcpy(str
->m_convertedToWChar
, buf
, sizeof(wchar_t) * wxWcslen(buf
));
408 str
->m_convertedToWChar
= buf
.release();
412 return str
->m_convertedToWChar
+ m_offset
;
414 #endif // !wxUSE_UNICODE_WCHAR
416 // ===========================================================================
417 // wxString class core
418 // ===========================================================================
420 // ---------------------------------------------------------------------------
421 // construction and conversion
422 // ---------------------------------------------------------------------------
424 #if wxUSE_UNICODE_WCHAR
426 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
427 const wxMBConv
& conv
)
430 if ( !psz
|| nLength
== 0 )
431 return SubstrBufFromMB(L
"", 0);
433 if ( nLength
== npos
)
437 wxWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
439 return SubstrBufFromMB(_T(""), 0);
441 return SubstrBufFromMB(wcBuf
, wcLen
);
443 #endif // wxUSE_UNICODE_WCHAR
445 #if wxUSE_UNICODE_UTF8
447 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
448 const wxMBConv
& conv
)
451 if ( !psz
|| nLength
== 0 )
452 return SubstrBufFromMB("", 0);
454 // if psz is already in UTF-8, we don't have to do the roundtrip to
455 // wchar_t* and back:
458 // we need to validate the input because UTF8 iterators assume valid
459 // UTF-8 sequence and psz may be invalid:
460 if ( wxStringOperations::IsValidUtf8String(psz
, nLength
) )
462 // we must pass the real string length to SubstrBufFromMB ctor
463 if ( nLength
== npos
)
464 nLength
= psz
? strlen(psz
) : 0;
465 return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz
), nLength
);
467 // else: do the roundtrip through wchar_t*
470 if ( nLength
== npos
)
473 // first convert to wide string:
475 wxWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
477 return SubstrBufFromMB("", 0);
479 // and then to UTF-8:
480 SubstrBufFromMB
buf(ConvertStr(wcBuf
, wcLen
, wxMBConvStrictUTF8()));
481 // widechar -> UTF-8 conversion isn't supposed to ever fail:
482 wxASSERT_MSG( buf
.data
, _T("conversion to UTF-8 failed") );
486 #endif // wxUSE_UNICODE_UTF8
488 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
490 wxString::SubstrBufFromWC
wxString::ConvertStr(const wchar_t *pwz
, size_t nLength
,
491 const wxMBConv
& conv
)
494 if ( !pwz
|| nLength
== 0 )
495 return SubstrBufFromWC("", 0);
497 if ( nLength
== npos
)
501 wxCharBuffer
mbBuf(conv
.cWC2MB(pwz
, nLength
, &mbLen
));
503 return SubstrBufFromWC("", 0);
505 return SubstrBufFromWC(mbBuf
, mbLen
);
507 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
510 #if wxUSE_UNICODE_WCHAR
512 //Convert wxString in Unicode mode to a multi-byte string
513 const wxCharBuffer
wxString::mb_str(const wxMBConv
& conv
) const
515 return conv
.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL
);
518 #elif wxUSE_UNICODE_UTF8
520 const wxWCharBuffer
wxString::wc_str() const
522 return wxMBConvStrictUTF8().cMB2WC
525 m_impl
.length() + 1, // size, not length
530 const wxCharBuffer
wxString::mb_str(const wxMBConv
& conv
) const
533 return wxCharBuffer::CreateNonOwned(m_impl
.c_str());
535 // FIXME-UTF8: use wc_str() here once we have buffers with length
538 wxWCharBuffer
wcBuf(wxMBConvStrictUTF8().cMB2WC
541 m_impl
.length() + 1, // size
545 return wxCharBuffer("");
547 return conv
.cWC2MB(wcBuf
, wcLen
+1, NULL
);
552 //Converts this string to a wide character string if unicode
553 //mode is not enabled and wxUSE_WCHAR_T is enabled
554 const wxWCharBuffer
wxString::wc_str(const wxMBConv
& conv
) const
556 return conv
.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL
);
559 #endif // Unicode/ANSI
561 // shrink to minimal size (releasing extra memory)
562 bool wxString::Shrink()
564 wxString
tmp(begin(), end());
566 return tmp
.length() == length();
569 // deprecated compatibility code:
570 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
571 wxStringCharType
*wxString::GetWriteBuf(size_t nLen
)
573 return DoGetWriteBuf(nLen
);
576 void wxString::UngetWriteBuf()
581 void wxString::UngetWriteBuf(size_t nLen
)
583 DoUngetWriteBuf(nLen
);
585 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
588 // ---------------------------------------------------------------------------
590 // ---------------------------------------------------------------------------
592 // all functions are inline in string.h
594 // ---------------------------------------------------------------------------
595 // concatenation operators
596 // ---------------------------------------------------------------------------
599 * concatenation functions come in 5 flavours:
601 * char + string and string + char
602 * C str + string and string + C str
605 wxString
operator+(const wxString
& str1
, const wxString
& str2
)
607 #if !wxUSE_STL_BASED_WXSTRING
608 wxASSERT( str1
.IsValid() );
609 wxASSERT( str2
.IsValid() );
618 wxString
operator+(const wxString
& str
, wxUniChar ch
)
620 #if !wxUSE_STL_BASED_WXSTRING
621 wxASSERT( str
.IsValid() );
630 wxString
operator+(wxUniChar ch
, const wxString
& str
)
632 #if !wxUSE_STL_BASED_WXSTRING
633 wxASSERT( str
.IsValid() );
642 wxString
operator+(const wxString
& str
, const char *psz
)
644 #if !wxUSE_STL_BASED_WXSTRING
645 wxASSERT( str
.IsValid() );
649 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
650 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
658 wxString
operator+(const wxString
& str
, const wchar_t *pwz
)
660 #if !wxUSE_STL_BASED_WXSTRING
661 wxASSERT( str
.IsValid() );
665 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
666 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
674 wxString
operator+(const char *psz
, const wxString
& str
)
676 #if !wxUSE_STL_BASED_WXSTRING
677 wxASSERT( str
.IsValid() );
681 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
682 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
690 wxString
operator+(const wchar_t *pwz
, const wxString
& str
)
692 #if !wxUSE_STL_BASED_WXSTRING
693 wxASSERT( str
.IsValid() );
697 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
698 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
706 // ---------------------------------------------------------------------------
708 // ---------------------------------------------------------------------------
710 bool wxString::IsSameAs(wxUniChar c
, bool compareWithCase
) const
712 return (length() == 1) && (compareWithCase
? GetChar(0u) == c
713 : wxToupper(GetChar(0u)) == wxToupper(c
));
716 #ifdef HAVE_STD_STRING_COMPARE
718 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
719 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
720 // sort strings in characters code point order by sorting the byte sequence
721 // in byte values order (i.e. what strcmp() and memcmp() do).
723 int wxString::compare(const wxString
& str
) const
725 return m_impl
.compare(str
.m_impl
);
728 int wxString::compare(size_t nStart
, size_t nLen
,
729 const wxString
& str
) const
732 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
733 return m_impl
.compare(pos
, len
, str
.m_impl
);
736 int wxString::compare(size_t nStart
, size_t nLen
,
738 size_t nStart2
, size_t nLen2
) const
741 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
744 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
746 return m_impl
.compare(pos
, len
, str
.m_impl
, pos2
, len2
);
749 int wxString::compare(const char* sz
) const
751 return m_impl
.compare(ImplStr(sz
));
754 int wxString::compare(const wchar_t* sz
) const
756 return m_impl
.compare(ImplStr(sz
));
759 int wxString::compare(size_t nStart
, size_t nLen
,
760 const char* sz
, size_t nCount
) const
763 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
765 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
767 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
770 int wxString::compare(size_t nStart
, size_t nLen
,
771 const wchar_t* sz
, size_t nCount
) const
774 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
776 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
778 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
781 #else // !HAVE_STD_STRING_COMPARE
783 static inline int wxDoCmp(const wxStringCharType
* s1
, size_t l1
,
784 const wxStringCharType
* s2
, size_t l2
)
787 return wxStringMemcmp(s1
, s2
, l1
);
790 int ret
= wxStringMemcmp(s1
, s2
, l1
);
791 return ret
== 0 ? -1 : ret
;
795 int ret
= wxStringMemcmp(s1
, s2
, l2
);
796 return ret
== 0 ? +1 : ret
;
800 int wxString::compare(const wxString
& str
) const
802 return ::wxDoCmp(m_impl
.data(), m_impl
.length(),
803 str
.m_impl
.data(), str
.m_impl
.length());
806 int wxString::compare(size_t nStart
, size_t nLen
,
807 const wxString
& str
) const
809 wxASSERT(nStart
<= length());
810 size_type strLen
= length() - nStart
;
811 nLen
= strLen
< nLen
? strLen
: nLen
;
814 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
816 return ::wxDoCmp(m_impl
.data() + pos
, len
,
817 str
.m_impl
.data(), str
.m_impl
.length());
820 int wxString::compare(size_t nStart
, size_t nLen
,
822 size_t nStart2
, size_t nLen2
) const
824 wxASSERT(nStart
<= length());
825 wxASSERT(nStart2
<= str
.length());
826 size_type strLen
= length() - nStart
,
827 strLen2
= str
.length() - nStart2
;
828 nLen
= strLen
< nLen
? strLen
: nLen
;
829 nLen2
= strLen2
< nLen2
? strLen2
: nLen2
;
832 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
834 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
836 return ::wxDoCmp(m_impl
.data() + pos
, len
,
837 str
.m_impl
.data() + pos2
, len2
);
840 int wxString::compare(const char* sz
) const
842 SubstrBufFromMB
str(ImplStr(sz
, npos
));
843 if ( str
.len
== npos
)
844 str
.len
= wxStringStrlen(str
.data
);
845 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
848 int wxString::compare(const wchar_t* sz
) const
850 SubstrBufFromWC
str(ImplStr(sz
, npos
));
851 if ( str
.len
== npos
)
852 str
.len
= wxStringStrlen(str
.data
);
853 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
856 int wxString::compare(size_t nStart
, size_t nLen
,
857 const char* sz
, size_t nCount
) const
859 wxASSERT(nStart
<= length());
860 size_type strLen
= length() - nStart
;
861 nLen
= strLen
< nLen
? strLen
: nLen
;
864 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
866 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
867 if ( str
.len
== npos
)
868 str
.len
= wxStringStrlen(str
.data
);
870 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
873 int wxString::compare(size_t nStart
, size_t nLen
,
874 const wchar_t* sz
, size_t nCount
) const
876 wxASSERT(nStart
<= length());
877 size_type strLen
= length() - nStart
;
878 nLen
= strLen
< nLen
? strLen
: nLen
;
881 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
883 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
884 if ( str
.len
== npos
)
885 str
.len
= wxStringStrlen(str
.data
);
887 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
890 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
893 // ---------------------------------------------------------------------------
894 // find_{first,last}_[not]_of functions
895 // ---------------------------------------------------------------------------
897 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
899 // NB: All these functions are implemented with the argument being wxChar*,
900 // i.e. widechar string in any Unicode build, even though native string
901 // representation is char* in the UTF-8 build. This is because we couldn't
902 // use memchr() to determine if a character is in a set encoded as UTF-8.
904 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
) const
906 return find_first_of(sz
, nStart
, wxStrlen(sz
));
909 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
) const
911 return find_first_not_of(sz
, nStart
, wxStrlen(sz
));
914 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
916 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
919 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
921 if ( wxTmemchr(sz
, *i
, n
) )
928 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
930 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
933 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
935 if ( !wxTmemchr(sz
, *i
, n
) )
943 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
) const
945 return find_last_of(sz
, nStart
, wxStrlen(sz
));
948 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
) const
950 return find_last_not_of(sz
, nStart
, wxStrlen(sz
));
953 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
955 size_t len
= length();
957 if ( nStart
== npos
)
963 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
967 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
968 i
!= rend(); --idx
, ++i
)
970 if ( wxTmemchr(sz
, *i
, n
) )
977 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
979 size_t len
= length();
981 if ( nStart
== npos
)
987 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
991 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
992 i
!= rend(); --idx
, ++i
)
994 if ( !wxTmemchr(sz
, *i
, n
) )
1001 size_t wxString::find_first_not_of(wxUniChar ch
, size_t nStart
) const
1003 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
1005 size_t idx
= nStart
;
1006 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
1015 size_t wxString::find_last_not_of(wxUniChar ch
, size_t nStart
) const
1017 size_t len
= length();
1019 if ( nStart
== npos
)
1025 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
1028 size_t idx
= nStart
;
1029 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
1030 i
!= rend(); --idx
, ++i
)
1039 // the functions above were implemented for wchar_t* arguments in Unicode
1040 // build and char* in ANSI build; below are implementations for the other
1043 #define wxOtherCharType char
1044 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
1046 #define wxOtherCharType wchar_t
1047 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
1050 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
) const
1051 { return find_first_of(STRCONV(sz
), nStart
); }
1053 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
,
1055 { return find_first_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1056 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
) const
1057 { return find_last_of(STRCONV(sz
), nStart
); }
1058 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
,
1060 { return find_last_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1061 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
1062 { return find_first_not_of(STRCONV(sz
), nStart
); }
1063 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
,
1065 { return find_first_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1066 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
1067 { return find_last_not_of(STRCONV(sz
), nStart
); }
1068 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
,
1070 { return find_last_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1072 #undef wxOtherCharType
1075 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1077 // ===========================================================================
1078 // other common string functions
1079 // ===========================================================================
1081 int wxString::CmpNoCase(const wxString
& s
) const
1083 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
1085 const_iterator i1
= begin();
1086 const_iterator end1
= end();
1087 const_iterator i2
= s
.begin();
1088 const_iterator end2
= s
.end();
1090 for ( ; i1
!= end1
&& i2
!= end2
; ++i1
, ++i2
)
1092 wxUniChar lower1
= (wxChar
)wxTolower(*i1
);
1093 wxUniChar lower2
= (wxChar
)wxTolower(*i2
);
1094 if ( lower1
!= lower2
)
1095 return lower1
< lower2
? -1 : 1;
1098 size_t len1
= length();
1099 size_t len2
= s
.length();
1103 else if ( len1
> len2
)
1112 #ifndef __SCHAR_MAX__
1113 #define __SCHAR_MAX__ 127
1117 wxString
wxString::FromAscii(const char *ascii
, size_t len
)
1119 if (!ascii
|| len
== 0)
1120 return wxEmptyString
;
1125 wxStringInternalBuffer
buf(res
, len
);
1126 wxStringCharType
*dest
= buf
;
1128 for ( ; len
> 0; --len
)
1130 unsigned char c
= (unsigned char)*ascii
++;
1131 wxASSERT_MSG( c
< 0x80,
1132 _T("Non-ASCII value passed to FromAscii().") );
1134 *dest
++ = (wchar_t)c
;
1141 wxString
wxString::FromAscii(const char *ascii
)
1143 return FromAscii(ascii
, wxStrlen(ascii
));
1146 wxString
wxString::FromAscii(char ascii
)
1148 // What do we do with '\0' ?
1150 unsigned char c
= (unsigned char)ascii
;
1152 wxASSERT_MSG( c
< 0x80, _T("Non-ASCII value passed to FromAscii().") );
1154 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1155 return wxString(wxUniChar((wchar_t)c
));
1158 const wxCharBuffer
wxString::ToAscii() const
1160 // this will allocate enough space for the terminating NUL too
1161 wxCharBuffer
buffer(length());
1162 char *dest
= buffer
.data();
1164 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1167 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1168 *dest
++ = c
.IsAscii() ? (char)c
: '_';
1170 // the output string can't have embedded NULs anyhow, so we can safely
1171 // stop at first of them even if we do have any
1179 #endif // wxUSE_UNICODE
1181 // extract string of length nCount starting at nFirst
1182 wxString
wxString::Mid(size_t nFirst
, size_t nCount
) const
1184 size_t nLen
= length();
1186 // default value of nCount is npos and means "till the end"
1187 if ( nCount
== npos
)
1189 nCount
= nLen
- nFirst
;
1192 // out-of-bounds requests return sensible things
1193 if ( nFirst
+ nCount
> nLen
)
1195 nCount
= nLen
- nFirst
;
1198 if ( nFirst
> nLen
)
1200 // AllocCopy() will return empty string
1201 return wxEmptyString
;
1204 wxString
dest(*this, nFirst
, nCount
);
1205 if ( dest
.length() != nCount
)
1207 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1213 // check that the string starts with prefix and return the rest of the string
1214 // in the provided pointer if it is not NULL, otherwise return false
1215 bool wxString::StartsWith(const wxString
& prefix
, wxString
*rest
) const
1217 if ( compare(0, prefix
.length(), prefix
) != 0 )
1222 // put the rest of the string into provided pointer
1223 rest
->assign(*this, prefix
.length(), npos
);
1230 // check that the string ends with suffix and return the rest of it in the
1231 // provided pointer if it is not NULL, otherwise return false
1232 bool wxString::EndsWith(const wxString
& suffix
, wxString
*rest
) const
1234 int start
= length() - suffix
.length();
1236 if ( start
< 0 || compare(start
, npos
, suffix
) != 0 )
1241 // put the rest of the string into provided pointer
1242 rest
->assign(*this, 0, start
);
1249 // extract nCount last (rightmost) characters
1250 wxString
wxString::Right(size_t nCount
) const
1252 if ( nCount
> length() )
1255 wxString
dest(*this, length() - nCount
, nCount
);
1256 if ( dest
.length() != nCount
) {
1257 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1262 // get all characters after the last occurence of ch
1263 // (returns the whole string if ch not found)
1264 wxString
wxString::AfterLast(wxUniChar ch
) const
1267 int iPos
= Find(ch
, true);
1268 if ( iPos
== wxNOT_FOUND
)
1271 str
= wx_str() + iPos
+ 1;
1276 // extract nCount first (leftmost) characters
1277 wxString
wxString::Left(size_t nCount
) const
1279 if ( nCount
> length() )
1282 wxString
dest(*this, 0, nCount
);
1283 if ( dest
.length() != nCount
) {
1284 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1289 // get all characters before the first occurence of ch
1290 // (returns the whole string if ch not found)
1291 wxString
wxString::BeforeFirst(wxUniChar ch
) const
1293 int iPos
= Find(ch
);
1294 if ( iPos
== wxNOT_FOUND
) iPos
= length();
1295 return wxString(*this, 0, iPos
);
1298 /// get all characters before the last occurence of ch
1299 /// (returns empty string if ch not found)
1300 wxString
wxString::BeforeLast(wxUniChar ch
) const
1303 int iPos
= Find(ch
, true);
1304 if ( iPos
!= wxNOT_FOUND
&& iPos
!= 0 )
1305 str
= wxString(c_str(), iPos
);
1310 /// get all characters after the first occurence of ch
1311 /// (returns empty string if ch not found)
1312 wxString
wxString::AfterFirst(wxUniChar ch
) const
1315 int iPos
= Find(ch
);
1316 if ( iPos
!= wxNOT_FOUND
)
1317 str
= wx_str() + iPos
+ 1;
1322 // replace first (or all) occurrences of some substring with another one
//
// strOld: the substring to look for, must not be empty (checked below)
// strNew: the text put in place of each replaced occurrence
// bReplaceAll: presumably selects between replacing all occurrences and
//              stopping after the first one (see the last comment of the
//              general loop) -- confirm
//
// The number of replacements made is tracked in uiCount.
1323 size_t wxString::Replace(const wxString
& strOld
,
1324 const wxString
& strNew
, bool bReplaceAll
)
1326 // if we tried to replace an empty string we'd enter an infinite loop below
1327 wxCHECK_MSG( !strOld
.empty(), 0,
1328 _T("wxString::Replace(): invalid parameter") );
1330 wxSTRING_INVALIDATE_CACHE();
1332 size_t uiCount
= 0; // count of replacements made
1334 // optimize the special common case: replacement of one character by
1335 // another one (in UTF-8 case we can only do this for ASCII characters)
1337 // benchmarks show that this special version is around 3 times faster
1338 // (depending on the proportion of matching characters and UTF-8/wchar_t
// the test is done on the internal representation (m_impl): both strings
// must be exactly one storage unit long there
1340 if ( strOld
.m_impl
.length() == 1 && strNew
.m_impl
.length() == 1 )
1342 const wxStringCharType chOld
= strOld
.m_impl
[0],
1343 chNew
= strNew
.m_impl
[0];
1345 // this loop is the simplified version of the one below
1346 for ( size_t pos
= 0; ; )
1348 pos
= m_impl
.find(chOld
, pos
);
// overwrite the match in place and continue searching after it
1352 m_impl
[pos
++] = chNew
;
1360 else // general case
1362 const size_t uiOldLen
= strOld
.m_impl
.length();
1363 const size_t uiNewLen
= strNew
.m_impl
.length();
1365 for ( size_t pos
= 0; ; )
1367 pos
= m_impl
.find(strOld
.m_impl
, pos
);
1371 // replace this occurrence of the old string with the new one
1372 m_impl
.replace(pos
, uiOldLen
, strNew
.m_impl
);
1374 // move up pos past the string that was replaced
1377 // increase replace count
1380 // stop after the first one?
// check whether the string consists of ASCII characters only: every
// character of the string is tested with its own IsAscii() method
1389 bool wxString::IsAscii() const
1391 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1393 if ( !(*i
).IsAscii() )
// check the characters of the string with wxIsalpha()
// NOTE(review): the only test visible here is the alphabetic one, so digits,
// spaces and punctuation all fail it -- confirm this is what callers expect
// from "word"
1400 bool wxString::IsWord() const
1402 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1404 if ( !wxIsalpha(*i
) )
// check whether the string looks like a (possibly signed) integer: the first
// character is compared with '-' and '+' and the characters from the current
// iterator position to the end are tested with wxIsdigit()
// NOTE(review): the sign is presumably skipped before the digit loop and the
// empty string presumably handled before dereferencing begin() -- confirm
1411 bool wxString::IsNumber() const
1416 const_iterator i
= begin();
1418 if ( *i
== _T('-') || *i
== _T('+') )
1421 for ( ; i
!= end(); ++i
)
1423 if ( !wxIsdigit(*i
) )
// strip spaces from the side(s) selected by w
//
// w is used as a bit mask: the leading and trailing bits are tested
// independently and result in calls to Trim(false) and Trim(true) on s
1430 wxString
wxString::Strip(stripType w
) const
1433 if ( w
& leading
) s
.Trim(false);
1434 if ( w
& trailing
) s
.Trim(true);
1438 // ---------------------------------------------------------------------------
1440 // ---------------------------------------------------------------------------
// convert the string to upper case in place: each character is replaced by
// the result of wxToupper() (cast to wxChar); the end iterator is computed
// once, before the loop starts
1442 wxString
& wxString::MakeUpper()
1444 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1445 *it
= (wxChar
)wxToupper(*it
);
// convert the string to lower case in place: each character is replaced by
// the result of wxTolower() (cast to wxChar); the end iterator is computed
// once, before the loop starts
1450 wxString
& wxString::MakeLower()
1452 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1453 *it
= (wxChar
)wxTolower(*it
);
// capitalize the string in place: the first character is passed through
// wxToupper() and all the following ones through wxTolower()
1458 wxString
& wxString::MakeCapitalized()
1460 const iterator en
= end();
1461 iterator it
= begin();
// upper case the first character...
1464 *it
= (wxChar
)wxToupper(*it
);
// ... and lower case the rest, starting from the second one
1465 for ( ++it
; it
!= en
; ++it
)
1466 *it
= (wxChar
)wxTolower(*it
);
1472 // ---------------------------------------------------------------------------
1473 // trimming and padding
1474 // ---------------------------------------------------------------------------
1476 // some compilers (VC++ 6.0 not to name them) return true for a call to
1477 // isspace('\xEA') in the C locale which seems to be broken to me, but we have
1478 // to live with this by checking that the character is a 7 bit one - even if
1479 // this may fail to detect some spaces (I don't know if Unicode doesn't have
1480 // space-like symbols somewhere except in the first 128 chars), it is arguably
1481 // still better than trimming away accented letters
1482 inline int wxSafeIsspace(wxChar ch
) { return (ch
< 127) && wxIsspace(ch
); }
1484 // trims spaces (in the sense of isspace) from left or right side
//
// bFromRight selects the side: if true, the end of the string is trimmed,
// otherwise its beginning; spaces are detected using wxSafeIsspace()
1485 wxString
& wxString::Trim(bool bFromRight
)
1487 // first check if we're going to modify the string at all
// (only the last or the first character, depending on the side, is examined)
1490 (bFromRight
&& wxSafeIsspace(GetChar(length() - 1))) ||
1491 (!bFromRight
&& wxSafeIsspace(GetChar(0u)))
1497 // find last non-space character
1498 reverse_iterator psz
= rbegin();
1499 while ( (psz
!= rend()) && wxSafeIsspace(*psz
) )
1502 // truncate at trailing space start
// (base() converts the reverse iterator to the corresponding forward one)
1503 erase(psz
.base(), end());
1507 // find first non-space character
1508 iterator psz
= begin();
1509 while ( (psz
!= end()) && wxSafeIsspace(*psz
) )
1512 // fix up data and length
1513 erase(begin(), psz
);
1520 // adds nCount characters chPad to the string from either side
//
// s is constructed from chPad and nCount and presumably holds the padding
// itself; bFromRight presumably selects the side it is added to -- confirm
1521 wxString
& wxString::Pad(size_t nCount
, wxUniChar chPad
, bool bFromRight
)
1523 wxString
s(chPad
, nCount
);
1536 // truncate the string
//
// The string is only modified if uiLen is less than its current length, in
// which case everything from position uiLen to the end is erased.
1537 wxString
& wxString::Truncate(size_t uiLen
)
1539 if ( uiLen
< length() )
1541 erase(begin() + uiLen
, end());
1543 //else: nothing to do, string is already short enough
1548 // ---------------------------------------------------------------------------
1549 // finding (return wxNOT_FOUND if not found and index otherwise)
1550 // ---------------------------------------------------------------------------
1553 int wxString::Find(wxUniChar ch
, bool bFromEnd
) const
1555 size_type idx
= bFromEnd
? find_last_of(ch
) : find_first_of(ch
);
1557 return (idx
== npos
) ? wxNOT_FOUND
: (int)idx
;
1560 // ----------------------------------------------------------------------------
1561 // conversion to numbers
1562 // ----------------------------------------------------------------------------
1564 // The implementation of all the functions below is exactly the same so factor
1565 // it out. Note that number extraction works correctly on UTF-8 strings, so
1566 // we can use wxStringCharType and wx_str() for maximum efficiency.
// DO_IF_NOT_WINCE(x) has two alternative definitions: one expanding to its
// argument and one expanding to nothing; it is used below to wrap the code
// using errno
// NOTE(review): judging by the name, the empty definition is presumably the
// one used for Windows CE builds -- confirm
1569 #define DO_IF_NOT_WINCE(x) x
1571 #define DO_IF_NOT_WINCE(x)
// Common body of the string to integer conversion functions.
//
//  out   output pointer, checked for being non-NULL
//  base  numeric base, asserted to be either 0 or in 2..36 range
//  func  the conversion function, called as func(start, &end, base)
//  T     the type of the value returned by func
//
// The conversion is done on the string data returned by wx_str() and is
// considered failed if it didn't stop at the terminating NUL, if nothing
// was converted at all or (where errno is used) if errno is ERANGE.
1574 #define WX_STRING_TO_INT_TYPE(out, base, func, T) \
1575 wxCHECK_MSG( out, false, _T("NULL output pointer") ); \
1576 wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") ); \
1578 DO_IF_NOT_WINCE( errno = 0; ) \
1580 const wxStringCharType *start = wx_str(); \
1581 wxStringCharType *end; \
1582 T val = func(start, &end, base); \
1584 /* return true only if scan was stopped by the terminating NUL and */ \
1585 /* if the string was not empty to start with and no under/overflow */ \
1587 if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) ) \
// convert the string to a long in the given base: the work is done by
// WX_STRING_TO_INT_TYPE instantiated with wxStrtol
1592 bool wxString::ToLong(long *pVal
, int base
) const
1594 WX_STRING_TO_INT_TYPE(pVal
, base
, wxStrtol
, long);
// convert the string to an unsigned long in the given base: the work is done
// by WX_STRING_TO_INT_TYPE instantiated with wxStrtoul
1597 bool wxString::ToULong(unsigned long *pVal
, int base
) const
1599 WX_STRING_TO_INT_TYPE(pVal
, base
, wxStrtoul
, unsigned long);
// convert the string to a wxLongLong_t in the given base: the work is done
// by WX_STRING_TO_INT_TYPE instantiated with wxStrtoll
1602 bool wxString::ToLongLong(wxLongLong_t
*pVal
, int base
) const
1604 WX_STRING_TO_INT_TYPE(pVal
, base
, wxStrtoll
, wxLongLong_t
);
// convert the string to a wxULongLong_t in the given base: the work is done
// by WX_STRING_TO_INT_TYPE instantiated with wxStrtoull
1607 bool wxString::ToULongLong(wxULongLong_t
*pVal
, int base
) const
1609 WX_STRING_TO_INT_TYPE(pVal
, base
, wxStrtoull
, wxULongLong_t
);
// convert the string to a double
//
// pVal is the output pointer and must not be NULL. The conversion is done
// by wxStrtod() on the wxChar data returned by c_str(), with the same
// success criteria as for the integer conversions.
1612 bool wxString::ToDouble(double *pVal
) const
1614 wxCHECK_MSG( pVal
, false, _T("NULL output pointer") );
// reset errno so that the ERANGE test below only sees errors from wxStrtod()
1616 DO_IF_NOT_WINCE( errno
= 0; )
1618 const wxChar
*start
= c_str();
1620 double val
= wxStrtod(start
, &end
);
1622 // return true only if scan was stopped by the terminating NUL and if the
1623 // string was not empty to start with and no under/overflow occurred
1624 if ( *end
|| end
== start
DO_IF_NOT_WINCE(|| errno
== ERANGE
) )
1632 // ---------------------------------------------------------------------------
1634 // ---------------------------------------------------------------------------
// Variadic formatting function taking a wxChar format string: the variable
// arguments are collected with va_start() and passed to PrintfV() called
// on s.
//
// It is defined as a member of wxStringPrintfMixinBase if
// wxNEEDS_WXSTRING_PRINTF_MIXIN is defined; the other signature makes it a
// member of wxString. The whole function is only compiled if
// wxUSE_UTF8_LOCALE_ONLY is 0.
1636 #if !wxUSE_UTF8_LOCALE_ONLY
1638 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1639 wxString
wxStringPrintfMixinBase::DoFormatWchar(const wxChar
*format
, ...)
1641 wxString
wxString::DoFormatWchar(const wxChar
*format
, ...)
1645 va_start(argptr
, format
);
1648 s
.PrintfV(format
, argptr
);
1654 #endif // !wxUSE_UTF8_LOCALE_ONLY
// Variadic formatting function taking a char format string, only compiled
// in wxUSE_UNICODE_UTF8 builds: the variable arguments are collected with
// va_start() and passed to PrintfV() called on s.
1656 #if wxUSE_UNICODE_UTF8
1658 wxString
wxString::DoFormatUtf8(const char *format
, ...)
1661 va_start(argptr
, format
);
1664 s
.PrintfV(format
, argptr
);
1670 #endif // wxUSE_UNICODE_UTF8
// va_list version of the formatting functions: calls PrintfV() on s with
// the given format string and argument list
1673 wxString
wxString::FormatV(const wxString
& format
, va_list argptr
)
1676 s
.PrintfV(format
, argptr
);
// Variadic in-place formatting function taking a wxChar format string: the
// variable arguments are collected with va_start() and passed to PrintfV()
// whose result is stored in iLen.
//
// It is defined as a member of wxStringPrintfMixinBase if
// wxNEEDS_WXSTRING_PRINTF_MIXIN is defined; the other signature makes it a
// member of wxString. The whole function is only compiled if
// wxUSE_UTF8_LOCALE_ONLY is 0.
1680 #if !wxUSE_UTF8_LOCALE_ONLY
1681 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1682 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar
*format
, ...)
1684 int wxString::DoPrintfWchar(const wxChar
*format
, ...)
1688 va_start(argptr
, format
);
1690 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1691 // get a pointer to the wxString instance by downcasting this
1692 // NOTE(review): this comment used to say that dynamic_cast<> had to be used
1693 // for safe downcasting with multiple inheritance, but the code uses static_cast<>:
1694 wxString
*str
= static_cast<wxString
*>(this);
1696 wxString
*str
= this;
1699 int iLen
= str
->PrintfV(format
, argptr
);
1705 #endif // !wxUSE_UTF8_LOCALE_ONLY
// Variadic in-place formatting function taking a char format string, only
// compiled in wxUSE_UNICODE_UTF8 builds: the variable arguments are collected
// with va_start() and passed to PrintfV() whose result is stored in iLen.
1707 #if wxUSE_UNICODE_UTF8
1708 int wxString::DoPrintfUtf8(const char *format
, ...)
1711 va_start(argptr
, format
);
1713 int iLen
= PrintfV(format
, argptr
);
1719 #endif // wxUSE_UNICODE_UTF8
1722 Uses wxVsnprintf and places the result into this string.
1724 In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1725 it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in
1726 the ISO C99 (and thus SUSv3) standard the return value for the case of
1727 an undersized buffer is inconsistent. For conforming vsnprintf
1728 implementations the function must return the number of characters that
1729 would have been printed had the buffer been large enough. For conforming
1730 vswprintf implementations the function must return a negative number
1733 What vswprintf sets errno to is undefined but Darwin seems to set it to
1734 EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of
1735 those are defined in the standard and backed up by several conformance
1736 statements. Note that ENOMEM mentioned in the manual page does not
1737 apply to swprintf, only wprintf and fwprintf.
1739 Official manual page:
1740 http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1742 Some conformance statements (AIX, Solaris):
1743 http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1744 http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1746 Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1747 EILSEQ and EINVAL are specifically defined to mean the error is other than
1748 an undersized buffer and no other errno are defined we treat those two
1749 as meaning hard errors and everything else gets the old behavior which
1750 is to keep looping and increasing buffer size until the function succeeds.
1752 In practice it's impossible to determine before compilation which behavior
1753 may be used. The vswprintf function may have vsnprintf-like behavior or
1754 vice-versa. Behavior detected on one release can theoretically change
1755 with an updated release. Not to mention that configure testing for it
1756 would require the test to be run on the host system, not the build system
1757 which makes cross compilation difficult. Therefore, we make no assumptions
1758 about behavior and try our best to handle every known case, including the
1759 case where wxVsnprintf returns a negative number and fails to set errno.
1761 There is yet one more non-standard implementation and that is our own.
1762 Fortunately, that can be detected at compile-time.
1764 On top of all that, ISO C99 explicitly defines snprintf to write a null
1765 character to the last position of the specified buffer. That would be at
1766 the given buffer size minus 1. It is supposed to do this even if it
1767 turns out that the buffer is sized too small.
1769 Darwin (tested on 10.5) follows the C99 behavior exactly.
1771 Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1772 errno even when it fails. However, it only seems to ever fail due
1773 to an undersized buffer.
// Helper of wxString::PrintfV() doing the actual formatting.
//
// A buffer of size + 1 characters is obtained from str, wxVsnprintf() is
// called on it with a copy of argptr and its return value (len) is then
// examined: as explained in the comments below, a negative value or a value
// not less than size is handled by increasing size, except when the failure
// is known to be a hard error.
//
// The return value is the length of str.
//
// In wxUSE_UNICODE_UTF8 builds this is a template parametrized by the type
// of the buffer used to write into str; otherwise it is a plain function
// using wxStringBuffer.
1775 #if wxUSE_UNICODE_UTF8
1776 template<typename BufferType
>
1778 // we only need one version in non-UTF8 builds and at least two Windows
1779 // compilers have problems with this function template, so use just one
1780 // normal function here
1782 static int DoStringPrintfV(wxString
& str
,
1783 const wxString
& format
, va_list argptr
)
1789 #if wxUSE_UNICODE_UTF8
1790 BufferType
tmp(str
, size
+ 1);
1791 typename
BufferType::CharType
*buf
= tmp
;
1793 wxStringBuffer
tmp(str
, size
+ 1);
1801 // in UTF-8 build, leaving uninitialized junk in the buffer
1802 // could result in invalid non-empty UTF-8 string, so just
1803 // reset the string to empty on failure:
1808 // wxVsnprintf() may modify the original arg pointer, so pass it
1811 wxVaCopy(argptrcopy
, argptr
);
1814 // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1817 int len
= wxVsnprintf(buf
, size
, format
, argptrcopy
);
1820 // some implementations of vsnprintf() don't NUL terminate
1821 // the string if there is not enough space for it so
1822 // always do it manually
1823 // FIXME: This really seems to be wrong and would be an off-by-one
1824 // bug except the code above allocates an extra character.
1825 buf
[size
] = _T('\0');
1827 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1828 // total number of characters which would have been written if the
1829 // buffer were large enough (newer standards such as Unix98)
1832 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1833 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1834 // is true if *both* of them use our own implementation,
1835 // otherwise we can't be sure
1836 #if wxUSE_WXVSNPRINTF
1837 // we know that our own implementation of wxVsnprintf() returns -1
1838 // only for a format error - thus there's something wrong with
1839 // the user's format string
1842 #else // possibly using system version
1843 // assume it only returns error if there is not enough space, but
1844 // as we don't know how much we need, double the current size of
1847 if( (errno
== EILSEQ
) || (errno
== EINVAL
) )
1848 // If errno was set to one of the two well-known hard errors
1849 // then fail immediately to avoid an infinite loop.
1852 #endif // __WXWINCE__
1853 // still not enough, as we don't know how much we need, double the
1854 // current size of the buffer
1856 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1858 else if ( len
>= size
)
1860 #if wxUSE_WXVSNPRINTF
1861 // we know that our own implementation of wxVsnprintf() returns
1862 // size+1 when there's not enough space but that's not the size
1863 // of the required buffer!
1864 size
*= 2; // so we just double the current size of the buffer
1866 // some vsnprintf() implementations NUL-terminate the buffer and
1867 // some don't in len == size case, to be safe always add 1
1868 // FIXME: I don't quite understand this comment. The vsnprintf
1869 // function is specifically defined to return the number of
1870 // characters printed not including the null terminator.
1871 // So OF COURSE you need to add 1 to get the right buffer size.
1872 // The following line is definitely correct, no question.
1876 else // ok, there was enough space
1882 // we could have overshot
1885 return str
.length();
// Format the string in place using the given format and argument list by
// forwarding to the DoStringPrintfV() variant appropriate for the build:
//
//  - in wxUSE_UNICODE_UTF8 builds Utf8Buffer is defined as a typedef whose
//    definition depends on wxUSE_STL_BASED_WXSTRING; DoStringPrintfV() is
//    instantiated with it if wxUSE_UTF8_LOCALE_ONLY is set or if
//    wxLocaleIsUtf8 is true, the other instantiation uses wxStringBuffer
//  - otherwise the non-template DoStringPrintfV() is called
1888 int wxString::PrintfV(const wxString
& format
, va_list argptr
)
1890 #if wxUSE_UNICODE_UTF8
1891 #if wxUSE_STL_BASED_WXSTRING
1892 typedef wxStringTypeBuffer
<char> Utf8Buffer
;
1894 typedef wxStringInternalBuffer Utf8Buffer
;
1898 #if wxUSE_UTF8_LOCALE_ONLY
1899 return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argptr
);
1901 #if wxUSE_UNICODE_UTF8
1902 if ( wxLocaleIsUtf8
)
1903 return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argptr
);
1906 return DoStringPrintfV
<wxStringBuffer
>(*this, format
, argptr
);
1908 return DoStringPrintfV(*this, format
, argptr
);
1909 #endif // UTF8/WCHAR
1913 // ----------------------------------------------------------------------------
1914 // misc other operations
1915 // ----------------------------------------------------------------------------
1917 // returns true if the string matches the pattern which may contain '*' and
1918 // '?' metacharacters (as usual, '?' matches any character and '*' any number
//
// The regex-based implementation below is disabled ("#if 0"); the code
// actually used walks the mask and the text in parallel through two
// pointers, pszMask and pszTxt, and remembers the position of the last '*'
// in both of them to be able to backtrack when a later match fails.
1920 bool wxString::Matches(const wxString
& mask
) const
1922 // I disable this code as it doesn't seem to be faster (in fact, it seems
1923 // to be much slower) than the old, hand-written code below and using it
1924 // here requires always linking with libregex even if the user code doesn't
1926 #if 0 // wxUSE_REGEX
1927 // first translate the shell-like mask into a regex
1929 pattern
.reserve(wxStrlen(pszMask
));
1941 pattern
+= _T(".*");
1952 // these characters are special in a RE, quote them
1953 // (however note that we don't quote '[' and ']' to allow
1954 // using them for Unix shell like matching)
1955 pattern
+= _T('\\');
1959 pattern
+= *pszMask
;
1967 return wxRegEx(pattern
, wxRE_NOSUB
| wxRE_EXTENDED
).Matches(c_str());
1968 #else // !wxUSE_REGEX
1969 // TODO: this is, of course, awfully inefficient...
1971 // FIXME-UTF8: implement using iterators, remove #if
// in UTF-8 build both strings are first converted using wc_str(); the
// buffers are kept in local variables while pointers into them are in use
1972 #if wxUSE_UNICODE_UTF8
1973 wxWCharBuffer maskBuf
= mask
.wc_str();
1974 wxWCharBuffer txtBuf
= wc_str();
1975 const wxChar
*pszMask
= maskBuf
.data();
1976 const wxChar
*pszTxt
= txtBuf
.data();
1978 const wxChar
*pszMask
= mask
.wx_str();
1979 // the char currently being checked
1980 const wxChar
*pszTxt
= wx_str();
1983 // the last location where '*' matched
1984 const wxChar
*pszLastStarInText
= NULL
;
1985 const wxChar
*pszLastStarInMask
= NULL
;
// main loop: both pointers advance by one character on each iteration
1988 for ( ; *pszMask
!= wxT('\0'); pszMask
++, pszTxt
++ ) {
1989 switch ( *pszMask
) {
1991 if ( *pszTxt
== wxT('\0') )
1994 // pszTxt and pszMask will be incremented in the loop statement
2000 // remember where we started to be able to backtrack later
2001 pszLastStarInText
= pszTxt
;
2002 pszLastStarInMask
= pszMask
;
2004 // ignore special chars immediately following this one
2005 // (should this be an error?)
2006 while ( *pszMask
== wxT('*') || *pszMask
== wxT('?') )
2009 // if there is nothing more, match
2010 if ( *pszMask
== wxT('\0') )
2013 // are there any other metacharacters in the mask?
2015 const wxChar
*pEndMask
= wxStrpbrk(pszMask
, wxT("*?"));
2017 if ( pEndMask
!= NULL
) {
2018 // we have to match the string between two metachars
2019 uiLenMask
= pEndMask
- pszMask
;
2022 // we have to match the remainder of the string
2023 uiLenMask
= wxStrlen(pszMask
);
// look for the literal part of the mask anywhere in the remaining text
2026 wxString
strToMatch(pszMask
, uiLenMask
);
2027 const wxChar
* pMatch
= wxStrstr(pszTxt
, strToMatch
);
2028 if ( pMatch
== NULL
)
2031 // -1 to compensate "++" in the loop
2032 pszTxt
= pMatch
+ uiLenMask
- 1;
2033 pszMask
+= uiLenMask
- 1;
// ordinary character in the mask: it is compared with the text directly
2038 if ( *pszMask
!= *pszTxt
)
2044 // match only if nothing left
2045 if ( *pszTxt
== wxT('\0') )
2048 // if we failed to match, backtrack if we can
// (the text pointer restarts one character after the last '*' position)
2049 if ( pszLastStarInText
) {
2050 pszTxt
= pszLastStarInText
+ 1;
2051 pszMask
= pszLastStarInMask
;
2053 pszLastStarInText
= NULL
;
2055 // don't bother resetting pszLastStarInMask, it's unnecessary
2061 #endif // wxUSE_REGEX/!wxUSE_REGEX
2064 // Count the number of chars
// (the whole string is scanned from begin() to end(); presumably the
// characters equal to ch are the ones being counted -- confirm)
2065 int wxString::Freq(wxUniChar ch
) const
2068 for ( const_iterator i
= begin(); i
!= end(); ++i
)
2076 // ----------------------------------------------------------------------------
2077 // wxUTF8StringBuffer
2078 // ----------------------------------------------------------------------------
// (only compiled in wxUSE_UNICODE_WCHAR builds)
//
// The destructor converts the contents of m_buf using wxMBConvStrictUTF8 and
// writes the result into m_str through a wxStringInternalBuffer.
2080 #if wxUSE_UNICODE_WCHAR
2081 wxUTF8StringBuffer::~wxUTF8StringBuffer()
2083 wxMBConvStrictUTF8 conv
;
// first call, without any output buffer, yields wlen which is then used as
// the size of the destination buffer
2084 size_t wlen
= conv
.ToWChar(NULL
, 0, m_buf
);
2085 wxCHECK_RET( wlen
!= wxCONV_FAILED
, "invalid UTF-8 data in string buffer?" );
// second call does the conversion into the buffer obtained from m_str
2087 wxStringInternalBuffer
wbuf(m_str
, wlen
);
2088 conv
.ToWChar(wbuf
, wlen
, m_buf
);
// Same as the wxUTF8StringBuffer destructor, but for the buffer with an
// explicitly specified length: m_lenSet is checked first, m_len is passed to
// both conversion calls and the length of the result is set explicitly with
// SetLength().
2091 wxUTF8StringBufferLength::~wxUTF8StringBufferLength()
2093 wxCHECK_RET(m_lenSet
, "length not set");
2095 wxMBConvStrictUTF8 conv
;
// first call, without any output buffer, yields wlen which is then used as
// the size of the destination buffer
2096 size_t wlen
= conv
.ToWChar(NULL
, 0, m_buf
, m_len
);
2097 wxCHECK_RET( wlen
!= wxCONV_FAILED
, "invalid UTF-8 data in string buffer?" );
// second call does the conversion into the buffer obtained from m_str
2099 wxStringInternalBufferLength
wbuf(m_str
, wlen
);
2100 conv
.ToWChar(wbuf
, wlen
, m_buf
, m_len
);
2101 wbuf
.SetLength(wlen
);
2103 #endif // wxUSE_UNICODE_WCHAR
2105 // ----------------------------------------------------------------------------
2106 // wxCharBufferType<T>
2107 // ----------------------------------------------------------------------------
// definitions of the NullData members of the char and wchar_t variants of
// wxCharTypeBuffer, both of type Data and both constructed from NULL
2110 wxCharTypeBuffer
<char>::Data
2111 wxCharTypeBuffer
<char>::NullData(NULL
);
2114 wxCharTypeBuffer
<wchar_t>::Data
2115 wxCharTypeBuffer
<wchar_t>::NullData(NULL
);