1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
13 // ===========================================================================
14 // headers, declarations, constants
15 // ===========================================================================
17 // For compilers that support precompilation, includes "wx.h".
18 #include "wx/wxprec.h"
25 #include "wx/string.h"
26 #include "wx/wxcrtvararg.h"
38 #include "wx/hashmap.h"
40 // string handling functions used by wxString:
41 #if wxUSE_UNICODE_UTF8
42 #define wxStringMemcpy memcpy
43 #define wxStringMemcmp memcmp
44 #define wxStringMemchr memchr
45 #define wxStringStrlen strlen
47 #define wxStringMemcpy wxTmemcpy
48 #define wxStringMemcmp wxTmemcmp
49 #define wxStringMemchr wxTmemchr
50 #define wxStringStrlen wxStrlen
54 // ---------------------------------------------------------------------------
55 // static class variables definition
56 // ---------------------------------------------------------------------------
58 //According to STL _must_ be a -1 size_t
59 const size_t wxString::npos
= (size_t) -1;
61 #if wxUSE_UNICODE_UTF8
62 wxString::PosToImplCache
wxString::ms_cache
;
63 #endif // wxUSE_UNICODE_UTF8
65 // ----------------------------------------------------------------------------
67 // ----------------------------------------------------------------------------
69 #if wxUSE_STD_IOSTREAM
73 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxCStrData
& str
)
75 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
76 return os
<< (const char *)str
.AsCharBuf();
78 return os
<< str
.AsInternal();
82 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxString
& str
)
84 return os
<< str
.c_str();
87 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxCharBuffer
& str
)
89 return os
<< str
.data();
93 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxWCharBuffer
& str
)
95 return os
<< str
.data();
99 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
101 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxString
& str
)
103 return wos
<< str
.wc_str();
106 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxCStrData
& str
)
108 return wos
<< str
.AsWChar();
111 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxWCharBuffer
& str
)
113 return wos
<< str
.data();
116 #endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
118 #endif // wxUSE_STD_IOSTREAM
120 // ===========================================================================
121 // wxString class core
122 // ===========================================================================
124 #if wxUSE_UNICODE_UTF8
126 void wxString::PosLenToImpl(size_t pos
, size_t len
,
127 size_t *implPos
, size_t *implLen
) const
133 const_iterator i
= begin() + pos
;
134 *implPos
= wxStringImpl::const_iterator(i
.impl()) - m_impl
.begin();
139 // too large length is interpreted as "to the end of the string"
140 // FIXME-UTF8: verify this is the case in std::string, assert
142 if ( pos
+ len
> length() )
143 len
= length() - pos
;
145 *implLen
= (i
+ len
).impl() - i
.impl();
150 #endif // wxUSE_UNICODE_UTF8
152 // ----------------------------------------------------------------------------
153 // wxCStrData converted strings caching
154 // ----------------------------------------------------------------------------
156 // FIXME-UTF8: temporarily disabled because it doesn't work with global
157 // string objects; re-enable after fixing this bug and benchmarking
158 // performance to see if using a hash is a good idea at all
161 // For backward compatibility reasons, it must be possible to assign the value
162 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
163 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
164 // because the memory would be freed immediately, but it has to be valid as long
165 // as the string is not modified, so that code like this still works:
167 // const wxChar *s = str.c_str();
168 // while ( s ) { ... }
170 // FIXME-UTF8: not thread safe!
171 // FIXME-UTF8: we currently clear the cached conversion only when the string is
172 // destroyed, but we should do it when the string is modified, to
173 // keep memory usage down
174 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
175 // invalidated the cache on every change, we could keep the previous
177 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
178 // to use mb_str() or wc_str() instead of (const [w]char*)c_str()
181 static inline void DeleteStringFromConversionCache(T
& hash
, const wxString
*s
)
183 typename
T::iterator i
= hash
.find(wxConstCast(s
, wxString
));
184 if ( i
!= hash
.end() )
192 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
193 // so we have to use wxString* here and const-cast when used
194 WX_DECLARE_HASH_MAP(wxString
*, char*, wxPointerHash
, wxPointerEqual
,
195 wxStringCharConversionCache
);
196 static wxStringCharConversionCache gs_stringsCharCache
;
198 const char* wxCStrData::AsChar() const
200 // remove previously cached value, if any (see FIXMEs above):
201 DeleteStringFromConversionCache(gs_stringsCharCache
, m_str
);
203 // convert the string and keep it:
204 const char *s
= gs_stringsCharCache
[wxConstCast(m_str
, wxString
)] =
205 m_str
->mb_str().release();
209 #endif // wxUSE_UNICODE
211 #if !wxUSE_UNICODE_WCHAR
212 WX_DECLARE_HASH_MAP(wxString
*, wchar_t*, wxPointerHash
, wxPointerEqual
,
213 wxStringWCharConversionCache
);
214 static wxStringWCharConversionCache gs_stringsWCharCache
;
216 const wchar_t* wxCStrData::AsWChar() const
218 // remove previously cached value, if any (see FIXMEs above):
219 DeleteStringFromConversionCache(gs_stringsWCharCache
, m_str
);
221 // convert the string and keep it:
222 const wchar_t *s
= gs_stringsWCharCache
[wxConstCast(m_str
, wxString
)] =
223 m_str
->wc_str().release();
227 #endif // !wxUSE_UNICODE_WCHAR
229 wxString::~wxString()
232 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
233 DeleteStringFromConversionCache(gs_stringsCharCache
, this);
235 #if !wxUSE_UNICODE_WCHAR
236 DeleteStringFromConversionCache(gs_stringsWCharCache
, this);
241 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
242 const char* wxCStrData::AsChar() const
244 #if wxUSE_UNICODE_UTF8
245 if ( wxLocaleIsUtf8
)
248 // under non-UTF8 locales, we have to convert the internal UTF-8
249 // representation using wxConvLibc and cache the result
251 wxString
*str
= wxConstCast(m_str
, wxString
);
253 // convert the string:
255 // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
256 // have it) but it's unfortunately not obvious to implement
257 // because we don't know how big a buffer we need for the
258 // given string length (in case of multibyte encodings, e.g.
259 // ISO-2022-JP or UTF-8 when internal representation is wchar_t)
261 // One idea would be to store more than just m_convertedToChar
262 // in wxString: then we could record the length of the string
263 // which was converted the last time and try to reuse the same
264 // buffer if the current length is not greater than it (this
265 // could still fail because string could have been modified in
266 // place but it would work most of the time, so we'd do it and
267 // only allocate the new buffer if in-place conversion returned
268 // an error). We could also store a bit saying if the string
269 // was modified since the last conversion (and update it in all
270 // operation modifying the string, of course) to avoid unneeded
271 // consequential conversions. But both of these ideas require
272 // adding more fields to wxString and require profiling results
273 // to be sure that we really gain enough from them to justify
275 wxCharBuffer
buf(str
->mb_str());
277 // if it failed, return empty string and not NULL to avoid crashes in code
278 // written with either wxWidgets 2 wxString or std::string behaviour in
279 // mind: neither of them ever returns NULL and so neither should we
283 if ( str
->m_convertedToChar
&&
284 strlen(buf
) == strlen(str
->m_convertedToChar
) )
286 // keep the same buffer for as long as possible, so that several calls
287 // to c_str() in a row still work:
288 strcpy(str
->m_convertedToChar
, buf
);
292 str
->m_convertedToChar
= buf
.release();
296 return str
->m_convertedToChar
+ m_offset
;
298 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
300 #if !wxUSE_UNICODE_WCHAR
301 const wchar_t* wxCStrData::AsWChar() const
303 wxString
*str
= wxConstCast(m_str
, wxString
);
305 // convert the string:
306 wxWCharBuffer
buf(str
->wc_str());
308 // notice that here, unlike above in AsChar(), conversion can't fail as our
309 // internal UTF-8 is always well-formed -- or the string was corrupted and
310 // all bets are off anyhow
312 // FIXME-UTF8: do the conversion in-place in the existing buffer
313 if ( str
->m_convertedToWChar
&&
314 wxWcslen(buf
) == wxWcslen(str
->m_convertedToWChar
) )
316 // keep the same buffer for as long as possible, so that several calls
317 // to c_str() in a row still work:
318 memcpy(str
->m_convertedToWChar
, buf
, sizeof(wchar_t) * wxWcslen(buf
));
322 str
->m_convertedToWChar
= buf
.release();
326 return str
->m_convertedToWChar
+ m_offset
;
328 #endif // !wxUSE_UNICODE_WCHAR
330 // ===========================================================================
331 // wxString class core
332 // ===========================================================================
334 // ---------------------------------------------------------------------------
335 // construction and conversion
336 // ---------------------------------------------------------------------------
338 #if wxUSE_UNICODE_WCHAR
340 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
341 const wxMBConv
& conv
)
344 if ( !psz
|| nLength
== 0 )
345 return SubstrBufFromMB(L
"", 0);
347 if ( nLength
== npos
)
351 wxWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
353 return SubstrBufFromMB(_T(""), 0);
355 return SubstrBufFromMB(wcBuf
, wcLen
);
357 #endif // wxUSE_UNICODE_WCHAR
359 #if wxUSE_UNICODE_UTF8
361 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
362 const wxMBConv
& conv
)
365 if ( !psz
|| nLength
== 0 )
366 return SubstrBufFromMB("", 0);
368 // if psz is already in UTF-8, we don't have to do the roundtrip to
369 // wchar_t* and back:
372 // we need to validate the input because UTF8 iterators assume valid
373 // UTF-8 sequence and psz may be invalid:
374 if ( wxStringOperations::IsValidUtf8String(psz
, nLength
) )
376 // we must pass the real string length to SubstrBufFromMB ctor
377 if ( nLength
== npos
)
378 nLength
= psz
? strlen(psz
) : 0;
379 return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz
), nLength
);
381 // else: do the roundtrip through wchar_t*
384 if ( nLength
== npos
)
387 // first convert to wide string:
389 wxWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
391 return SubstrBufFromMB("", 0);
393 // and then to UTF-8:
394 SubstrBufFromMB
buf(ConvertStr(wcBuf
, wcLen
, wxMBConvStrictUTF8()));
395 // widechar -> UTF-8 conversion isn't supposed to ever fail:
396 wxASSERT_MSG( buf
.data
, _T("conversion to UTF-8 failed") );
400 #endif // wxUSE_UNICODE_UTF8
402 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
404 wxString::SubstrBufFromWC
wxString::ConvertStr(const wchar_t *pwz
, size_t nLength
,
405 const wxMBConv
& conv
)
408 if ( !pwz
|| nLength
== 0 )
409 return SubstrBufFromWC("", 0);
411 if ( nLength
== npos
)
415 wxCharBuffer
mbBuf(conv
.cWC2MB(pwz
, nLength
, &mbLen
));
417 return SubstrBufFromWC("", 0);
419 return SubstrBufFromWC(mbBuf
, mbLen
);
421 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
424 #if wxUSE_UNICODE_WCHAR
426 //Convert wxString in Unicode mode to a multi-byte string
427 const wxCharBuffer
wxString::mb_str(const wxMBConv
& conv
) const
429 return conv
.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL
);
432 #elif wxUSE_UNICODE_UTF8
434 const wxWCharBuffer
wxString::wc_str() const
436 return wxMBConvStrictUTF8().cMB2WC
439 m_impl
.length() + 1, // size, not length
444 const wxCharBuffer
wxString::mb_str(const wxMBConv
& conv
) const
447 return wxCharBuffer::CreateNonOwned(m_impl
.c_str());
449 // FIXME-UTF8: use wc_str() here once we have buffers with length
452 wxWCharBuffer
wcBuf(wxMBConvStrictUTF8().cMB2WC
455 m_impl
.length() + 1, // size
459 return wxCharBuffer("");
461 return conv
.cWC2MB(wcBuf
, wcLen
+1, NULL
);
466 //Converts this string to a wide character string if unicode
467 //mode is not enabled and wxUSE_WCHAR_T is enabled
468 const wxWCharBuffer
wxString::wc_str(const wxMBConv
& conv
) const
470 return conv
.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL
);
473 #endif // Unicode/ANSI
475 // shrink to minimal size (releasing extra memory)
476 bool wxString::Shrink()
478 wxString
tmp(begin(), end());
480 return tmp
.length() == length();
483 // deprecated compatibility code:
484 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
485 wxStringCharType
*wxString::GetWriteBuf(size_t nLen
)
487 return DoGetWriteBuf(nLen
);
490 void wxString::UngetWriteBuf()
495 void wxString::UngetWriteBuf(size_t nLen
)
497 DoUngetWriteBuf(nLen
);
499 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
502 // ---------------------------------------------------------------------------
504 // ---------------------------------------------------------------------------
506 // all functions are inline in string.h
508 // ---------------------------------------------------------------------------
509 // concatenation operators
510 // ---------------------------------------------------------------------------
513 * concatenation functions come in 5 flavours:
515 * char + string and string + char
516 * C str + string and string + C str
519 wxString
operator+(const wxString
& str1
, const wxString
& str2
)
521 #if !wxUSE_STL_BASED_WXSTRING
522 wxASSERT( str1
.IsValid() );
523 wxASSERT( str2
.IsValid() );
532 wxString
operator+(const wxString
& str
, wxUniChar ch
)
534 #if !wxUSE_STL_BASED_WXSTRING
535 wxASSERT( str
.IsValid() );
544 wxString
operator+(wxUniChar ch
, const wxString
& str
)
546 #if !wxUSE_STL_BASED_WXSTRING
547 wxASSERT( str
.IsValid() );
556 wxString
operator+(const wxString
& str
, const char *psz
)
558 #if !wxUSE_STL_BASED_WXSTRING
559 wxASSERT( str
.IsValid() );
563 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
564 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
572 wxString
operator+(const wxString
& str
, const wchar_t *pwz
)
574 #if !wxUSE_STL_BASED_WXSTRING
575 wxASSERT( str
.IsValid() );
579 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
580 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
588 wxString
operator+(const char *psz
, const wxString
& str
)
590 #if !wxUSE_STL_BASED_WXSTRING
591 wxASSERT( str
.IsValid() );
595 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
596 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
604 wxString
operator+(const wchar_t *pwz
, const wxString
& str
)
606 #if !wxUSE_STL_BASED_WXSTRING
607 wxASSERT( str
.IsValid() );
611 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
612 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
620 // ---------------------------------------------------------------------------
622 // ---------------------------------------------------------------------------
624 bool wxString::IsSameAs(wxUniChar c
, bool compareWithCase
) const
626 return (length() == 1) && (compareWithCase
? GetChar(0u) == c
627 : wxToupper(GetChar(0u)) == wxToupper(c
));
630 #ifdef HAVE_STD_STRING_COMPARE
632 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
633 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
634 // sort strings in characters code point order by sorting the byte sequence
635 // in byte values order (i.e. what strcmp() and memcmp() do).
637 int wxString::compare(const wxString
& str
) const
639 return m_impl
.compare(str
.m_impl
);
642 int wxString::compare(size_t nStart
, size_t nLen
,
643 const wxString
& str
) const
646 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
647 return m_impl
.compare(pos
, len
, str
.m_impl
);
650 int wxString::compare(size_t nStart
, size_t nLen
,
652 size_t nStart2
, size_t nLen2
) const
655 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
658 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
660 return m_impl
.compare(pos
, len
, str
.m_impl
, pos2
, len2
);
663 int wxString::compare(const char* sz
) const
665 return m_impl
.compare(ImplStr(sz
));
668 int wxString::compare(const wchar_t* sz
) const
670 return m_impl
.compare(ImplStr(sz
));
673 int wxString::compare(size_t nStart
, size_t nLen
,
674 const char* sz
, size_t nCount
) const
677 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
679 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
681 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
684 int wxString::compare(size_t nStart
, size_t nLen
,
685 const wchar_t* sz
, size_t nCount
) const
688 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
690 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
692 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
695 #else // !HAVE_STD_STRING_COMPARE
697 static inline int wxDoCmp(const wxStringCharType
* s1
, size_t l1
,
698 const wxStringCharType
* s2
, size_t l2
)
701 return wxStringMemcmp(s1
, s2
, l1
);
704 int ret
= wxStringMemcmp(s1
, s2
, l1
);
705 return ret
== 0 ? -1 : ret
;
709 int ret
= wxStringMemcmp(s1
, s2
, l2
);
710 return ret
== 0 ? +1 : ret
;
714 int wxString::compare(const wxString
& str
) const
716 return ::wxDoCmp(m_impl
.data(), m_impl
.length(),
717 str
.m_impl
.data(), str
.m_impl
.length());
720 int wxString::compare(size_t nStart
, size_t nLen
,
721 const wxString
& str
) const
723 wxASSERT(nStart
<= length());
724 size_type strLen
= length() - nStart
;
725 nLen
= strLen
< nLen
? strLen
: nLen
;
728 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
730 return ::wxDoCmp(m_impl
.data() + pos
, len
,
731 str
.m_impl
.data(), str
.m_impl
.length());
734 int wxString::compare(size_t nStart
, size_t nLen
,
736 size_t nStart2
, size_t nLen2
) const
738 wxASSERT(nStart
<= length());
739 wxASSERT(nStart2
<= str
.length());
740 size_type strLen
= length() - nStart
,
741 strLen2
= str
.length() - nStart2
;
742 nLen
= strLen
< nLen
? strLen
: nLen
;
743 nLen2
= strLen2
< nLen2
? strLen2
: nLen2
;
746 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
748 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
750 return ::wxDoCmp(m_impl
.data() + pos
, len
,
751 str
.m_impl
.data() + pos2
, len2
);
754 int wxString::compare(const char* sz
) const
756 SubstrBufFromMB
str(ImplStr(sz
, npos
));
757 if ( str
.len
== npos
)
758 str
.len
= wxStringStrlen(str
.data
);
759 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
762 int wxString::compare(const wchar_t* sz
) const
764 SubstrBufFromWC
str(ImplStr(sz
, npos
));
765 if ( str
.len
== npos
)
766 str
.len
= wxStringStrlen(str
.data
);
767 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
770 int wxString::compare(size_t nStart
, size_t nLen
,
771 const char* sz
, size_t nCount
) const
773 wxASSERT(nStart
<= length());
774 size_type strLen
= length() - nStart
;
775 nLen
= strLen
< nLen
? strLen
: nLen
;
778 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
780 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
781 if ( str
.len
== npos
)
782 str
.len
= wxStringStrlen(str
.data
);
784 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
787 int wxString::compare(size_t nStart
, size_t nLen
,
788 const wchar_t* sz
, size_t nCount
) const
790 wxASSERT(nStart
<= length());
791 size_type strLen
= length() - nStart
;
792 nLen
= strLen
< nLen
? strLen
: nLen
;
795 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
797 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
798 if ( str
.len
== npos
)
799 str
.len
= wxStringStrlen(str
.data
);
801 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
804 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
807 // ---------------------------------------------------------------------------
808 // find_{first,last}_[not]_of functions
809 // ---------------------------------------------------------------------------
811 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
813 // NB: All these functions are implemented with the argument being wxChar*,
814 // i.e. widechar string in any Unicode build, even though native string
815 // representation is char* in the UTF-8 build. This is because we couldn't
816 // use memchr() to determine if a character is in a set encoded as UTF-8.
818 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
) const
820 return find_first_of(sz
, nStart
, wxStrlen(sz
));
823 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
) const
825 return find_first_not_of(sz
, nStart
, wxStrlen(sz
));
828 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
830 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
833 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
835 if ( wxTmemchr(sz
, *i
, n
) )
842 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
844 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
847 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
849 if ( !wxTmemchr(sz
, *i
, n
) )
857 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
) const
859 return find_last_of(sz
, nStart
, wxStrlen(sz
));
862 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
) const
864 return find_last_not_of(sz
, nStart
, wxStrlen(sz
));
867 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
869 size_t len
= length();
871 if ( nStart
== npos
)
877 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
881 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
882 i
!= rend(); --idx
, ++i
)
884 if ( wxTmemchr(sz
, *i
, n
) )
891 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
893 size_t len
= length();
895 if ( nStart
== npos
)
901 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
905 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
906 i
!= rend(); --idx
, ++i
)
908 if ( !wxTmemchr(sz
, *i
, n
) )
915 size_t wxString::find_first_not_of(wxUniChar ch
, size_t nStart
) const
917 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
920 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
929 size_t wxString::find_last_not_of(wxUniChar ch
, size_t nStart
) const
931 size_t len
= length();
933 if ( nStart
== npos
)
939 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
943 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
944 i
!= rend(); --idx
, ++i
)
953 // the functions above were implemented for wchar_t* arguments in Unicode
954 // build and char* in ANSI build; below are implementations for the other
957 #define wxOtherCharType char
958 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
960 #define wxOtherCharType wchar_t
961 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
964 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
) const
965 { return find_first_of(STRCONV(sz
), nStart
); }
967 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
,
969 { return find_first_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
970 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
) const
971 { return find_last_of(STRCONV(sz
), nStart
); }
972 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
,
974 { return find_last_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
975 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
976 { return find_first_not_of(STRCONV(sz
), nStart
); }
977 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
,
979 { return find_first_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
980 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
981 { return find_last_not_of(STRCONV(sz
), nStart
); }
982 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
,
984 { return find_last_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
986 #undef wxOtherCharType
989 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
991 // ===========================================================================
992 // other common string functions
993 // ===========================================================================
995 int wxString::CmpNoCase(const wxString
& s
) const
997 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
999 const_iterator i1
= begin();
1000 const_iterator end1
= end();
1001 const_iterator i2
= s
.begin();
1002 const_iterator end2
= s
.end();
1004 for ( ; i1
!= end1
&& i2
!= end2
; ++i1
, ++i2
)
1006 wxUniChar lower1
= (wxChar
)wxTolower(*i1
);
1007 wxUniChar lower2
= (wxChar
)wxTolower(*i2
);
1008 if ( lower1
!= lower2
)
1009 return lower1
< lower2
? -1 : 1;
1012 size_t len1
= length();
1013 size_t len2
= s
.length();
1017 else if ( len1
> len2
)
1026 #ifndef __SCHAR_MAX__
1027 #define __SCHAR_MAX__ 127
1031 wxString
wxString::FromAscii(const char *ascii
, size_t len
)
1033 if (!ascii
|| len
== 0)
1034 return wxEmptyString
;
1039 wxStringInternalBuffer
buf(res
, len
);
1040 wxStringCharType
*dest
= buf
;
1042 for ( ; len
> 0; --len
)
1044 unsigned char c
= (unsigned char)*ascii
++;
1045 wxASSERT_MSG( c
< 0x80,
1046 _T("Non-ASCII value passed to FromAscii().") );
1048 *dest
++ = (wchar_t)c
;
1055 wxString
wxString::FromAscii(const char *ascii
)
1057 return FromAscii(ascii
, wxStrlen(ascii
));
1060 wxString
wxString::FromAscii(char ascii
)
1062 // What do we do with '\0' ?
1064 unsigned char c
= (unsigned char)ascii
;
1066 wxASSERT_MSG( c
< 0x80, _T("Non-ASCII value passed to FromAscii().") );
1068 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1069 return wxString(wxUniChar((wchar_t)c
));
1072 const wxCharBuffer
wxString::ToAscii() const
1074 // this will allocate enough space for the terminating NUL too
1075 wxCharBuffer
buffer(length());
1076 char *dest
= buffer
.data();
1078 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1081 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1082 *dest
++ = c
.IsAscii() ? (char)c
: '_';
1084 // the output string can't have embedded NULs anyhow, so we can safely
1085 // stop at first of them even if we do have any
1093 #endif // wxUSE_UNICODE
1095 // extract string of length nCount starting at nFirst
1096 wxString
wxString::Mid(size_t nFirst
, size_t nCount
) const
1098 size_t nLen
= length();
1100 // default value of nCount is npos and means "till the end"
1101 if ( nCount
== npos
)
1103 nCount
= nLen
- nFirst
;
1106 // out-of-bounds requests return sensible things
1107 if ( nFirst
+ nCount
> nLen
)
1109 nCount
= nLen
- nFirst
;
1112 if ( nFirst
> nLen
)
1114 // AllocCopy() will return empty string
1115 return wxEmptyString
;
1118 wxString
dest(*this, nFirst
, nCount
);
1119 if ( dest
.length() != nCount
)
1121 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1127 // check that the string starts with prefix and return the rest of the string
1128 // in the provided pointer if it is not NULL, otherwise return false
1129 bool wxString::StartsWith(const wxString
& prefix
, wxString
*rest
) const
1131 if ( compare(0, prefix
.length(), prefix
) != 0 )
1136 // put the rest of the string into provided pointer
1137 rest
->assign(*this, prefix
.length(), npos
);
1144 // check that the string ends with suffix and return the rest of it in the
1145 // provided pointer if it is not NULL, otherwise return false
1146 bool wxString::EndsWith(const wxString
& suffix
, wxString
*rest
) const
1148 int start
= length() - suffix
.length();
1150 if ( start
< 0 || compare(start
, npos
, suffix
) != 0 )
1155 // put the rest of the string into provided pointer
1156 rest
->assign(*this, 0, start
);
1163 // extract nCount last (rightmost) characters
1164 wxString
wxString::Right(size_t nCount
) const
1166 if ( nCount
> length() )
1169 wxString
dest(*this, length() - nCount
, nCount
);
1170 if ( dest
.length() != nCount
) {
1171 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1176 // get all characters after the last occurence of ch
1177 // (returns the whole string if ch not found)
1178 wxString
wxString::AfterLast(wxUniChar ch
) const
1181 int iPos
= Find(ch
, true);
1182 if ( iPos
== wxNOT_FOUND
)
1185 str
= wx_str() + iPos
+ 1;
1190 // extract nCount first (leftmost) characters
1191 wxString
wxString::Left(size_t nCount
) const
1193 if ( nCount
> length() )
1196 wxString
dest(*this, 0, nCount
);
1197 if ( dest
.length() != nCount
) {
1198 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1203 // get all characters before the first occurence of ch
1204 // (returns the whole string if ch not found)
1205 wxString
wxString::BeforeFirst(wxUniChar ch
) const
1207 int iPos
= Find(ch
);
1208 if ( iPos
== wxNOT_FOUND
) iPos
= length();
1209 return wxString(*this, 0, iPos
);
1212 /// get all characters before the last occurence of ch
1213 /// (returns empty string if ch not found)
1214 wxString
wxString::BeforeLast(wxUniChar ch
) const
1217 int iPos
= Find(ch
, true);
1218 if ( iPos
!= wxNOT_FOUND
&& iPos
!= 0 )
1219 str
= wxString(c_str(), iPos
);
1224 /// get all characters after the first occurence of ch
1225 /// (returns empty string if ch not found)
1226 wxString
wxString::AfterFirst(wxUniChar ch
) const
1229 int iPos
= Find(ch
);
1230 if ( iPos
!= wxNOT_FOUND
)
1231 str
= wx_str() + iPos
+ 1;
1236 // replace first (or all) occurrences of some substring with another one
1237 size_t wxString::Replace(const wxString
& strOld
,
1238 const wxString
& strNew
, bool bReplaceAll
)
1240 // if we tried to replace an empty string we'd enter an infinite loop below
1241 wxCHECK_MSG( !strOld
.empty(), 0,
1242 _T("wxString::Replace(): invalid parameter") );
1244 wxSTRING_INVALIDATE_INDEX_CACHE();
1246 size_t uiCount
= 0; // count of replacements made
1248 // optimize the special common case: replacement of one character by
1249 // another one (in UTF-8 case we can only do this for ASCII characters)
1251 // benchmarks show that this special version is around 3 times faster
1252 // (depending on the proportion of matching characters and UTF-8/wchar_t
1254 if ( strOld
.m_impl
.length() == 1 && strNew
.m_impl
.length() == 1 )
1256 const wxStringCharType chOld
= strOld
.m_impl
[0],
1257 chNew
= strNew
.m_impl
[0];
1259 // this loop is the simplified version of the one below
1260 for ( size_t pos
= 0; ; )
1262 pos
= m_impl
.find(chOld
, pos
);
1266 m_impl
[pos
++] = chNew
;
1274 else // general case
1276 const size_t uiOldLen
= strOld
.m_impl
.length();
1277 const size_t uiNewLen
= strNew
.m_impl
.length();
1279 for ( size_t pos
= 0; ; )
1281 pos
= m_impl
.find(strOld
.m_impl
, pos
);
1285 // replace this occurrence of the old string with the new one
1286 m_impl
.replace(pos
, uiOldLen
, strNew
.m_impl
);
1288 // move up pos past the string that was replaced
1291 // increase replace count
1294 // stop after the first one?
1303 bool wxString::IsAscii() const
1305 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1307 if ( !(*i
).IsAscii() )
1314 bool wxString::IsWord() const
1316 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1318 if ( !wxIsalpha(*i
) )
1325 bool wxString::IsNumber() const
1330 const_iterator i
= begin();
1332 if ( *i
== _T('-') || *i
== _T('+') )
1335 for ( ; i
!= end(); ++i
)
1337 if ( !wxIsdigit(*i
) )
1344 wxString
wxString::Strip(stripType w
) const
1347 if ( w
& leading
) s
.Trim(false);
1348 if ( w
& trailing
) s
.Trim(true);
1352 // ---------------------------------------------------------------------------
1354 // ---------------------------------------------------------------------------
1356 wxString
& wxString::MakeUpper()
1358 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1359 *it
= (wxChar
)wxToupper(*it
);
1364 wxString
& wxString::MakeLower()
1366 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1367 *it
= (wxChar
)wxTolower(*it
);
1372 wxString
& wxString::MakeCapitalized()
1374 const iterator en
= end();
1375 iterator it
= begin();
1378 *it
= (wxChar
)wxToupper(*it
);
1379 for ( ++it
; it
!= en
; ++it
)
1380 *it
= (wxChar
)wxTolower(*it
);
1386 // ---------------------------------------------------------------------------
1387 // trimming and padding
1388 // ---------------------------------------------------------------------------
1390 // some compilers (VC++ 6.0 not to name them) return true for a call to
1391 // isspace('\xEA') in the C locale which seems to be broken to me, but we have
1392 // to live with this by checking that the character is a 7 bit one - even if
1393 // this may fail to detect some spaces (I don't know if Unicode doesn't have
1394 // space-like symbols somewhere except in the first 128 chars), it is arguably
1395 // still better than trimming away accented letters
1396 inline int wxSafeIsspace(wxChar ch
) { return (ch
< 127) && wxIsspace(ch
); }
1398 // trims spaces (in the sense of isspace) from left or right side
1399 wxString
& wxString::Trim(bool bFromRight
)
1401 // first check if we're going to modify the string at all
1404 (bFromRight
&& wxSafeIsspace(GetChar(length() - 1))) ||
1405 (!bFromRight
&& wxSafeIsspace(GetChar(0u)))
1411 // find last non-space character
1412 reverse_iterator psz
= rbegin();
1413 while ( (psz
!= rend()) && wxSafeIsspace(*psz
) )
1416 // truncate at trailing space start
1417 erase(psz
.base(), end());
1421 // find first non-space character
1422 iterator psz
= begin();
1423 while ( (psz
!= end()) && wxSafeIsspace(*psz
) )
1426 // fix up data and length
1427 erase(begin(), psz
);
1434 // adds nCount characters chPad to the string from either side
1435 wxString
& wxString::Pad(size_t nCount
, wxUniChar chPad
, bool bFromRight
)
1437 wxString
s(chPad
, nCount
);
1450 // truncate the string
1451 wxString
& wxString::Truncate(size_t uiLen
)
1453 if ( uiLen
< length() )
1455 erase(begin() + uiLen
, end());
1457 //else: nothing to do, string is already short enough
1462 // ---------------------------------------------------------------------------
1463 // finding (return wxNOT_FOUND if not found and index otherwise)
1464 // ---------------------------------------------------------------------------
1467 int wxString::Find(wxUniChar ch
, bool bFromEnd
) const
1469 size_type idx
= bFromEnd
? find_last_of(ch
) : find_first_of(ch
);
1471 return (idx
== npos
) ? wxNOT_FOUND
: (int)idx
;
1474 // ----------------------------------------------------------------------------
1475 // conversion to numbers
1476 // ----------------------------------------------------------------------------
1478 // The implementation of all the functions below is exactly the same so factor
1479 // it out. Note that number extraction works correctly on UTF-8 strings, so
1480 // we can use wxStringCharType and wx_str() for maximum efficiency.
1483 #define DO_IF_NOT_WINCE(x) x
1485 #define DO_IF_NOT_WINCE(x)
1488 #define WX_STRING_TO_INT_TYPE(out, base, func, T) \
1489 wxCHECK_MSG( out, false, _T("NULL output pointer") ); \
1490 wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") ); \
1492 DO_IF_NOT_WINCE( errno = 0; ) \
1494 const wxStringCharType *start = wx_str(); \
1495 wxStringCharType *end; \
1496 T val = func(start, &end, base); \
1498 /* return true only if scan was stopped by the terminating NUL and */ \
1499 /* if the string was not empty to start with and no under/overflow */ \
1501 if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) ) \
1506 bool wxString::ToLong(long *pVal
, int base
) const
1508 WX_STRING_TO_INT_TYPE(pVal
, base
, wxStrtol
, long);
1511 bool wxString::ToULong(unsigned long *pVal
, int base
) const
1513 WX_STRING_TO_INT_TYPE(pVal
, base
, wxStrtoul
, unsigned long);
1516 bool wxString::ToLongLong(wxLongLong_t
*pVal
, int base
) const
1518 WX_STRING_TO_INT_TYPE(pVal
, base
, wxStrtoll
, wxLongLong_t
);
1521 bool wxString::ToULongLong(wxULongLong_t
*pVal
, int base
) const
1523 WX_STRING_TO_INT_TYPE(pVal
, base
, wxStrtoull
, wxULongLong_t
);
1526 bool wxString::ToDouble(double *pVal
) const
1528 wxCHECK_MSG( pVal
, false, _T("NULL output pointer") );
1530 DO_IF_NOT_WINCE( errno
= 0; )
1532 const wxChar
*start
= c_str();
1534 double val
= wxStrtod(start
, &end
);
1536 // return true only if scan was stopped by the terminating NUL and if the
1537 // string was not empty to start with and no under/overflow occurred
1538 if ( *end
|| end
== start
DO_IF_NOT_WINCE(|| errno
== ERANGE
) )
1546 // ---------------------------------------------------------------------------
1548 // ---------------------------------------------------------------------------
1550 #if !wxUSE_UTF8_LOCALE_ONLY
1552 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1553 wxString
wxStringPrintfMixinBase::DoFormatWchar(const wxChar
*format
, ...)
1555 wxString
wxString::DoFormatWchar(const wxChar
*format
, ...)
1559 va_start(argptr
, format
);
1562 s
.PrintfV(format
, argptr
);
1568 #endif // !wxUSE_UTF8_LOCALE_ONLY
#if wxUSE_UNICODE_UTF8

// builds and returns a new string from a char format string and variable
// arguments by forwarding them to PrintfV(); only compiled in UTF-8 builds
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}

#endif // wxUSE_UNICODE_UTF8
1587 wxString
wxString::FormatV(const wxString
& format
, va_list argptr
)
1590 s
.PrintfV(format
, argptr
);
1594 #if !wxUSE_UTF8_LOCALE_ONLY
1595 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1596 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar
*format
, ...)
1598 int wxString::DoPrintfWchar(const wxChar
*format
, ...)
1602 va_start(argptr
, format
);
1604 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1605 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1606 // because it's the only cast that works safely for downcasting when
1607 // multiple inheritance is used:
1608 wxString
*str
= static_cast<wxString
*>(this);
1610 wxString
*str
= this;
1613 int iLen
= str
->PrintfV(format
, argptr
);
1619 #endif // !wxUSE_UTF8_LOCALE_ONLY
#if wxUSE_UNICODE_UTF8

// formats a char format string and variable arguments into this string by
// forwarding them to PrintfV() and returns its result; only compiled in
// UTF-8 builds
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);
    const int nPrinted = PrintfV(format, args);
    va_end(args);

    return nPrinted;
}

#endif // wxUSE_UNICODE_UTF8
1636 Uses wxVsnprintf and places the result into this string.
1638 In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1639 it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in
1640 the ISO C99 (and thus SUSv3) standard the return value for the case of
1641 an undersized buffer is inconsistent. For conforming vsnprintf
1642 implementations the function must return the number of characters that
1643 would have been printed had the buffer been large enough. For conforming
1644 vswprintf implementations the function must return a negative number
1647 What vswprintf sets errno to is undefined but Darwin seems to set it to
1648 EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of
1649 those are defined in the standard and backed up by several conformance
1650 statements. Note that ENOMEM mentioned in the manual page does not
1651 apply to swprintf, only wprintf and fwprintf.
1653 Official manual page:
1654 http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1656 Some conformance statements (AIX, Solaris):
1657 http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1658 http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1660 Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1661 EILSEQ and EINVAL are specifically defined to mean the error is other than
1662 an undersized buffer and no other errno are defined we treat those two
1663 as meaning hard errors and everything else gets the old behavior which
1664 is to keep looping and increasing buffer size until the function succeeds.
1666 In practice it's impossible to determine before compilation which behavior
1667 may be used. The vswprintf function may have vsnprintf-like behavior or
1668 vice-versa. Behavior detected on one release can theoretically change
1669 with an updated release. Not to mention that configure testing for it
1670 would require the test to be run on the host system, not the build system
1671 which makes cross compilation difficult. Therefore, we make no assumptions
1672 about behavior and try our best to handle every known case, including the
1673 case where wxVsnprintf returns a negative number and fails to set errno.
1675 There is yet one more non-standard implementation and that is our own.
1676 Fortunately, that can be detected at compile-time.
1678 On top of all that, ISO C99 explicitly defines snprintf to write a null
1679 character to the last position of the specified buffer. That would be at
1680 the given buffer size minus 1. It is supposed to do this even if it
1681 turns out that the buffer is sized too small.
1683 Darwin (tested on 10.5) follows the C99 behavior exactly.
1685 Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1686 errno even when it fails. However, it only seems to ever fail due
1687 to an undersized buffer.
1689 #if wxUSE_UNICODE_UTF8
1690 template<typename BufferType
>
1692 // we only need one version in non-UTF8 builds and at least two Windows
1693 // compilers have problems with this function template, so use just one
1694 // normal function here
1696 static int DoStringPrintfV(wxString
& str
,
1697 const wxString
& format
, va_list argptr
)
1703 #if wxUSE_UNICODE_UTF8
1704 BufferType
tmp(str
, size
+ 1);
1705 typename
BufferType::CharType
*buf
= tmp
;
1707 wxStringBuffer
tmp(str
, size
+ 1);
1715 // in UTF-8 build, leaving uninitialized junk in the buffer
1716 // could result in invalid non-empty UTF-8 string, so just
1717 // reset the string to empty on failure:
1722 // wxVsnprintf() may modify the original arg pointer, so pass it
1725 wxVaCopy(argptrcopy
, argptr
);
1728 // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1731 int len
= wxVsnprintf(buf
, size
, format
, argptrcopy
);
1734 // some implementations of vsnprintf() don't NUL terminate
1735 // the string if there is not enough space for it so
1736 // always do it manually
1737 // FIXME: This really seems to be wrong and would be an off-by-one
1738 // bug except the code above allocates an extra character.
1739 buf
[size
] = _T('\0');
1741 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1742 // total number of characters which would have been written if the
1743 // buffer were large enough (newer standards such as Unix98)
1746 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1747 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1748 // is true if *both* of them use our own implementation,
1749 // otherwise we can't be sure
1750 #if wxUSE_WXVSNPRINTF
1751 // we know that our own implementation of wxVsnprintf() returns -1
1752 // only for a format error - thus there's something wrong with
1753 // the user's format string
1756 #else // possibly using system version
1757 // assume it only returns error if there is not enough space, but
1758 // as we don't know how much we need, double the current size of
1761 if( (errno
== EILSEQ
) || (errno
== EINVAL
) )
1762 // If errno was set to one of the two well-known hard errors
1763 // then fail immediately to avoid an infinite loop.
1766 #endif // __WXWINCE__
1767 // still not enough, as we don't know how much we need, double the
1768 // current size of the buffer
1770 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1772 else if ( len
>= size
)
1774 #if wxUSE_WXVSNPRINTF
1775 // we know that our own implementation of wxVsnprintf() returns
1776 // size+1 when there's not enough space but that's not the size
1777 // of the required buffer!
1778 size
*= 2; // so we just double the current size of the buffer
1780 // some vsnprintf() implementations NUL-terminate the buffer and
1781 // some don't in len == size case, to be safe always add 1
1782 // FIXME: I don't quite understand this comment. The vsnprintf
1783 // function is specifically defined to return the number of
1784 // characters printed not including the null terminator.
1785 // So OF COURSE you need to add 1 to get the right buffer size.
1786 // The following line is definitely correct, no question.
1790 else // ok, there was enough space
1796 // we could have overshot
1799 return str
.length();
1802 int wxString::PrintfV(const wxString
& format
, va_list argptr
)
1804 #if wxUSE_UNICODE_UTF8
1805 #if wxUSE_STL_BASED_WXSTRING
1806 typedef wxStringTypeBuffer
<char> Utf8Buffer
;
1808 typedef wxStringInternalBuffer Utf8Buffer
;
1812 #if wxUSE_UTF8_LOCALE_ONLY
1813 return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argptr
);
1815 #if wxUSE_UNICODE_UTF8
1816 if ( wxLocaleIsUtf8
)
1817 return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argptr
);
1820 return DoStringPrintfV
<wxStringBuffer
>(*this, format
, argptr
);
1822 return DoStringPrintfV(*this, format
, argptr
);
1823 #endif // UTF8/WCHAR
1827 // ----------------------------------------------------------------------------
1828 // misc other operations
1829 // ----------------------------------------------------------------------------
1831 // returns true if the string matches the pattern which may contain '*' and
1832 // '?' metacharacters (as usual, '?' matches any character and '*' any number
1834 bool wxString::Matches(const wxString
& mask
) const
1836 // I disable this code as it doesn't seem to be faster (in fact, it seems
1837 // to be much slower) than the old, hand-written code below and using it
1838 // here requires always linking with libregex even if the user code doesn't
1840 #if 0 // wxUSE_REGEX
1841 // first translate the shell-like mask into a regex
1843 pattern
.reserve(wxStrlen(pszMask
));
1855 pattern
+= _T(".*");
1866 // these characters are special in a RE, quote them
1867 // (however note that we don't quote '[' and ']' to allow
1868 // using them for Unix shell like matching)
1869 pattern
+= _T('\\');
1873 pattern
+= *pszMask
;
1881 return wxRegEx(pattern
, wxRE_NOSUB
| wxRE_EXTENDED
).Matches(c_str());
1882 #else // !wxUSE_REGEX
1883 // TODO: this is, of course, awfully inefficient...
1885 // FIXME-UTF8: implement using iterators, remove #if
1886 #if wxUSE_UNICODE_UTF8
1887 wxWCharBuffer maskBuf
= mask
.wc_str();
1888 wxWCharBuffer txtBuf
= wc_str();
1889 const wxChar
*pszMask
= maskBuf
.data();
1890 const wxChar
*pszTxt
= txtBuf
.data();
1892 const wxChar
*pszMask
= mask
.wx_str();
1893 // the char currently being checked
1894 const wxChar
*pszTxt
= wx_str();
1897 // the last location where '*' matched
1898 const wxChar
*pszLastStarInText
= NULL
;
1899 const wxChar
*pszLastStarInMask
= NULL
;
1902 for ( ; *pszMask
!= wxT('\0'); pszMask
++, pszTxt
++ ) {
1903 switch ( *pszMask
) {
1905 if ( *pszTxt
== wxT('\0') )
1908 // pszTxt and pszMask will be incremented in the loop statement
1914 // remember where we started to be able to backtrack later
1915 pszLastStarInText
= pszTxt
;
1916 pszLastStarInMask
= pszMask
;
1918 // ignore special chars immediately following this one
1919 // (should this be an error?)
1920 while ( *pszMask
== wxT('*') || *pszMask
== wxT('?') )
1923 // if there is nothing more, match
1924 if ( *pszMask
== wxT('\0') )
1927 // are there any other metacharacters in the mask?
1929 const wxChar
*pEndMask
= wxStrpbrk(pszMask
, wxT("*?"));
1931 if ( pEndMask
!= NULL
) {
1932 // we have to match the string between two metachars
1933 uiLenMask
= pEndMask
- pszMask
;
1936 // we have to match the remainder of the string
1937 uiLenMask
= wxStrlen(pszMask
);
1940 wxString
strToMatch(pszMask
, uiLenMask
);
1941 const wxChar
* pMatch
= wxStrstr(pszTxt
, strToMatch
);
1942 if ( pMatch
== NULL
)
1945 // -1 to compensate "++" in the loop
1946 pszTxt
= pMatch
+ uiLenMask
- 1;
1947 pszMask
+= uiLenMask
- 1;
1952 if ( *pszMask
!= *pszTxt
)
1958 // match only if nothing left
1959 if ( *pszTxt
== wxT('\0') )
1962 // if we failed to match, backtrack if we can
1963 if ( pszLastStarInText
) {
1964 pszTxt
= pszLastStarInText
+ 1;
1965 pszMask
= pszLastStarInMask
;
1967 pszLastStarInText
= NULL
;
1969 // don't bother resetting pszLastStarInMask, it's unnecessary
1975 #endif // wxUSE_REGEX/!wxUSE_REGEX
1978 // Count the number of chars
1979 int wxString::Freq(wxUniChar ch
) const
1982 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1990 // ----------------------------------------------------------------------------
1991 // wxUTF8StringBuffer
1992 // ----------------------------------------------------------------------------
1994 #if wxUSE_UNICODE_WCHAR
1995 wxUTF8StringBuffer::~wxUTF8StringBuffer()
1997 wxMBConvStrictUTF8 conv
;
1998 size_t wlen
= conv
.ToWChar(NULL
, 0, m_buf
);
1999 wxCHECK_RET( wlen
!= wxCONV_FAILED
, "invalid UTF-8 data in string buffer?" );
2001 wxStringInternalBuffer
wbuf(m_str
, wlen
);
2002 conv
.ToWChar(wbuf
, wlen
, m_buf
);
2005 wxUTF8StringBufferLength::~wxUTF8StringBufferLength()
2007 wxCHECK_RET(m_lenSet
, "length not set");
2009 wxMBConvStrictUTF8 conv
;
2010 size_t wlen
= conv
.ToWChar(NULL
, 0, m_buf
, m_len
);
2011 wxCHECK_RET( wlen
!= wxCONV_FAILED
, "invalid UTF-8 data in string buffer?" );
2013 wxStringInternalBufferLength
wbuf(m_str
, wlen
);
2014 conv
.ToWChar(wbuf
, wlen
, m_buf
, m_len
);
2015 wbuf
.SetLength(wlen
);
2017 #endif // wxUSE_UNICODE_WCHAR