1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
13 // ===========================================================================
14 // headers, declarations, constants
15 // ===========================================================================
17 // For compilers that support precompilation, includes "wx.h".
18 #include "wx/wxprec.h"
25 #include "wx/string.h"
26 #include "wx/wxcrtvararg.h"
38 #include "wx/hashmap.h"
40 // string handling functions used by wxString:
41 #if wxUSE_UNICODE_UTF8
42 #define wxStringMemcpy memcpy
43 #define wxStringMemcmp memcmp
44 #define wxStringMemchr memchr
45 #define wxStringStrlen strlen
47 #define wxStringMemcpy wxTmemcpy
48 #define wxStringMemcmp wxTmemcmp
49 #define wxStringMemchr wxTmemchr
50 #define wxStringStrlen wxStrlen
54 // ---------------------------------------------------------------------------
55 // static class variables definition
56 // ---------------------------------------------------------------------------
58 //According to STL _must_ be a -1 size_t
59 const size_t wxString::npos
= (size_t) -1;
61 // ----------------------------------------------------------------------------
63 // ----------------------------------------------------------------------------
65 #if wxUSE_STD_IOSTREAM
69 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxCStrData
& str
)
71 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
72 return os
<< (const char *)str
.AsCharBuf();
74 return os
<< str
.AsInternal();
78 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxString
& str
)
80 return os
<< str
.c_str();
83 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxCharBuffer
& str
)
85 return os
<< str
.data();
89 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxWCharBuffer
& str
)
91 return os
<< str
.data();
95 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
97 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxString
& str
)
99 return wos
<< str
.wc_str();
102 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxCStrData
& str
)
104 return wos
<< str
.AsWChar();
107 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxWCharBuffer
& str
)
109 return wos
<< str
.data();
112 #endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
114 #endif // wxUSE_STD_IOSTREAM
116 // ===========================================================================
117 // wxString class core
118 // ===========================================================================
120 #if wxUSE_UNICODE_UTF8
// Translates a position/length pair expressed in characters into the
// corresponding offset/length inside the implementation string m_impl,
// writing the results through implPos and implLen.
// NOTE(review): several lines of this definition are not visible here, so
// any special-casing of the inputs (e.g. npos) cannot be confirmed from
// this view.
122 void wxString::PosLenToImpl(size_t pos
, size_t len
,
123 size_t *implPos
, size_t *implLen
) const
// iterator designating the character at index pos
129 const_iterator i
= begin() + pos
;
// offset of that character measured from the start of m_impl
130 *implPos
= wxStringImpl::const_iterator(i
.impl()) - m_impl
.begin();
135 // too large length is interpreted as "to the end of the string"
136 // FIXME-UTF8: verify this is the case in std::string, assert
138 if ( pos
+ len
> length() )
139 len
= length() - pos
;
// distance in m_impl between the character at pos and the one len
// characters further on
141 *implLen
= (i
+ len
).impl() - i
.impl();
146 #endif // wxUSE_UNICODE_UTF8
148 // ----------------------------------------------------------------------------
149 // wxCStrData converted strings caching
150 // ----------------------------------------------------------------------------
152 // FIXME-UTF8: temporarily disabled because it doesn't work with global
153 // string objects; re-enable after fixing this bug and benchmarking
154 // performance to see if using a hash is a good idea at all
157 // For backward compatibility reasons, it must be possible to assign the value
158 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
159 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
160 // because the memory would be freed immediately, but it has to be valid as long
161 // as the string is not modified, so that code like this still works:
163 // const wxChar *s = str.c_str();
164 // while ( s ) { ... }
166 // FIXME-UTF8: not thread safe!
167 // FIXME-UTF8: we currently clear the cached conversion only when the string is
168 // destroyed, but we should do it when the string is modified, to
169 // keep memory usage down
170 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
171 // invalidated the cache on every change, we could keep the previous
173 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
174 // to use mb_str() or wc_str() instead of (const [w]char*)c_str()
177 static inline void DeleteStringFromConversionCache(T
& hash
, const wxString
*s
)
179 typename
T::iterator i
= hash
.find(wxConstCast(s
, wxString
));
180 if ( i
!= hash
.end() )
188 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
189 // so we have to use wxString* here and const-cast when used
190 WX_DECLARE_HASH_MAP(wxString
*, char*, wxPointerHash
, wxPointerEqual
,
191 wxStringCharConversionCache
);
192 static wxStringCharConversionCache gs_stringsCharCache
;
194 const char* wxCStrData::AsChar() const
196 // remove previously cached value, if any (see FIXMEs above):
197 DeleteStringFromConversionCache(gs_stringsCharCache
, m_str
);
199 // convert the string and keep it:
200 const char *s
= gs_stringsCharCache
[wxConstCast(m_str
, wxString
)] =
201 m_str
->mb_str().release();
205 #endif // wxUSE_UNICODE
207 #if !wxUSE_UNICODE_WCHAR
208 WX_DECLARE_HASH_MAP(wxString
*, wchar_t*, wxPointerHash
, wxPointerEqual
,
209 wxStringWCharConversionCache
);
210 static wxStringWCharConversionCache gs_stringsWCharCache
;
212 const wchar_t* wxCStrData::AsWChar() const
214 // remove previously cached value, if any (see FIXMEs above):
215 DeleteStringFromConversionCache(gs_stringsWCharCache
, m_str
);
217 // convert the string and keep it:
218 const wchar_t *s
= gs_stringsWCharCache
[wxConstCast(m_str
, wxString
)] =
219 m_str
->wc_str().release();
223 #endif // !wxUSE_UNICODE_WCHAR
225 wxString::~wxString()
228 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
229 DeleteStringFromConversionCache(gs_stringsCharCache
, this);
231 #if !wxUSE_UNICODE_WCHAR
232 DeleteStringFromConversionCache(gs_stringsWCharCache
, this);
237 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
// Returns the string as a narrow (char) C string.
// The result of mb_str() is kept in the owning string's m_convertedToChar
// member, and the pointer handed back is m_convertedToChar advanced by
// m_offset.
238 const char* wxCStrData::AsChar() const
240 #if wxUSE_UNICODE_UTF8
241 if ( wxLocaleIsUtf8
)
244 // under non-UTF8 locales, we have to convert the internal UTF-8
245 // representation using wxConvLibc and cache the result
// m_str is const here; the cast gives access to the cache member updated below
247 wxString
*str
= wxConstCast(m_str
, wxString
);
249 // convert the string:
251 // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
252 // have it) but it's unfortunately not obvious to implement
253 // because we don't know how big a buffer we need for the
254 // given string length (in case of multibyte encodings, e.g.
255 // ISO-2022-JP or UTF-8 when internal representation is wchar_t)
257 // One idea would be to store more than just m_convertedToChar
258 // in wxString: then we could record the length of the string
259 // which was converted the last time and try to reuse the same
260 // buffer if the current length is not greater than it (this
261 // could still fail because string could have been modified in
262 // place but it would work most of the time, so we'd do it and
263 // only allocate the new buffer if in-place conversion returned
264 // an error). We could also store a bit saying if the string
265 // was modified since the last conversion (and update it in all
266 // operation modifying the string, of course) to avoid unneeded
267 // consequential conversions. But both of these ideas require
268 // adding more fields to wxString and require profiling results
269 // to be sure that we really gain enough from them to justify
271 wxCharBuffer
buf(str
->mb_str());
273 // if it failed, return empty string and not NULL to avoid crashes in code
274 // written with either wxWidgets 2 wxString or std::string behaviour in
275 // mind: neither of them ever returns NULL and so we shouldn't either
// the cached buffer is reused only when the new conversion has exactly the
// same length, so strcpy() below cannot write past its end
279 if ( str
->m_convertedToChar
&&
280 strlen(buf
) == strlen(str
->m_convertedToChar
) )
282 // keep the same buffer for as long as possible, so that several calls
283 // to c_str() in a row still work:
284 strcpy(str
->m_convertedToChar
, buf
);
// otherwise take ownership of the freshly converted buffer
// NOTE(review): the previous m_convertedToChar value is overwritten here and
// no visible line frees it first -- possible leak; confirm against the
// lines not shown and the wxString destructor.
288 str
->m_convertedToChar
= buf
.release();
292 return str
->m_convertedToChar
+ m_offset
;
294 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
296 #if !wxUSE_UNICODE_WCHAR
// Returns the string as a wide (wchar_t) C string.
// The result of wc_str() is kept in the owning string's m_convertedToWChar
// member, and the pointer handed back is m_convertedToWChar advanced by
// m_offset.
297 const wchar_t* wxCStrData::AsWChar() const
// m_str is const here; the cast gives access to the cache member updated below
299 wxString
*str
= wxConstCast(m_str
, wxString
);
301 // convert the string:
302 wxWCharBuffer
buf(str
->wc_str());
304 // notice that here, unlike above in AsChar(), conversion can't fail as our
305 // internal UTF-8 is always well-formed -- or the string was corrupted and
306 // all bets are off anyhow
308 // FIXME-UTF8: do the conversion in-place in the existing buffer
309 if ( str
->m_convertedToWChar
&&
310 wxWcslen(buf
) == wxWcslen(str
->m_convertedToWChar
) )
312 // keep the same buffer for as long as possible, so that several calls
313 // to c_str() in a row still work:
// only wxWcslen(buf) elements are copied, i.e. without the terminator; the
// existing buffer has the same length, so its terminator is already in place
314 memcpy(str
->m_convertedToWChar
, buf
, sizeof(wchar_t) * wxWcslen(buf
));
// otherwise take ownership of the freshly converted buffer
// NOTE(review): the previous m_convertedToWChar value is overwritten here and
// no visible line frees it first -- possible leak; confirm against the
// lines not shown and the wxString destructor.
318 str
->m_convertedToWChar
= buf
.release();
322 return str
->m_convertedToWChar
+ m_offset
;
324 #endif // !wxUSE_UNICODE_WCHAR
326 // ===========================================================================
327 // wxString class core
328 // ===========================================================================
330 // ---------------------------------------------------------------------------
331 // construction and conversion
332 // ---------------------------------------------------------------------------
334 #if wxUSE_UNICODE_WCHAR
336 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
337 const wxMBConv
& conv
)
340 if ( !psz
|| nLength
== 0 )
341 return SubstrBufFromMB(L
"", 0);
343 if ( nLength
== npos
)
347 wxWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
349 return SubstrBufFromMB(_T(""), 0);
351 return SubstrBufFromMB(wcBuf
, wcLen
);
353 #endif // wxUSE_UNICODE_WCHAR
355 #if wxUSE_UNICODE_UTF8
357 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
358 const wxMBConv
& conv
)
361 if ( !psz
|| nLength
== 0 )
362 return SubstrBufFromMB("", 0);
364 // if psz is already in UTF-8, we don't have to do the roundtrip to
365 // wchar_t* and back:
368 // we need to validate the input because UTF8 iterators assume valid
369 // UTF-8 sequence and psz may be invalid:
370 if ( wxStringOperations::IsValidUtf8String(psz
, nLength
) )
372 // we must pass the real string length to SubstrBufFromMB ctor
373 if ( nLength
== npos
)
374 nLength
= psz
? strlen(psz
) : 0;
375 return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz
), nLength
);
377 // else: do the roundtrip through wchar_t*
380 if ( nLength
== npos
)
383 // first convert to wide string:
385 wxWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
387 return SubstrBufFromMB("", 0);
389 // and then to UTF-8:
390 SubstrBufFromMB
buf(ConvertStr(wcBuf
, wcLen
, wxMBConvStrictUTF8()));
391 // widechar -> UTF-8 conversion isn't supposed to ever fail:
392 wxASSERT_MSG( buf
.data
, _T("conversion to UTF-8 failed") );
396 #endif // wxUSE_UNICODE_UTF8
398 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
400 wxString::SubstrBufFromWC
wxString::ConvertStr(const wchar_t *pwz
, size_t nLength
,
401 const wxMBConv
& conv
)
404 if ( !pwz
|| nLength
== 0 )
405 return SubstrBufFromWC("", 0);
407 if ( nLength
== npos
)
411 wxCharBuffer
mbBuf(conv
.cWC2MB(pwz
, nLength
, &mbLen
));
413 return SubstrBufFromWC("", 0);
415 return SubstrBufFromWC(mbBuf
, mbLen
);
417 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
420 #if wxUSE_UNICODE_WCHAR
422 //Convert wxString in Unicode mode to a multi-byte string
423 const wxCharBuffer
wxString::mb_str(const wxMBConv
& conv
) const
425 return conv
.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL
);
428 #elif wxUSE_UNICODE_UTF8
430 const wxWCharBuffer
wxString::wc_str() const
432 return wxMBConvStrictUTF8().cMB2WC
435 m_impl
.length() + 1, // size, not length
440 const wxCharBuffer
wxString::mb_str(const wxMBConv
& conv
) const
443 return wxCharBuffer::CreateNonOwned(m_impl
.c_str());
445 // FIXME-UTF8: use wc_str() here once we have buffers with length
448 wxWCharBuffer
wcBuf(wxMBConvStrictUTF8().cMB2WC
451 m_impl
.length() + 1, // size
455 return wxCharBuffer("");
457 return conv
.cWC2MB(wcBuf
, wcLen
+1, NULL
);
462 //Converts this string to a wide character string if unicode
463 //mode is not enabled and wxUSE_WCHAR_T is enabled
464 const wxWCharBuffer
wxString::wc_str(const wxMBConv
& conv
) const
466 return conv
.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL
);
469 #endif // Unicode/ANSI
471 // shrink to minimal size (releasing extra memory)
// A fresh copy of the contents is built from the [begin(), end()) range and
// the function reports whether that copy has the same length as this string.
// NOTE(review): the statement between the copy and the return is not visible
// here; presumably it exchanges this string with tmp -- confirm.
472 bool wxString::Shrink()
474 wxString
tmp(begin(), end());
476 return tmp
.length() == length();
479 // deprecated compatibility code:
480 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
481 wxStringCharType
*wxString::GetWriteBuf(size_t nLen
)
483 return DoGetWriteBuf(nLen
);
486 void wxString::UngetWriteBuf()
491 void wxString::UngetWriteBuf(size_t nLen
)
493 DoUngetWriteBuf(nLen
);
495 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
498 // ---------------------------------------------------------------------------
500 // ---------------------------------------------------------------------------
502 // all functions are inline in string.h
504 // ---------------------------------------------------------------------------
505 // concatenation operators
506 // ---------------------------------------------------------------------------
509 * concatenation functions come in 5 flavours:
511 * char + string and string + char
512 * C str + string and string + C str
515 wxString
operator+(const wxString
& str1
, const wxString
& str2
)
517 #if !wxUSE_STL_BASED_WXSTRING
518 wxASSERT( str1
.IsValid() );
519 wxASSERT( str2
.IsValid() );
528 wxString
operator+(const wxString
& str
, wxUniChar ch
)
530 #if !wxUSE_STL_BASED_WXSTRING
531 wxASSERT( str
.IsValid() );
540 wxString
operator+(wxUniChar ch
, const wxString
& str
)
542 #if !wxUSE_STL_BASED_WXSTRING
543 wxASSERT( str
.IsValid() );
552 wxString
operator+(const wxString
& str
, const char *psz
)
554 #if !wxUSE_STL_BASED_WXSTRING
555 wxASSERT( str
.IsValid() );
559 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
560 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
568 wxString
operator+(const wxString
& str
, const wchar_t *pwz
)
570 #if !wxUSE_STL_BASED_WXSTRING
571 wxASSERT( str
.IsValid() );
575 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
576 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
584 wxString
operator+(const char *psz
, const wxString
& str
)
586 #if !wxUSE_STL_BASED_WXSTRING
587 wxASSERT( str
.IsValid() );
591 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
592 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
600 wxString
operator+(const wchar_t *pwz
, const wxString
& str
)
602 #if !wxUSE_STL_BASED_WXSTRING
603 wxASSERT( str
.IsValid() );
607 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
608 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
616 // ---------------------------------------------------------------------------
618 // ---------------------------------------------------------------------------
620 bool wxString::IsSameAs(wxUniChar c
, bool compareWithCase
) const
622 return (length() == 1) && (compareWithCase
? GetChar(0u) == c
623 : wxToupper(GetChar(0u)) == wxToupper(c
));
626 #ifdef HAVE_STD_STRING_COMPARE
628 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
629 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
630 // sort strings in characters code point order by sorting the byte sequence
631 // in byte values order (i.e. what strcmp() and memcmp() do).
633 int wxString::compare(const wxString
& str
) const
635 return m_impl
.compare(str
.m_impl
);
638 int wxString::compare(size_t nStart
, size_t nLen
,
639 const wxString
& str
) const
642 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
643 return m_impl
.compare(pos
, len
, str
.m_impl
);
646 int wxString::compare(size_t nStart
, size_t nLen
,
648 size_t nStart2
, size_t nLen2
) const
651 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
654 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
656 return m_impl
.compare(pos
, len
, str
.m_impl
, pos2
, len2
);
659 int wxString::compare(const char* sz
) const
661 return m_impl
.compare(ImplStr(sz
));
664 int wxString::compare(const wchar_t* sz
) const
666 return m_impl
.compare(ImplStr(sz
));
669 int wxString::compare(size_t nStart
, size_t nLen
,
670 const char* sz
, size_t nCount
) const
673 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
675 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
677 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
680 int wxString::compare(size_t nStart
, size_t nLen
,
681 const wchar_t* sz
, size_t nCount
) const
684 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
686 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
688 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
691 #else // !HAVE_STD_STRING_COMPARE
// Three-way comparison of two character buffers given with explicit
// lengths, built on wxStringMemcmp. Three outcomes are visible below:
//  - a plain wxStringMemcmp over l1 units;
//  - wxStringMemcmp over l1 units, with a tie reported as -1;
//  - wxStringMemcmp over l2 units, with a tie reported as +1.
// NOTE(review): the conditions selecting between these branches are not
// visible here; presumably they are l1 == l2, l1 < l2 and l1 > l2
// respectively -- confirm.
693 static inline int wxDoCmp(const wxStringCharType
* s1
, size_t l1
,
694 const wxStringCharType
* s2
, size_t l2
)
697 return wxStringMemcmp(s1
, s2
, l1
);
// common part equal: -1 is returned instead of 0
700 int ret
= wxStringMemcmp(s1
, s2
, l1
);
701 return ret
== 0 ? -1 : ret
;
// common part equal: +1 is returned instead of 0
705 int ret
= wxStringMemcmp(s1
, s2
, l2
);
706 return ret
== 0 ? +1 : ret
;
710 int wxString::compare(const wxString
& str
) const
712 return ::wxDoCmp(m_impl
.data(), m_impl
.length(),
713 str
.m_impl
.data(), str
.m_impl
.length());
716 int wxString::compare(size_t nStart
, size_t nLen
,
717 const wxString
& str
) const
719 wxASSERT(nStart
<= length());
720 size_type strLen
= length() - nStart
;
721 nLen
= strLen
< nLen
? strLen
: nLen
;
724 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
726 return ::wxDoCmp(m_impl
.data() + pos
, len
,
727 str
.m_impl
.data(), str
.m_impl
.length());
730 int wxString::compare(size_t nStart
, size_t nLen
,
732 size_t nStart2
, size_t nLen2
) const
734 wxASSERT(nStart
<= length());
735 wxASSERT(nStart2
<= str
.length());
736 size_type strLen
= length() - nStart
,
737 strLen2
= str
.length() - nStart2
;
738 nLen
= strLen
< nLen
? strLen
: nLen
;
739 nLen2
= strLen2
< nLen2
? strLen2
: nLen2
;
742 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
744 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
746 return ::wxDoCmp(m_impl
.data() + pos
, len
,
747 str
.m_impl
.data() + pos2
, len2
);
750 int wxString::compare(const char* sz
) const
752 SubstrBufFromMB
str(ImplStr(sz
, npos
));
753 if ( str
.len
== npos
)
754 str
.len
= wxStringStrlen(str
.data
);
755 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
758 int wxString::compare(const wchar_t* sz
) const
760 SubstrBufFromWC
str(ImplStr(sz
, npos
));
761 if ( str
.len
== npos
)
762 str
.len
= wxStringStrlen(str
.data
);
763 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
766 int wxString::compare(size_t nStart
, size_t nLen
,
767 const char* sz
, size_t nCount
) const
769 wxASSERT(nStart
<= length());
770 size_type strLen
= length() - nStart
;
771 nLen
= strLen
< nLen
? strLen
: nLen
;
774 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
776 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
777 if ( str
.len
== npos
)
778 str
.len
= wxStringStrlen(str
.data
);
780 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
783 int wxString::compare(size_t nStart
, size_t nLen
,
784 const wchar_t* sz
, size_t nCount
) const
786 wxASSERT(nStart
<= length());
787 size_type strLen
= length() - nStart
;
788 nLen
= strLen
< nLen
? strLen
: nLen
;
791 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
793 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
794 if ( str
.len
== npos
)
795 str
.len
= wxStringStrlen(str
.data
);
797 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
800 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
803 // ---------------------------------------------------------------------------
804 // find_{first,last}_[not]_of functions
805 // ---------------------------------------------------------------------------
807 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
809 // NB: All these functions are implemented with the argument being wxChar*,
810 // i.e. widechar string in any Unicode build, even though native string
811 // representation is char* in the UTF-8 build. This is because we couldn't
812 // use memchr() to determine if a character is in a set encoded as UTF-8.
814 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
) const
816 return find_first_of(sz
, nStart
, wxStrlen(sz
));
819 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
) const
821 return find_first_not_of(sz
, nStart
, wxStrlen(sz
));
824 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
826 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
829 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
831 if ( wxTmemchr(sz
, *i
, n
) )
838 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
840 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
843 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
845 if ( !wxTmemchr(sz
, *i
, n
) )
853 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
) const
855 return find_last_of(sz
, nStart
, wxStrlen(sz
));
858 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
) const
860 return find_last_not_of(sz
, nStart
, wxStrlen(sz
));
863 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
865 size_t len
= length();
867 if ( nStart
== npos
)
873 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
877 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
878 i
!= rend(); --idx
, ++i
)
880 if ( wxTmemchr(sz
, *i
, n
) )
887 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
889 size_t len
= length();
891 if ( nStart
== npos
)
897 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
901 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
902 i
!= rend(); --idx
, ++i
)
904 if ( !wxTmemchr(sz
, *i
, n
) )
911 size_t wxString::find_first_not_of(wxUniChar ch
, size_t nStart
) const
913 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
916 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
925 size_t wxString::find_last_not_of(wxUniChar ch
, size_t nStart
) const
927 size_t len
= length();
929 if ( nStart
== npos
)
935 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
939 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
940 i
!= rend(); --idx
, ++i
)
949 // the functions above were implemented for wchar_t* arguments in Unicode
950 // build and char* in ANSI build; below are implementations for the other
953 #define wxOtherCharType char
954 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
956 #define wxOtherCharType wchar_t
957 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
// Overloads of the find_{first,last}_[not_]of family taking the character
// set in the "other" character type (wxOtherCharType). Each one converts the
// set with STRCONV and forwards to the wxChar* overload of the same name.
// STRCONV casts the conversion result to const wxChar*, so the converted
// data is only used within the forwarding call expression.
// NOTE(review): in the overloads taking a count, n is passed both to the
// conversion (as the amount of input to convert) and to the wxChar* overload
// (as the size of the converted set), and the conversion's output length
// argument is NULL. If the conversion can change the number of units, the
// second use of n would not match the converted buffer -- confirm.
960 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
) const
961 { return find_first_of(STRCONV(sz
), nStart
); }
963 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
,
965 { return find_first_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
966 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
) const
967 { return find_last_of(STRCONV(sz
), nStart
); }
968 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
,
970 { return find_last_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
971 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
972 { return find_first_not_of(STRCONV(sz
), nStart
); }
973 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
,
975 { return find_first_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
976 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
977 { return find_last_not_of(STRCONV(sz
), nStart
); }
978 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
,
980 { return find_last_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
982 #undef wxOtherCharType
985 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
987 // ===========================================================================
988 // other common string functions
989 // ===========================================================================
991 int wxString::CmpNoCase(const wxString
& s
) const
993 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
995 const_iterator i1
= begin();
996 const_iterator end1
= end();
997 const_iterator i2
= s
.begin();
998 const_iterator end2
= s
.end();
1000 for ( ; i1
!= end1
&& i2
!= end2
; ++i1
, ++i2
)
1002 wxUniChar lower1
= (wxChar
)wxTolower(*i1
);
1003 wxUniChar lower2
= (wxChar
)wxTolower(*i2
);
1004 if ( lower1
!= lower2
)
1005 return lower1
< lower2
? -1 : 1;
1008 size_t len1
= length();
1009 size_t len2
= s
.length();
1013 else if ( len1
> len2
)
1022 #ifndef __SCHAR_MAX__
1023 #define __SCHAR_MAX__ 127
1027 wxString
wxString::FromAscii(const char *ascii
, size_t len
)
1029 if (!ascii
|| len
== 0)
1030 return wxEmptyString
;
1035 wxStringInternalBuffer
buf(res
, len
);
1036 wxStringCharType
*dest
= buf
;
1038 for ( ; len
> 0; --len
)
1040 unsigned char c
= (unsigned char)*ascii
++;
1041 wxASSERT_MSG( c
< 0x80,
1042 _T("Non-ASCII value passed to FromAscii().") );
1044 *dest
++ = (wchar_t)c
;
1051 wxString
wxString::FromAscii(const char *ascii
)
1053 return FromAscii(ascii
, wxStrlen(ascii
));
1056 wxString
wxString::FromAscii(char ascii
)
1058 // What do we do with '\0' ?
1060 unsigned char c
= (unsigned char)ascii
;
1062 wxASSERT_MSG( c
< 0x80, _T("Non-ASCII value passed to FromAscii().") );
1064 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1065 return wxString(wxUniChar((wchar_t)c
));
1068 const wxCharBuffer
wxString::ToAscii() const
1070 // this will allocate enough space for the terminating NUL too
1071 wxCharBuffer
buffer(length());
1072 char *dest
= buffer
.data();
1074 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1077 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1078 *dest
++ = c
.IsAscii() ? (char)c
: '_';
1080 // the output string can't have embedded NULs anyhow, so we can safely
1081 // stop at first of them even if we do have any
1089 #endif // wxUSE_UNICODE
1091 // extract string of length nCount starting at nFirst
// nCount == npos selects everything from nFirst to the end, and a request
// reaching past the end is clamped to the available characters. A start
// position beyond the end yields wxEmptyString.
1092 wxString
wxString::Mid(size_t nFirst
, size_t nCount
) const
1094 size_t nLen
= length();
1096 // default value of nCount is npos and means "till the end"
1097 if ( nCount
== npos
)
// if nFirst > nLen this unsigned subtraction wraps around, but the
// nFirst > nLen test below returns before nCount is used
1099 nCount
= nLen
- nFirst
;
1102 // out-of-bounds requests return sensible things
1103 if ( nFirst
+ nCount
> nLen
)
1105 nCount
= nLen
- nFirst
;
1108 if ( nFirst
> nLen
)
1110 // AllocCopy() will return empty string
1111 return wxEmptyString
;
// copy the requested range; a length mismatch is reported as an
// out-of-memory failure
1114 wxString
dest(*this, nFirst
, nCount
);
1115 if ( dest
.length() != nCount
)
1117 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1123 // check that the string starts with prefix; if it does, return the rest of
1124 // the string in the provided pointer (when it is not NULL), otherwise return false
1125 bool wxString::StartsWith(const wxString
& prefix
, wxString
*rest
) const
// compare the first prefix.length() characters of this string with prefix
1127 if ( compare(0, prefix
.length(), prefix
) != 0 )
1132 // put the rest of the string into provided pointer
1133 rest
->assign(*this, prefix
.length(), npos
);
1140 // check that the string ends with suffix; if it does, return the rest of it
1141 // in the provided pointer (when it is not NULL), otherwise return false
1142 bool wxString::EndsWith(const wxString
& suffix
, wxString
*rest
) const
// index at which the suffix would have to begin; negative when suffix is
// longer than this string
// NOTE(review): the difference of two size_t lengths is stored in an int, so
// lengths that do not fit in an int would not survive the conversion --
// confirm whether such strings need to be supported here.
1144 int start
= length() - suffix
.length();
// compare everything from start to the end of this string with suffix
1146 if ( start
< 0 || compare(start
, npos
, suffix
) != 0 )
1151 // put the rest of the string into provided pointer
1152 rest
->assign(*this, 0, start
);
1159 // extract nCount last (rightmost) characters
1160 wxString
wxString::Right(size_t nCount
) const
1162 if ( nCount
> length() )
1165 wxString
dest(*this, length() - nCount
, nCount
);
1166 if ( dest
.length() != nCount
) {
1167 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1172 // get all characters after the last occurrence of ch
1173 // (returns the whole string if ch not found)
1174 wxString
wxString::AfterLast(wxUniChar ch
) const
1177 int iPos
= Find(ch
, true);
1178 if ( iPos
== wxNOT_FOUND
)
1181 str
= wx_str() + iPos
+ 1;
1186 // extract nCount first (leftmost) characters
1187 wxString
wxString::Left(size_t nCount
) const
1189 if ( nCount
> length() )
1192 wxString
dest(*this, 0, nCount
);
1193 if ( dest
.length() != nCount
) {
1194 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1199 // get all characters before the first occurrence of ch
1200 // (returns the whole string if ch not found)
1201 wxString
wxString::BeforeFirst(wxUniChar ch
) const
1203 int iPos
= Find(ch
);
1204 if ( iPos
== wxNOT_FOUND
) iPos
= length();
1205 return wxString(*this, 0, iPos
);
1208 /// get all characters before the last occurrence of ch
1209 /// (returns empty string if ch not found)
1210 wxString
wxString::BeforeLast(wxUniChar ch
) const
1213 int iPos
= Find(ch
, true);
1214 if ( iPos
!= wxNOT_FOUND
&& iPos
!= 0 )
1215 str
= wxString(c_str(), iPos
);
1220 /// get all characters after the first occurrence of ch
1221 /// (returns empty string if ch not found)
1222 wxString
wxString::AfterFirst(wxUniChar ch
) const
1225 int iPos
= Find(ch
);
1226 if ( iPos
!= wxNOT_FOUND
)
1227 str
= wx_str() + iPos
+ 1;
1232 // replace first (or all) occurrences of some substring with another one
1233 size_t wxString::Replace(const wxString
& strOld
,
1234 const wxString
& strNew
, bool bReplaceAll
)
1236 // if we tried to replace an empty string we'd enter an infinite loop below
1237 wxCHECK_MSG( !strOld
.empty(), 0,
1238 _T("wxString::Replace(): invalid parameter") );
1240 size_t uiCount
= 0; // count of replacements made
1242 // optimize the special common case: replacement of one character by
1243 // another one (in UTF-8 case we can only do this for ASCII characters)
1245 // benchmarks show that this special version is around 3 times faster
1246 // (depending on the proportion of matching characters and UTF-8/wchar_t
1248 if ( strOld
.m_impl
.length() == 1 && strNew
.m_impl
.length() == 1 )
1250 const wxStringCharType chOld
= strOld
.m_impl
[0],
1251 chNew
= strNew
.m_impl
[0];
1253 // this loop is the simplified version of the one below
1254 for ( size_t pos
= 0; ; )
1256 pos
= m_impl
.find(chOld
, pos
);
1260 m_impl
[pos
++] = chNew
;
1268 else // general case
1270 const size_t uiOldLen
= strOld
.m_impl
.length();
1271 const size_t uiNewLen
= strNew
.m_impl
.length();
1273 for ( size_t pos
= 0; ; )
1275 pos
= m_impl
.find(strOld
.m_impl
, pos
);
1279 // replace this occurrence of the old string with the new one
1280 m_impl
.replace(pos
, uiOldLen
, strNew
.m_impl
);
1282 // move up pos past the string that was replaced
1285 // increase replace count
1288 // stop after the first one?
1297 bool wxString::IsAscii() const
1299 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1301 if ( !(*i
).IsAscii() )
1308 bool wxString::IsWord() const
1310 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1312 if ( !wxIsalpha(*i
) )
1319 bool wxString::IsNumber() const
1324 const_iterator i
= begin();
1326 if ( *i
== _T('-') || *i
== _T('+') )
1329 for ( ; i
!= end(); ++i
)
1331 if ( !wxIsdigit(*i
) )
1338 wxString
wxString::Strip(stripType w
) const
1341 if ( w
& leading
) s
.Trim(false);
1342 if ( w
& trailing
) s
.Trim(true);
1346 // ---------------------------------------------------------------------------
1348 // ---------------------------------------------------------------------------
1350 wxString
& wxString::MakeUpper()
1352 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1353 *it
= (wxChar
)wxToupper(*it
);
1358 wxString
& wxString::MakeLower()
1360 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1361 *it
= (wxChar
)wxTolower(*it
);
1366 wxString
& wxString::MakeCapitalized()
1368 const iterator en
= end();
1369 iterator it
= begin();
1372 *it
= (wxChar
)wxToupper(*it
);
1373 for ( ++it
; it
!= en
; ++it
)
1374 *it
= (wxChar
)wxTolower(*it
);
1380 // ---------------------------------------------------------------------------
1381 // trimming and padding
1382 // ---------------------------------------------------------------------------
1384 // some compilers (VC++ 6.0 not to name them) return true for a call to
1385 // isspace('\xEA') in the C locale which seems to be broken to me, but we have
1386 // to live with this by checking that the character is a 7 bit one - even if
1387 // this may fail to detect some spaces (I don't know if Unicode doesn't have
1388 // space-like symbols somewhere except in the first 128 chars), it is arguably
1389 // still better than trimming away accented letters
1390 inline int wxSafeIsspace(wxChar ch
) { return (ch
< 127) && wxIsspace(ch
); }
1392 // trims spaces (in the sense of isspace) from left or right side
1393 wxString
& wxString::Trim(bool bFromRight
)
1395 // first check if we're going to modify the string at all
1398 (bFromRight
&& wxSafeIsspace(GetChar(length() - 1))) ||
1399 (!bFromRight
&& wxSafeIsspace(GetChar(0u)))
1405 // find last non-space character
1406 reverse_iterator psz
= rbegin();
1407 while ( (psz
!= rend()) && wxSafeIsspace(*psz
) )
1410 // truncate at trailing space start
1411 erase(psz
.base(), end());
1415 // find first non-space character
1416 iterator psz
= begin();
1417 while ( (psz
!= end()) && wxSafeIsspace(*psz
) )
1420 // fix up data and length
1421 erase(begin(), psz
);
1428 // adds nCount characters chPad to the string from either side
1429 wxString
& wxString::Pad(size_t nCount
, wxUniChar chPad
, bool bFromRight
)
1431 wxString
s(chPad
, nCount
);
1444 // truncate the string
1445 wxString
& wxString::Truncate(size_t uiLen
)
1447 if ( uiLen
< length() )
1449 erase(begin() + uiLen
, end());
1451 //else: nothing to do, string is already short enough
1456 // ---------------------------------------------------------------------------
1457 // finding (return wxNOT_FOUND if not found and index otherwise)
1458 // ---------------------------------------------------------------------------
1461 int wxString::Find(wxUniChar ch
, bool bFromEnd
) const
1463 size_type idx
= bFromEnd
? find_last_of(ch
) : find_first_of(ch
);
1465 return (idx
== npos
) ? wxNOT_FOUND
: (int)idx
;
1468 // ----------------------------------------------------------------------------
1469 // conversion to numbers
1470 // ----------------------------------------------------------------------------
// The implementation of all the integer conversion functions below is
// exactly the same, so it is factored out into a macro. Note that number
// extraction works correctly on UTF-8 strings, so we can use
// wxStringCharType and wx_str() for maximum efficiency.

// errno is not used under Windows CE: DO_IF_NOT_WINCE(x) expands to x
// everywhere else and to nothing there
#ifndef __WXWINCE__
    #define DO_IF_NOT_WINCE(x) x
#else
    #define DO_IF_NOT_WINCE(x)
#endif

// Body of a "bool ToXXX(T *out, int base) const" member function: parses the
// string with func and stores the result in *out.
//
// NB: this declares the locals start, end and val in the calling function
//     and ends with an unterminated "return true", so it must be the only
//     statement of the function and be followed by a semicolon.
#define WX_STRING_TO_INT_TYPE(out, base, func, T)                           \
    wxCHECK_MSG( out, false, _T("NULL output pointer") );                   \
    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );  \
                                                                            \
    DO_IF_NOT_WINCE( errno = 0; )                                           \
                                                                            \
    const wxStringCharType *start = wx_str();                               \
    wxStringCharType *end;                                                  \
    T val = func(start, &end, base);                                        \
                                                                            \
    /* return true only if scan was stopped by the terminating NUL and */   \
    /* if the string was not empty to start with and no under/overflow */   \
    /* occurred */                                                          \
    if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )         \
        return false;                                                       \
    *out = val;                                                             \
    return true
1500 bool wxString::ToLong(long *pVal
, int base
) const
1502 WX_STRING_TO_INT_TYPE(pVal
, base
, wxStrtol
, long);
1505 bool wxString::ToULong(unsigned long *pVal
, int base
) const
1507 WX_STRING_TO_INT_TYPE(pVal
, base
, wxStrtoul
, unsigned long);
1510 bool wxString::ToLongLong(wxLongLong_t
*pVal
, int base
) const
1512 WX_STRING_TO_INT_TYPE(pVal
, base
, wxStrtoll
, wxLongLong_t
);
1515 bool wxString::ToULongLong(wxULongLong_t
*pVal
, int base
) const
1517 WX_STRING_TO_INT_TYPE(pVal
, base
, wxStrtoull
, wxULongLong_t
);
1520 bool wxString::ToDouble(double *pVal
) const
1522 wxCHECK_MSG( pVal
, false, _T("NULL output pointer") );
1524 DO_IF_NOT_WINCE( errno
= 0; )
1526 const wxChar
*start
= c_str();
1528 double val
= wxStrtod(start
, &end
);
1530 // return true only if scan was stopped by the terminating NUL and if the
1531 // string was not empty to start with and no under/overflow occurred
1532 if ( *end
|| end
== start
DO_IF_NOT_WINCE(|| errno
== ERANGE
) )
1540 // ---------------------------------------------------------------------------
1542 // ---------------------------------------------------------------------------
1544 #if !wxUSE_UTF8_LOCALE_ONLY
1546 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1547 wxString
wxStringPrintfMixinBase::DoFormatWchar(const wxChar
*format
, ...)
1549 wxString
wxString::DoFormatWchar(const wxChar
*format
, ...)
1553 va_start(argptr
, format
);
1556 s
.PrintfV(format
, argptr
);
1562 #endif // !wxUSE_UTF8_LOCALE_ONLY
#if wxUSE_UNICODE_UTF8

// printf-like formatting with a char format string: forwards the variable
// arguments to PrintfV() of a temporary string and returns that string.
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list argptr;
    va_start(argptr, format);

    result.PrintfV(format, argptr);

    va_end(argptr);

    return result;
}

#endif // wxUSE_UNICODE_UTF8
1581 wxString
wxString::FormatV(const wxString
& format
, va_list argptr
)
1584 s
.PrintfV(format
, argptr
);
1588 #if !wxUSE_UTF8_LOCALE_ONLY
1589 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1590 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar
*format
, ...)
1592 int wxString::DoPrintfWchar(const wxChar
*format
, ...)
1596 va_start(argptr
, format
);
1598 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1599 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1600 // because it's the only cast that works safely for downcasting when
1601 // multiple inheritance is used:
1602 wxString
*str
= static_cast<wxString
*>(this);
1604 wxString
*str
= this;
1607 int iLen
= str
->PrintfV(format
, argptr
);
1613 #endif // !wxUSE_UTF8_LOCALE_ONLY
#if wxUSE_UNICODE_UTF8

// printf-like formatting into this string with a char format string:
// forwards the variable arguments to PrintfV() and returns its result.
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list argptr;
    va_start(argptr, format);

    const int written = PrintfV(format, argptr);

    va_end(argptr);

    return written;
}

#endif // wxUSE_UNICODE_UTF8
1630 Uses wxVsnprintf and places the result into this string.
1632 In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1633 it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in
1634 the ISO C99 (and thus SUSv3) standard the return value for the case of
1635 an undersized buffer is inconsistent. For conforming vsnprintf
1636 implementations the function must return the number of characters that
1637 would have been printed had the buffer been large enough. For conforming
1638 vswprintf implementations the function must return a negative number
1641 What vswprintf sets errno to is undefined but Darwin seems to set it to
1642 EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of
1643 those are defined in the standard and backed up by several conformance
1644 statements. Note that ENOMEM mentioned in the manual page does not
1645 apply to swprintf, only wprintf and fwprintf.
1647 Official manual page:
1648 http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1650 Some conformance statements (AIX, Solaris):
1651 http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1652 http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1654 Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1655 EILSEQ and EINVAL are specifically defined to mean the error is other than
1656 an undersized buffer and no other errno are defined we treat those two
1657 as meaning hard errors and everything else gets the old behavior which
1658 is to keep looping and increasing buffer size until the function succeeds.
1660 In practice it's impossible to determine before compilation which behavior
1661 may be used. The vswprintf function may have vsnprintf-like behavior or
1662 vice-versa. Behavior detected on one release can theoretically change
1663 with an updated release. Not to mention that configure testing for it
1664 would require the test to be run on the host system, not the build system
1665 which makes cross compilation difficult. Therefore, we make no assumptions
1666 about behavior and try our best to handle every known case, including the
1667 case where wxVsnprintf returns a negative number and fails to set errno.
1669 There is yet one more non-standard implementation and that is our own.
1670 Fortunately, that can be detected at compile-time.
1672 On top of all that, ISO C99 explicitly defines snprintf to write a null
1673 character to the last position of the specified buffer. That would be at
1674 the given buffer size minus 1. It is supposed to do this even if it
1675 turns out that the buffer is sized too small.
1677 Darwin (tested on 10.5) follows the C99 behavior exactly.
1679 Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1680 errno even when it fails. However, it only seems to ever fail due
1681 to an undersized buffer.
1683 #if wxUSE_UNICODE_UTF8
1684 template<typename BufferType
>
1686 // we only need one version in non-UTF8 builds and at least two Windows
1687 // compilers have problems with this function template, so use just one
1688 // normal function here
1690 static int DoStringPrintfV(wxString
& str
,
1691 const wxString
& format
, va_list argptr
)
1697 #if wxUSE_UNICODE_UTF8
1698 BufferType
tmp(str
, size
+ 1);
1699 typename
BufferType::CharType
*buf
= tmp
;
1701 wxStringBuffer
tmp(str
, size
+ 1);
1709 // in UTF-8 build, leaving uninitialized junk in the buffer
1710 // could result in invalid non-empty UTF-8 string, so just
1711 // reset the string to empty on failure:
1716 // wxVsnprintf() may modify the original arg pointer, so pass it
1719 wxVaCopy(argptrcopy
, argptr
);
1722 // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1725 int len
= wxVsnprintf(buf
, size
, format
, argptrcopy
);
1728 // some implementations of vsnprintf() don't NUL terminate
1729 // the string if there is not enough space for it so
1730 // always do it manually
1731 // FIXME: This really seems to be the wrong and would be an off-by-one
1732 // bug except the code above allocates an extra character.
1733 buf
[size
] = _T('\0');
1735 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1736 // total number of characters which would have been written if the
1737 // buffer were large enough (newer standards such as Unix98)
1740 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1741 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1742 // is true if *both* of them use our own implementation,
1743 // otherwise we can't be sure
1744 #if wxUSE_WXVSNPRINTF
1745 // we know that our own implementation of wxVsnprintf() returns -1
1746 // only for a format error - thus there's something wrong with
1747 // the user's format string
1750 #else // possibly using system version
1751 // assume it only returns error if there is not enough space, but
1752 // as we don't know how much we need, double the current size of
1755 if( (errno
== EILSEQ
) || (errno
== EINVAL
) )
1756 // If errno was set to one of the two well-known hard errors
1757 // then fail immediately to avoid an infinite loop.
1760 #endif // __WXWINCE__
1761 // still not enough, as we don't know how much we need, double the
1762 // current size of the buffer
1764 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1766 else if ( len
>= size
)
1768 #if wxUSE_WXVSNPRINTF
1769 // we know that our own implementation of wxVsnprintf() returns
1770 // size+1 when there's not enough space but that's not the size
1771 // of the required buffer!
1772 size
*= 2; // so we just double the current size of the buffer
1774 // some vsnprintf() implementations NUL-terminate the buffer and
1775 // some don't in len == size case, to be safe always add 1
1776 // FIXME: I don't quite understand this comment. The vsnprintf
1777 // function is specifically defined to return the number of
1778 // characters printed not including the null terminator.
1779 // So OF COURSE you need to add 1 to get the right buffer size.
1780 // The following line is definitely correct, no question.
1784 else // ok, there was enough space
1790 // we could have overshot
1793 return str
.length();
1796 int wxString::PrintfV(const wxString
& format
, va_list argptr
)
1798 #if wxUSE_UNICODE_UTF8
1799 #if wxUSE_STL_BASED_WXSTRING
1800 typedef wxStringTypeBuffer
<char> Utf8Buffer
;
1802 typedef wxStringInternalBuffer Utf8Buffer
;
1806 #if wxUSE_UTF8_LOCALE_ONLY
1807 return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argptr
);
1809 #if wxUSE_UNICODE_UTF8
1810 if ( wxLocaleIsUtf8
)
1811 return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argptr
);
1814 return DoStringPrintfV
<wxStringBuffer
>(*this, format
, argptr
);
1816 return DoStringPrintfV(*this, format
, argptr
);
1817 #endif // UTF8/WCHAR
1821 // ----------------------------------------------------------------------------
1822 // misc other operations
1823 // ----------------------------------------------------------------------------
1825 // returns true if the string matches the pattern which may contain '*' and
1826 // '?' metacharacters (as usual, '?' matches any character and '*' any number
1828 bool wxString::Matches(const wxString
& mask
) const
1830 // I disable this code as it doesn't seem to be faster (in fact, it seems
1831 // to be much slower) than the old, hand-written code below and using it
1832 // here requires always linking with libregex even if the user code doesn't
1834 #if 0 // wxUSE_REGEX
1835 // first translate the shell-like mask into a regex
1837 pattern
.reserve(wxStrlen(pszMask
));
1849 pattern
+= _T(".*");
1860 // these characters are special in a RE, quote them
1861 // (however note that we don't quote '[' and ']' to allow
1862 // using them for Unix shell like matching)
1863 pattern
+= _T('\\');
1867 pattern
+= *pszMask
;
1875 return wxRegEx(pattern
, wxRE_NOSUB
| wxRE_EXTENDED
).Matches(c_str());
1876 #else // !wxUSE_REGEX
1877 // TODO: this is, of course, awfully inefficient...
1879 // FIXME-UTF8: implement using iterators, remove #if
1880 #if wxUSE_UNICODE_UTF8
1881 wxWCharBuffer maskBuf
= mask
.wc_str();
1882 wxWCharBuffer txtBuf
= wc_str();
1883 const wxChar
*pszMask
= maskBuf
.data();
1884 const wxChar
*pszTxt
= txtBuf
.data();
1886 const wxChar
*pszMask
= mask
.wx_str();
1887 // the char currently being checked
1888 const wxChar
*pszTxt
= wx_str();
1891 // the last location where '*' matched
1892 const wxChar
*pszLastStarInText
= NULL
;
1893 const wxChar
*pszLastStarInMask
= NULL
;
1896 for ( ; *pszMask
!= wxT('\0'); pszMask
++, pszTxt
++ ) {
1897 switch ( *pszMask
) {
1899 if ( *pszTxt
== wxT('\0') )
1902 // pszTxt and pszMask will be incremented in the loop statement
1908 // remember where we started to be able to backtrack later
1909 pszLastStarInText
= pszTxt
;
1910 pszLastStarInMask
= pszMask
;
1912 // ignore special chars immediately following this one
1913 // (should this be an error?)
1914 while ( *pszMask
== wxT('*') || *pszMask
== wxT('?') )
1917 // if there is nothing more, match
1918 if ( *pszMask
== wxT('\0') )
1921 // are there any other metacharacters in the mask?
1923 const wxChar
*pEndMask
= wxStrpbrk(pszMask
, wxT("*?"));
1925 if ( pEndMask
!= NULL
) {
1926 // we have to match the string between two metachars
1927 uiLenMask
= pEndMask
- pszMask
;
1930 // we have to match the remainder of the string
1931 uiLenMask
= wxStrlen(pszMask
);
1934 wxString
strToMatch(pszMask
, uiLenMask
);
1935 const wxChar
* pMatch
= wxStrstr(pszTxt
, strToMatch
);
1936 if ( pMatch
== NULL
)
1939 // -1 to compensate "++" in the loop
1940 pszTxt
= pMatch
+ uiLenMask
- 1;
1941 pszMask
+= uiLenMask
- 1;
1946 if ( *pszMask
!= *pszTxt
)
1952 // match only if nothing left
1953 if ( *pszTxt
== wxT('\0') )
1956 // if we failed to match, backtrack if we can
1957 if ( pszLastStarInText
) {
1958 pszTxt
= pszLastStarInText
+ 1;
1959 pszMask
= pszLastStarInMask
;
1961 pszLastStarInText
= NULL
;
1963 // don't bother resetting pszLastStarInMask, it's unnecessary
1969 #endif // wxUSE_REGEX/!wxUSE_REGEX
1972 // Count the number of chars
1973 int wxString::Freq(wxUniChar ch
) const
1976 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1984 // ----------------------------------------------------------------------------
1985 // wxUTF8StringBuffer
1986 // ----------------------------------------------------------------------------
1988 #if wxUSE_UNICODE_WCHAR
1989 wxUTF8StringBuffer::~wxUTF8StringBuffer()
1991 wxMBConvStrictUTF8 conv
;
1992 size_t wlen
= conv
.ToWChar(NULL
, 0, m_buf
);
1993 wxCHECK_RET( wlen
!= wxCONV_FAILED
, "invalid UTF-8 data in string buffer?" );
1995 wxStringInternalBuffer
wbuf(m_str
, wlen
);
1996 conv
.ToWChar(wbuf
, wlen
, m_buf
);
1999 wxUTF8StringBufferLength::~wxUTF8StringBufferLength()
2001 wxCHECK_RET(m_lenSet
, "length not set");
2003 wxMBConvStrictUTF8 conv
;
2004 size_t wlen
= conv
.ToWChar(NULL
, 0, m_buf
, m_len
);
2005 wxCHECK_RET( wlen
!= wxCONV_FAILED
, "invalid UTF-8 data in string buffer?" );
2007 wxStringInternalBufferLength
wbuf(m_str
, wlen
);
2008 conv
.ToWChar(wbuf
, wlen
, m_buf
, m_len
);
2009 wbuf
.SetLength(wlen
);
2011 #endif // wxUSE_UNICODE_WCHAR