1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
13 // ===========================================================================
14 // headers, declarations, constants
15 // ===========================================================================
17 // For compilers that support precompilation, includes "wx.h".
18 #include "wx/wxprec.h"
25 #include "wx/string.h"
26 #include "wx/wxcrtvararg.h"
42 #include "wx/hashmap.h"
44 // string handling functions used by wxString:
45 #if wxUSE_UNICODE_UTF8
46 #define wxStringMemcpy memcpy
47 #define wxStringMemcmp memcmp
48 #define wxStringMemchr memchr
49 #define wxStringStrlen strlen
51 #define wxStringMemcpy wxTmemcpy
52 #define wxStringMemcmp wxTmemcmp
53 #define wxStringMemchr wxTmemchr
54 #define wxStringStrlen wxStrlen
58 // ---------------------------------------------------------------------------
59 // static class variables definition
60 // ---------------------------------------------------------------------------
62 //According to STL _must_ be a -1 size_t
63 const size_t wxString::npos
= (size_t) -1;
65 // ----------------------------------------------------------------------------
67 // ----------------------------------------------------------------------------
69 #if wxUSE_STD_IOSTREAM
73 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxCStrData
& str
)
75 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
76 return os
<< (const char *)str
.AsCharBuf();
78 return os
<< str
.AsInternal();
82 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxString
& str
)
84 return os
<< str
.c_str();
87 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxCharBuffer
& str
)
89 return os
<< str
.data();
93 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxWCharBuffer
& str
)
95 return os
<< str
.data();
99 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
101 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxString
& str
)
103 return wos
<< str
.wc_str();
106 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxCStrData
& str
)
108 return wos
<< str
.AsWChar();
111 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxWCharBuffer
& str
)
113 return wos
<< str
.data();
116 #endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
118 #endif // wxUSE_STD_IOSTREAM
120 // ===========================================================================
121 // wxString class core
122 // ===========================================================================
124 #if wxUSE_UNICODE_UTF8
126 void wxString::PosLenToImpl(size_t pos
, size_t len
,
127 size_t *implPos
, size_t *implLen
) const
133 const_iterator i
= begin() + pos
;
134 *implPos
= wxStringImpl::const_iterator(i
.impl()) - m_impl
.begin();
139 // too large length is interpreted as "to the end of the string"
140 // FIXME-UTF8: verify this is the case in std::string, assert
142 if ( pos
+ len
> length() )
143 len
= length() - pos
;
145 *implLen
= (i
+ len
).impl() - i
.impl();
150 #endif // wxUSE_UNICODE_UTF8
152 // ----------------------------------------------------------------------------
153 // wxCStrData converted strings caching
154 // ----------------------------------------------------------------------------
156 // FIXME-UTF8: temporarily disabled because it doesn't work with global
157 // string objects; re-enable after fixing this bug and benchmarking
158 // performance to see if using a hash is a good idea at all
161 // For backward compatibility reasons, it must be possible to assign the value
162 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
163 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
164 // because the memory would be freed immediately, but it has to be valid as long
165 // as the string is not modified, so that code like this still works:
167 // const wxChar *s = str.c_str();
168 // while ( s ) { ... }
170 // FIXME-UTF8: not thread safe!
171 // FIXME-UTF8: we currently clear the cached conversion only when the string is
172 // destroyed, but we should do it when the string is modified, to
173 // keep memory usage down
174 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
175 // invalidated the cache on every change, we could keep the previous
177 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
178 // to use mb_str() or wc_str() instead of (const [w]char*)c_str()
181 static inline void DeleteStringFromConversionCache(T
& hash
, const wxString
*s
)
183 typename
T::iterator i
= hash
.find(wxConstCast(s
, wxString
));
184 if ( i
!= hash
.end() )
192 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
193 // so we have to use wxString* here and const-cast when used
194 WX_DECLARE_HASH_MAP(wxString
*, char*, wxPointerHash
, wxPointerEqual
,
195 wxStringCharConversionCache
);
196 static wxStringCharConversionCache gs_stringsCharCache
;
198 const char* wxCStrData::AsChar() const
200 // remove the previously cached value, if any (see FIXMEs above):
201 DeleteStringFromConversionCache(gs_stringsCharCache
, m_str
);
203 // convert the string and keep it:
204 const char *s
= gs_stringsCharCache
[wxConstCast(m_str
, wxString
)] =
205 m_str
->mb_str().release();
209 #endif // wxUSE_UNICODE
211 #if !wxUSE_UNICODE_WCHAR
212 WX_DECLARE_HASH_MAP(wxString
*, wchar_t*, wxPointerHash
, wxPointerEqual
,
213 wxStringWCharConversionCache
);
214 static wxStringWCharConversionCache gs_stringsWCharCache
;
216 const wchar_t* wxCStrData::AsWChar() const
218 // remove the previously cached value, if any (see FIXMEs above):
219 DeleteStringFromConversionCache(gs_stringsWCharCache
, m_str
);
221 // convert the string and keep it:
222 const wchar_t *s
= gs_stringsWCharCache
[wxConstCast(m_str
, wxString
)] =
223 m_str
->wc_str().release();
227 #endif // !wxUSE_UNICODE_WCHAR
229 wxString::~wxString()
232 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
233 DeleteStringFromConversionCache(gs_stringsCharCache
, this);
235 #if !wxUSE_UNICODE_WCHAR
236 DeleteStringFromConversionCache(gs_stringsWCharCache
, this);
241 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
242 const char* wxCStrData::AsChar() const
244 #if wxUSE_UNICODE_UTF8
245 if ( wxLocaleIsUtf8
)
248 // under non-UTF8 locales, we have to convert the internal UTF-8
249 // representation using wxConvLibc and cache the result
251 wxString
*str
= wxConstCast(m_str
, wxString
);
253 // convert the string:
255 // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
256 // have it) but it's unfortunately not obvious to implement
257 // because we don't know how big buffer do we need for the
258 // given string length (in case of multibyte encodings, e.g.
259 // ISO-2022-JP or UTF-8 when internal representation is wchar_t)
261 // One idea would be to store more than just m_convertedToChar
262 // in wxString: then we could record the length of the string
263 // which was converted the last time and try to reuse the same
264 // buffer if the current length is not greater than it (this
265 // could still fail because string could have been modified in
266 // place but it would work most of the time, so we'd do it and
267 // only allocate the new buffer if in-place conversion returned
268 // an error). We could also store a bit saying if the string
269 // was modified since the last conversion (and update it in all
270 // operation modifying the string, of course) to avoid unneeded
271 // consequential conversions. But both of these ideas require
272 // adding more fields to wxString and require profiling results
273 // to be sure that we really gain enough from them to justify
275 wxCharBuffer
buf(str
->mb_str());
277 // if it failed, return empty string and not NULL to avoid crashes in code
278 // written with either wxWidgets 2 wxString or std::string behaviour in
279 // mind: neither of them ever returns NULL and so we shouldn't either
283 if ( str
->m_convertedToChar
&&
284 strlen(buf
) == strlen(str
->m_convertedToChar
) )
286 // keep the same buffer for as long as possible, so that several calls
287 // to c_str() in a row still work:
288 strcpy(str
->m_convertedToChar
, buf
);
292 str
->m_convertedToChar
= buf
.release();
296 return str
->m_convertedToChar
+ m_offset
;
298 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
300 #if !wxUSE_UNICODE_WCHAR
301 const wchar_t* wxCStrData::AsWChar() const
303 wxString
*str
= wxConstCast(m_str
, wxString
);
305 // convert the string:
306 wxWCharBuffer
buf(str
->wc_str());
308 // notice that here, unlike above in AsChar(), conversion can't fail as our
309 // internal UTF-8 is always well-formed -- or the string was corrupted and
310 // all bets are off anyhow
312 // FIXME-UTF8: do the conversion in-place in the existing buffer
313 if ( str
->m_convertedToWChar
&&
314 wxWcslen(buf
) == wxWcslen(str
->m_convertedToWChar
) )
316 // keep the same buffer for as long as possible, so that several calls
317 // to c_str() in a row still work:
318 memcpy(str
->m_convertedToWChar
, buf
, sizeof(wchar_t) * wxWcslen(buf
));
322 str
->m_convertedToWChar
= buf
.release();
326 return str
->m_convertedToWChar
+ m_offset
;
328 #endif // !wxUSE_UNICODE_WCHAR
330 // ===========================================================================
331 // wxString class core
332 // ===========================================================================
334 // ---------------------------------------------------------------------------
335 // construction and conversion
336 // ---------------------------------------------------------------------------
338 #if wxUSE_UNICODE_WCHAR
340 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
341 const wxMBConv
& conv
)
344 if ( !psz
|| nLength
== 0 )
345 return SubstrBufFromMB(L
"", 0);
347 if ( nLength
== npos
)
351 wxWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
353 return SubstrBufFromMB(_T(""), 0);
355 return SubstrBufFromMB(wcBuf
, wcLen
);
357 #endif // wxUSE_UNICODE_WCHAR
359 #if wxUSE_UNICODE_UTF8
361 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
362 const wxMBConv
& conv
)
365 if ( !psz
|| nLength
== 0 )
366 return SubstrBufFromMB("", 0);
368 // if psz is already in UTF-8, we don't have to do the roundtrip to
369 // wchar_t* and back:
372 // we need to validate the input because UTF8 iterators assume valid
373 // UTF-8 sequence and psz may be invalid:
374 if ( wxStringOperations::IsValidUtf8String(psz
, nLength
) )
376 return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz
), nLength
);
378 // else: do the roundtrip through wchar_t*
381 if ( nLength
== npos
)
384 // first convert to wide string:
386 wxWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
388 return SubstrBufFromMB("", 0);
390 // and then to UTF-8:
391 SubstrBufFromMB
buf(ConvertStr(wcBuf
, wcLen
, wxMBConvStrictUTF8()));
392 // widechar -> UTF-8 conversion isn't supposed to ever fail:
393 wxASSERT_MSG( buf
.data
, _T("conversion to UTF-8 failed") );
397 #endif // wxUSE_UNICODE_UTF8
399 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
401 wxString::SubstrBufFromWC
wxString::ConvertStr(const wchar_t *pwz
, size_t nLength
,
402 const wxMBConv
& conv
)
405 if ( !pwz
|| nLength
== 0 )
406 return SubstrBufFromWC("", 0);
408 if ( nLength
== npos
)
412 wxCharBuffer
mbBuf(conv
.cWC2MB(pwz
, nLength
, &mbLen
));
414 return SubstrBufFromWC("", 0);
416 return SubstrBufFromWC(mbBuf
, mbLen
);
418 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
421 #if wxUSE_UNICODE_WCHAR
423 //Convert wxString in Unicode mode to a multi-byte string
424 const wxCharBuffer
wxString::mb_str(const wxMBConv
& conv
) const
426 return conv
.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL
);
429 #elif wxUSE_UNICODE_UTF8
431 const wxWCharBuffer
wxString::wc_str() const
433 return wxMBConvStrictUTF8().cMB2WC
436 m_impl
.length() + 1, // size, not length
441 const wxCharBuffer
wxString::mb_str(const wxMBConv
& conv
) const
444 return wxCharBuffer::CreateNonOwned(m_impl
.c_str());
446 // FIXME-UTF8: use wc_str() here once we have buffers with length
449 wxWCharBuffer
wcBuf(wxMBConvStrictUTF8().cMB2WC
452 m_impl
.length() + 1, // size
456 return wxCharBuffer("");
458 return conv
.cWC2MB(wcBuf
, wcLen
+1, NULL
);
463 //Converts this string to a wide character string if unicode
464 //mode is not enabled and wxUSE_WCHAR_T is enabled
465 const wxWCharBuffer
wxString::wc_str(const wxMBConv
& conv
) const
467 return conv
.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL
);
470 #endif // Unicode/ANSI
472 // shrink to minimal size (releasing extra memory)
473 bool wxString::Shrink()
475 wxString
tmp(begin(), end());
477 return tmp
.length() == length();
480 // deprecated compatibility code:
481 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
482 wxStringCharType
*wxString::GetWriteBuf(size_t nLen
)
484 return DoGetWriteBuf(nLen
);
487 void wxString::UngetWriteBuf()
492 void wxString::UngetWriteBuf(size_t nLen
)
494 DoUngetWriteBuf(nLen
);
496 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
499 // ---------------------------------------------------------------------------
501 // ---------------------------------------------------------------------------
503 // all functions are inline in string.h
505 // ---------------------------------------------------------------------------
506 // concatenation operators
507 // ---------------------------------------------------------------------------
510 * concatenation functions come in 5 flavours:
512 * char + string and string + char
513 * C str + string and string + C str
516 wxString
operator+(const wxString
& str1
, const wxString
& str2
)
518 #if !wxUSE_STL_BASED_WXSTRING
519 wxASSERT( str1
.IsValid() );
520 wxASSERT( str2
.IsValid() );
529 wxString
operator+(const wxString
& str
, wxUniChar ch
)
531 #if !wxUSE_STL_BASED_WXSTRING
532 wxASSERT( str
.IsValid() );
541 wxString
operator+(wxUniChar ch
, const wxString
& str
)
543 #if !wxUSE_STL_BASED_WXSTRING
544 wxASSERT( str
.IsValid() );
553 wxString
operator+(const wxString
& str
, const char *psz
)
555 #if !wxUSE_STL_BASED_WXSTRING
556 wxASSERT( str
.IsValid() );
560 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
561 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
569 wxString
operator+(const wxString
& str
, const wchar_t *pwz
)
571 #if !wxUSE_STL_BASED_WXSTRING
572 wxASSERT( str
.IsValid() );
576 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
577 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
585 wxString
operator+(const char *psz
, const wxString
& str
)
587 #if !wxUSE_STL_BASED_WXSTRING
588 wxASSERT( str
.IsValid() );
592 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
593 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
601 wxString
operator+(const wchar_t *pwz
, const wxString
& str
)
603 #if !wxUSE_STL_BASED_WXSTRING
604 wxASSERT( str
.IsValid() );
608 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
609 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
617 // ---------------------------------------------------------------------------
619 // ---------------------------------------------------------------------------
621 bool wxString::IsSameAs(wxUniChar c
, bool compareWithCase
) const
623 return (length() == 1) && (compareWithCase
? GetChar(0u) == c
624 : wxToupper(GetChar(0u)) == wxToupper(c
));
627 #ifdef HAVE_STD_STRING_COMPARE
629 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
630 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
631 // sort strings in characters code point order by sorting the byte sequence
632 // in byte values order (i.e. what strcmp() and memcmp() do).
634 int wxString::compare(const wxString
& str
) const
636 return m_impl
.compare(str
.m_impl
);
639 int wxString::compare(size_t nStart
, size_t nLen
,
640 const wxString
& str
) const
643 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
644 return m_impl
.compare(pos
, len
, str
.m_impl
);
647 int wxString::compare(size_t nStart
, size_t nLen
,
649 size_t nStart2
, size_t nLen2
) const
652 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
655 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
657 return m_impl
.compare(pos
, len
, str
.m_impl
, pos2
, len2
);
660 int wxString::compare(const char* sz
) const
662 return m_impl
.compare(ImplStr(sz
));
665 int wxString::compare(const wchar_t* sz
) const
667 return m_impl
.compare(ImplStr(sz
));
670 int wxString::compare(size_t nStart
, size_t nLen
,
671 const char* sz
, size_t nCount
) const
674 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
676 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
678 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
681 int wxString::compare(size_t nStart
, size_t nLen
,
682 const wchar_t* sz
, size_t nCount
) const
685 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
687 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
689 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
692 #else // !HAVE_STD_STRING_COMPARE
694 static inline int wxDoCmp(const wxStringCharType
* s1
, size_t l1
,
695 const wxStringCharType
* s2
, size_t l2
)
698 return wxStringMemcmp(s1
, s2
, l1
);
701 int ret
= wxStringMemcmp(s1
, s2
, l1
);
702 return ret
== 0 ? -1 : ret
;
706 int ret
= wxStringMemcmp(s1
, s2
, l2
);
707 return ret
== 0 ? +1 : ret
;
711 int wxString::compare(const wxString
& str
) const
713 return ::wxDoCmp(m_impl
.data(), m_impl
.length(),
714 str
.m_impl
.data(), str
.m_impl
.length());
717 int wxString::compare(size_t nStart
, size_t nLen
,
718 const wxString
& str
) const
720 wxASSERT(nStart
<= length());
721 size_type strLen
= length() - nStart
;
722 nLen
= strLen
< nLen
? strLen
: nLen
;
725 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
727 return ::wxDoCmp(m_impl
.data() + pos
, len
,
728 str
.m_impl
.data(), str
.m_impl
.length());
731 int wxString::compare(size_t nStart
, size_t nLen
,
733 size_t nStart2
, size_t nLen2
) const
735 wxASSERT(nStart
<= length());
736 wxASSERT(nStart2
<= str
.length());
737 size_type strLen
= length() - nStart
,
738 strLen2
= str
.length() - nStart2
;
739 nLen
= strLen
< nLen
? strLen
: nLen
;
740 nLen2
= strLen2
< nLen2
? strLen2
: nLen2
;
743 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
745 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
747 return ::wxDoCmp(m_impl
.data() + pos
, len
,
748 str
.m_impl
.data() + pos2
, len2
);
751 int wxString::compare(const char* sz
) const
753 SubstrBufFromMB
str(ImplStr(sz
, npos
));
754 if ( str
.len
== npos
)
755 str
.len
= wxStringStrlen(str
.data
);
756 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
759 int wxString::compare(const wchar_t* sz
) const
761 SubstrBufFromWC
str(ImplStr(sz
, npos
));
762 if ( str
.len
== npos
)
763 str
.len
= wxStringStrlen(str
.data
);
764 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
767 int wxString::compare(size_t nStart
, size_t nLen
,
768 const char* sz
, size_t nCount
) const
770 wxASSERT(nStart
<= length());
771 size_type strLen
= length() - nStart
;
772 nLen
= strLen
< nLen
? strLen
: nLen
;
775 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
777 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
778 if ( str
.len
== npos
)
779 str
.len
= wxStringStrlen(str
.data
);
781 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
784 int wxString::compare(size_t nStart
, size_t nLen
,
785 const wchar_t* sz
, size_t nCount
) const
787 wxASSERT(nStart
<= length());
788 size_type strLen
= length() - nStart
;
789 nLen
= strLen
< nLen
? strLen
: nLen
;
792 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
794 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
795 if ( str
.len
== npos
)
796 str
.len
= wxStringStrlen(str
.data
);
798 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
801 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
804 // ---------------------------------------------------------------------------
805 // find_{first,last}_[not]_of functions
806 // ---------------------------------------------------------------------------
808 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
810 // NB: All these functions are implemented with the argument being wxChar*,
811 // i.e. widechar string in any Unicode build, even though native string
812 // representation is char* in the UTF-8 build. This is because we couldn't
813 // use memchr() to determine if a character is in a set encoded as UTF-8.
815 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
) const
817 return find_first_of(sz
, nStart
, wxStrlen(sz
));
820 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
) const
822 return find_first_not_of(sz
, nStart
, wxStrlen(sz
));
825 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
827 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
830 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
832 if ( wxTmemchr(sz
, *i
, n
) )
839 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
841 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
844 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
846 if ( !wxTmemchr(sz
, *i
, n
) )
854 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
) const
856 return find_last_of(sz
, nStart
, wxStrlen(sz
));
859 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
) const
861 return find_last_not_of(sz
, nStart
, wxStrlen(sz
));
864 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
866 size_t len
= length();
868 if ( nStart
== npos
)
874 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
878 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
879 i
!= rend(); --idx
, ++i
)
881 if ( wxTmemchr(sz
, *i
, n
) )
888 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
890 size_t len
= length();
892 if ( nStart
== npos
)
898 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
902 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
903 i
!= rend(); --idx
, ++i
)
905 if ( !wxTmemchr(sz
, *i
, n
) )
912 size_t wxString::find_first_not_of(wxUniChar ch
, size_t nStart
) const
914 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
917 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
926 size_t wxString::find_last_not_of(wxUniChar ch
, size_t nStart
) const
928 size_t len
= length();
930 if ( nStart
== npos
)
936 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
940 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
941 i
!= rend(); --idx
, ++i
)
950 // the functions above were implemented for wchar_t* arguments in Unicode
951 // build and char* in ANSI build; below are implementations for the other
954 #define wxOtherCharType char
955 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
957 #define wxOtherCharType wchar_t
958 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
961 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
) const
962 { return find_first_of(STRCONV(sz
), nStart
); }
964 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
,
966 { return find_first_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
967 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
) const
968 { return find_last_of(STRCONV(sz
), nStart
); }
969 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
,
971 { return find_last_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
972 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
973 { return find_first_not_of(STRCONV(sz
), nStart
); }
974 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
,
976 { return find_first_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
977 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
978 { return find_last_not_of(STRCONV(sz
), nStart
); }
979 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
,
981 { return find_last_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
983 #undef wxOtherCharType
986 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
988 // ===========================================================================
989 // other common string functions
990 // ===========================================================================
992 int wxString::CmpNoCase(const wxString
& s
) const
994 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
996 const_iterator i1
= begin();
997 const_iterator end1
= end();
998 const_iterator i2
= s
.begin();
999 const_iterator end2
= s
.end();
1001 for ( ; i1
!= end1
&& i2
!= end2
; ++i1
, ++i2
)
1003 wxUniChar lower1
= (wxChar
)wxTolower(*i1
);
1004 wxUniChar lower2
= (wxChar
)wxTolower(*i2
);
1005 if ( lower1
!= lower2
)
1006 return lower1
< lower2
? -1 : 1;
1009 size_t len1
= length();
1010 size_t len2
= s
.length();
1014 else if ( len1
> len2
)
1023 #ifndef __SCHAR_MAX__
1024 #define __SCHAR_MAX__ 127
1028 wxString
wxString::FromAscii(const char *ascii
, size_t len
)
1030 if (!ascii
|| len
== 0)
1031 return wxEmptyString
;
1036 wxStringInternalBuffer
buf(res
, len
);
1037 wxStringCharType
*dest
= buf
;
1039 for ( ; len
> 0; --len
)
1041 unsigned char c
= (unsigned char)*ascii
++;
1042 wxASSERT_MSG( c
< 0x80,
1043 _T("Non-ASCII value passed to FromAscii().") );
1045 *dest
++ = (wchar_t)c
;
1052 wxString
wxString::FromAscii(const char *ascii
)
1054 return FromAscii(ascii
, wxStrlen(ascii
));
1057 wxString
wxString::FromAscii(char ascii
)
1059 // What do we do with '\0' ?
1061 unsigned char c
= (unsigned char)ascii
;
1063 wxASSERT_MSG( c
< 0x80, _T("Non-ASCII value passed to FromAscii().") );
1065 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1066 return wxString(wxUniChar((wchar_t)c
));
1069 const wxCharBuffer
wxString::ToAscii() const
1071 // this will allocate enough space for the terminating NUL too
1072 wxCharBuffer
buffer(length());
1073 char *dest
= buffer
.data();
1075 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1078 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1079 *dest
++ = c
.IsAscii() ? (char)c
: '_';
1081 // the output string can't have embedded NULs anyhow, so we can safely
1082 // stop at first of them even if we do have any
1090 #endif // wxUSE_UNICODE
1092 // extract string of length nCount starting at nFirst
1093 wxString
wxString::Mid(size_t nFirst
, size_t nCount
) const
1095 size_t nLen
= length();
1097 // default value of nCount is npos and means "till the end"
1098 if ( nCount
== npos
)
1100 nCount
= nLen
- nFirst
;
1103 // out-of-bounds requests return sensible things
1104 if ( nFirst
+ nCount
> nLen
)
1106 nCount
= nLen
- nFirst
;
1109 if ( nFirst
> nLen
)
1111 // AllocCopy() will return empty string
1112 return wxEmptyString
;
1115 wxString
dest(*this, nFirst
, nCount
);
1116 if ( dest
.length() != nCount
)
1118 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1124 // check that the string starts with prefix and return the rest of the string
1125 // in the provided pointer if it is not NULL, otherwise return false
1126 bool wxString::StartsWith(const wxString
& prefix
, wxString
*rest
) const
1128 if ( compare(0, prefix
.length(), prefix
) != 0 )
1133 // put the rest of the string into provided pointer
1134 rest
->assign(*this, prefix
.length(), npos
);
1141 // check that the string ends with suffix and return the rest of it in the
1142 // provided pointer if it is not NULL, otherwise return false
1143 bool wxString::EndsWith(const wxString
& suffix
, wxString
*rest
) const
1145 int start
= length() - suffix
.length();
1147 if ( start
< 0 || compare(start
, npos
, suffix
) != 0 )
1152 // put the rest of the string into provided pointer
1153 rest
->assign(*this, 0, start
);
1160 // extract nCount last (rightmost) characters
1161 wxString
wxString::Right(size_t nCount
) const
1163 if ( nCount
> length() )
1166 wxString
dest(*this, length() - nCount
, nCount
);
1167 if ( dest
.length() != nCount
) {
1168 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1173 // get all characters after the last occurence of ch
1174 // (returns the whole string if ch not found)
1175 wxString
wxString::AfterLast(wxUniChar ch
) const
1178 int iPos
= Find(ch
, true);
1179 if ( iPos
== wxNOT_FOUND
)
1182 str
= wx_str() + iPos
+ 1;
1187 // extract nCount first (leftmost) characters
1188 wxString
wxString::Left(size_t nCount
) const
1190 if ( nCount
> length() )
1193 wxString
dest(*this, 0, nCount
);
1194 if ( dest
.length() != nCount
) {
1195 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1200 // get all characters before the first occurence of ch
1201 // (returns the whole string if ch not found)
1202 wxString
wxString::BeforeFirst(wxUniChar ch
) const
1204 int iPos
= Find(ch
);
1205 if ( iPos
== wxNOT_FOUND
) iPos
= length();
1206 return wxString(*this, 0, iPos
);
1209 /// get all characters before the last occurence of ch
1210 /// (returns empty string if ch not found)
1211 wxString
wxString::BeforeLast(wxUniChar ch
) const
1214 int iPos
= Find(ch
, true);
1215 if ( iPos
!= wxNOT_FOUND
&& iPos
!= 0 )
1216 str
= wxString(c_str(), iPos
);
1221 /// get all characters after the first occurence of ch
1222 /// (returns empty string if ch not found)
1223 wxString
wxString::AfterFirst(wxUniChar ch
) const
1226 int iPos
= Find(ch
);
1227 if ( iPos
!= wxNOT_FOUND
)
1228 str
= wx_str() + iPos
+ 1;
1233 // replace first (or all) occurences of some substring with another one
1234 size_t wxString::Replace(const wxString
& strOld
,
1235 const wxString
& strNew
, bool bReplaceAll
)
1237 // if we tried to replace an empty string we'd enter an infinite loop below
1238 wxCHECK_MSG( !strOld
.empty(), 0,
1239 _T("wxString::Replace(): invalid parameter") );
1241 size_t uiCount
= 0; // count of replacements made
1243 size_t uiOldLen
= strOld
.length();
1244 size_t uiNewLen
= strNew
.length();
1248 while ( (*this)[dwPos
] != wxT('\0') )
1250 //DO NOT USE STRSTR HERE
1251 //this string can contain embedded null characters,
1252 //so strstr will function incorrectly
1253 dwPos
= find(strOld
, dwPos
);
1254 if ( dwPos
== npos
)
1255 break; // exit the loop
1258 //replace this occurance of the old string with the new one
1259 replace(dwPos
, uiOldLen
, strNew
, uiNewLen
);
1261 //move up pos past the string that was replaced
1264 //increase replace count
1269 break; // exit the loop
1276 bool wxString::IsAscii() const
1278 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1280 if ( !(*i
).IsAscii() )
1287 bool wxString::IsWord() const
1289 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1291 if ( !wxIsalpha(*i
) )
1298 bool wxString::IsNumber() const
1303 const_iterator i
= begin();
1305 if ( *i
== _T('-') || *i
== _T('+') )
1308 for ( ; i
!= end(); ++i
)
1310 if ( !wxIsdigit(*i
) )
1317 wxString
wxString::Strip(stripType w
) const
1320 if ( w
& leading
) s
.Trim(false);
1321 if ( w
& trailing
) s
.Trim(true);
1325 // ---------------------------------------------------------------------------
1327 // ---------------------------------------------------------------------------
1329 wxString
& wxString::MakeUpper()
1331 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1332 *it
= (wxChar
)wxToupper(*it
);
1337 wxString
& wxString::MakeLower()
1339 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1340 *it
= (wxChar
)wxTolower(*it
);
1345 // ---------------------------------------------------------------------------
1346 // trimming and padding
1347 // ---------------------------------------------------------------------------
1349 // some compilers (VC++ 6.0 not to name them) return true for a call to
1350 // isspace('ê') in the C locale which seems to be broken to me, but we have to
1351 // live with this by checking that the character is a 7 bit one - even if this
1352 // may fail to detect some spaces (I don't know if Unicode doesn't have
1353 // space-like symbols somewhere except in the first 128 chars), it is arguably
1354 // still better than trimming away accented letters
1355 inline int wxSafeIsspace(wxChar ch
) { return (ch
< 127) && wxIsspace(ch
); }
1357 // trims spaces (in the sense of isspace) from left or right side
1358 wxString
& wxString::Trim(bool bFromRight
)
1360 // first check if we're going to modify the string at all
1363 (bFromRight
&& wxSafeIsspace(GetChar(length() - 1))) ||
1364 (!bFromRight
&& wxSafeIsspace(GetChar(0u)))
1370 // find last non-space character
1371 reverse_iterator psz
= rbegin();
1372 while ( (psz
!= rend()) && wxSafeIsspace(*psz
) )
1375 // truncate at trailing space start
1376 erase(psz
.base(), end());
1380 // find first non-space character
1381 iterator psz
= begin();
1382 while ( (psz
!= end()) && wxSafeIsspace(*psz
) )
1385 // fix up data and length
1386 erase(begin(), psz
);
1393 // adds nCount characters chPad to the string from either side
1394 wxString
& wxString::Pad(size_t nCount
, wxUniChar chPad
, bool bFromRight
)
1396 wxString
s(chPad
, nCount
);
1409 // truncate the string
1410 wxString
& wxString::Truncate(size_t uiLen
)
1412 if ( uiLen
< length() )
1414 erase(begin() + uiLen
, end());
1416 //else: nothing to do, string is already short enough
1421 // ---------------------------------------------------------------------------
1422 // finding (return wxNOT_FOUND if not found and index otherwise)
1423 // ---------------------------------------------------------------------------
1426 int wxString::Find(wxUniChar ch
, bool bFromEnd
) const
1428 size_type idx
= bFromEnd
? find_last_of(ch
) : find_first_of(ch
);
1430 return (idx
== npos
) ? wxNOT_FOUND
: (int)idx
;
1433 // ----------------------------------------------------------------------------
1434 // conversion to numbers
1435 // ----------------------------------------------------------------------------
1437 // The implementation of all the functions below is exactly the same so factor
1438 // it out. Note that number extraction works correctly on UTF-8 strings, so
1439 // we can use wxStringCharType and wx_str() for maximum efficiency.
// DO_IF_NOT_WINCE(x) expands to x everywhere except when __WXWINCE__ is
// defined, where it expands to nothing; it is used below to guard all uses
// of errno.
#ifndef __WXWINCE__
    #define DO_IF_NOT_WINCE(x)      x
#else
    #define DO_IF_NOT_WINCE(x)
#endif

// Body shared by the integer conversion functions: parses the whole string
// with func (a strtol()-like function taking the start pointer, the address
// of the end pointer and the base), stores the result in *val and returns
// from the enclosing function. The macro arguments are parenthesized so that
// arbitrary expressions can be passed safely.
#define WX_STRING_TO_INT_TYPE(val, base, func)                              \
    wxCHECK_MSG( (val), false, _T("NULL output pointer") );                 \
    wxASSERT_MSG( !(base) || ((base) > 1 && (base) <= 36),                  \
                  _T("invalid base") );                                     \
                                                                            \
    DO_IF_NOT_WINCE( errno = 0; )                                           \
                                                                            \
    const wxStringCharType *start = wx_str();                               \
    wxStringCharType *end;                                                  \
    *(val) = func(start, &end, (base));                                     \
                                                                            \
    /* return true only if scan was stopped by the terminating NUL and */   \
    /* if the string was not empty to start with and no under/overflow */   \
    /* occurred: */                                                         \
    return !*end && (end != start)                                          \
        DO_IF_NOT_WINCE( && (errno != ERANGE) )
1463 bool wxString::ToLong(long *val
, int base
) const
1465 WX_STRING_TO_INT_TYPE(val
, base
, wxStrtol
);
1468 bool wxString::ToULong(unsigned long *val
, int base
) const
1470 WX_STRING_TO_INT_TYPE(val
, base
, wxStrtoul
);
1473 bool wxString::ToLongLong(wxLongLong_t
*val
, int base
) const
1475 WX_STRING_TO_INT_TYPE(val
, base
, wxStrtoll
);
1478 bool wxString::ToULongLong(wxULongLong_t
*val
, int base
) const
1480 WX_STRING_TO_INT_TYPE(val
, base
, wxStrtoull
);
1483 bool wxString::ToDouble(double *val
) const
1485 wxCHECK_MSG( val
, false, _T("NULL pointer in wxString::ToDouble") );
1491 const wxChar
*start
= c_str();
1493 *val
= wxStrtod(start
, &end
);
1495 // return true only if scan was stopped by the terminating NUL and if the
1496 // string was not empty to start with and no under/overflow occurred
1497 return !*end
&& (end
!= start
)
1499 && (errno
!= ERANGE
)
1504 // ---------------------------------------------------------------------------
1506 // ---------------------------------------------------------------------------
1508 #if !wxUSE_UTF8_LOCALE_ONLY
1510 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1511 wxString
wxStringPrintfMixinBase::DoFormatWchar(const wxChar
*format
, ...)
1513 wxString
wxString::DoFormatWchar(const wxChar
*format
, ...)
1517 va_start(argptr
, format
);
1520 s
.PrintfV(format
, argptr
);
1526 #endif // !wxUSE_UTF8_LOCALE_ONLY
1528 #if wxUSE_UNICODE_UTF8
1530 wxString
wxString::DoFormatUtf8(const char *format
, ...)
1533 va_start(argptr
, format
);
1536 s
.PrintfV(format
, argptr
);
1542 #endif // wxUSE_UNICODE_UTF8
1545 wxString
wxString::FormatV(const wxString
& format
, va_list argptr
)
1548 s
.PrintfV(format
, argptr
);
1552 #if !wxUSE_UTF8_LOCALE_ONLY
1553 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1554 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar
*format
, ...)
1556 int wxString::DoPrintfWchar(const wxChar
*format
, ...)
1560 va_start(argptr
, format
);
1562 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1563 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1564 // because it's the only cast that works safely for downcasting when
1565 // multiple inheritance is used:
1566 wxString
*str
= static_cast<wxString
*>(this);
1568 wxString
*str
= this;
1571 int iLen
= str
->PrintfV(format
, argptr
);
1577 #endif // !wxUSE_UTF8_LOCALE_ONLY
1579 #if wxUSE_UNICODE_UTF8
1580 int wxString::DoPrintfUtf8(const char *format
, ...)
1583 va_start(argptr
, format
);
1585 int iLen
= PrintfV(format
, argptr
);
1591 #endif // wxUSE_UNICODE_UTF8
1594 Uses wxVsnprintf and places the result into this string.
1596 In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1597 it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in
1598 the ISO C99 (and thus SUSv3) standard the return value for the case of
1599 an undersized buffer is inconsistent. For conforming vsnprintf
1600 implementations the function must return the number of characters that
1601 would have been printed had the buffer been large enough. For conforming
1602 vswprintf implementations the function must return a negative number
1605 What vswprintf sets errno to is undefined but Darwin seems to set it to
1606 EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of
1607 those are defined in the standard and backed up by several conformance
1608 statements. Note that ENOMEM mentioned in the manual page does not
1609 apply to swprintf, only wprintf and fwprintf.
1611 Official manual page:
1612 http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1614 Some conformance statements (AIX, Solaris):
1615 http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1616 http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1618 Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1619 EILSEQ and EINVAL are specifically defined to mean the error is other than
1620 an undersized buffer and no other errno are defined we treat those two
1621 as meaning hard errors and everything else gets the old behavior which
1622 is to keep looping and increasing buffer size until the function succeeds.
1624 In practice it's impossible to determine before compilation which behavior
1625 may be used. The vswprintf function may have vsnprintf-like behavior or
1626 vice-versa. Behavior detected on one release can theoretically change
1627 with an updated release. Not to mention that configure testing for it
1628 would require the test to be run on the host system, not the build system
1629 which makes cross compilation difficult. Therefore, we make no assumptions
1630 about behavior and try our best to handle every known case, including the
1631 case where wxVsnprintf returns a negative number and fails to set errno.
1633 There is yet one more non-standard implementation and that is our own.
1634 Fortunately, that can be detected at compile-time.
1636 On top of all that, ISO C99 explicitly defines snprintf to write a null
1637 character to the last position of the specified buffer. That would be at
1638 the given buffer size minus 1. It is supposed to do this even if it
1639 turns out that the buffer is sized too small.
1641 Darwin (tested on 10.5) follows the C99 behavior exactly.
1643 Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1644 errno even when it fails. However, it only seems to ever fail due
1645 to an undersized buffer.
1647 #if wxUSE_UNICODE_UTF8
1648 template<typename BufferType
>
1650 // we only need one version in non-UTF8 builds and at least two Windows
1651 // compilers have problems with this function template, so use just one
1652 // normal function here
1654 static int DoStringPrintfV(wxString
& str
,
1655 const wxString
& format
, va_list argptr
)
1661 #if wxUSE_UNICODE_UTF8
1662 BufferType
tmp(str
, size
+ 1);
1663 typename
BufferType::CharType
*buf
= tmp
;
1665 wxStringBuffer
tmp(str
, size
+ 1);
1673 // in UTF-8 build, leaving uninitialized junk in the buffer
1674 // could result in invalid non-empty UTF-8 string, so just
1675 // reset the string to empty on failure:
1680 // wxVsnprintf() may modify the original arg pointer, so pass it
1683 wxVaCopy(argptrcopy
, argptr
);
1686 // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1689 int len
= wxVsnprintf(buf
, size
, format
, argptrcopy
);
1692 // some implementations of vsnprintf() don't NUL terminate
1693 // the string if there is not enough space for it so
1694 // always do it manually
1695 // FIXME: This really seems to be the wrong and would be an off-by-one
1696 // bug except the code above allocates an extra character.
1697 buf
[size
] = _T('\0');
1699 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1700 // total number of characters which would have been written if the
1701 // buffer were large enough (newer standards such as Unix98)
1704 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1705 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1706 // is true if *both* of them use our own implementation,
1707 // otherwise we can't be sure
1708 #if wxUSE_WXVSNPRINTF
1709 // we know that our own implementation of wxVsnprintf() returns -1
1710 // only for a format error - thus there's something wrong with
1711 // the user's format string
1714 #else // possibly using system version
1715 // assume it only returns error if there is not enough space, but
1716 // as we don't know how much we need, double the current size of
1719 if( (errno
== EILSEQ
) || (errno
== EINVAL
) )
1720 // If errno was set to one of the two well-known hard errors
1721 // then fail immediately to avoid an infinite loop.
1724 #endif // __WXWINCE__
1725 // still not enough, as we don't know how much we need, double the
1726 // current size of the buffer
1728 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1730 else if ( len
>= size
)
1732 #if wxUSE_WXVSNPRINTF
1733 // we know that our own implementation of wxVsnprintf() returns
1734 // size+1 when there's not enough space but that's not the size
1735 // of the required buffer!
1736 size
*= 2; // so we just double the current size of the buffer
1738 // some vsnprintf() implementations NUL-terminate the buffer and
1739 // some don't in len == size case, to be safe always add 1
1740 // FIXME: I don't quite understand this comment. The vsnprintf
1741 // function is specifically defined to return the number of
1742 // characters printed not including the null terminator.
1743 // So OF COURSE you need to add 1 to get the right buffer size.
1744 // The following line is definitely correct, no question.
1748 else // ok, there was enough space
1754 // we could have overshot
1757 return str
.length();
1760 int wxString::PrintfV(const wxString
& format
, va_list argptr
)
1762 #if wxUSE_UNICODE_UTF8
1763 #if wxUSE_STL_BASED_WXSTRING
1764 typedef wxStringTypeBuffer
<char> Utf8Buffer
;
1766 typedef wxStringInternalBuffer Utf8Buffer
;
1770 #if wxUSE_UTF8_LOCALE_ONLY
1771 return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argptr
);
1773 #if wxUSE_UNICODE_UTF8
1774 if ( wxLocaleIsUtf8
)
1775 return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argptr
);
1778 return DoStringPrintfV
<wxStringBuffer
>(*this, format
, argptr
);
1780 return DoStringPrintfV(*this, format
, argptr
);
1781 #endif // UTF8/WCHAR
1785 // ----------------------------------------------------------------------------
1786 // misc other operations
1787 // ----------------------------------------------------------------------------
1789 // returns true if the string matches the pattern which may contain '*' and
1790 // '?' metacharacters (as usual, '?' matches any character and '*' any number
1792 bool wxString::Matches(const wxString
& mask
) const
1794 // I disable this code as it doesn't seem to be faster (in fact, it seems
1795 // to be much slower) than the old, hand-written code below and using it
1796 // here requires always linking with libregex even if the user code doesn't
1798 #if 0 // wxUSE_REGEX
1799 // first translate the shell-like mask into a regex
1801 pattern
.reserve(wxStrlen(pszMask
));
1813 pattern
+= _T(".*");
1824 // these characters are special in a RE, quote them
1825 // (however note that we don't quote '[' and ']' to allow
1826 // using them for Unix shell like matching)
1827 pattern
+= _T('\\');
1831 pattern
+= *pszMask
;
1839 return wxRegEx(pattern
, wxRE_NOSUB
| wxRE_EXTENDED
).Matches(c_str());
1840 #else // !wxUSE_REGEX
1841 // TODO: this is, of course, awfully inefficient...
1843 // FIXME-UTF8: implement using iterators, remove #if
1844 #if wxUSE_UNICODE_UTF8
1845 wxWCharBuffer maskBuf
= mask
.wc_str();
1846 wxWCharBuffer txtBuf
= wc_str();
1847 const wxChar
*pszMask
= maskBuf
.data();
1848 const wxChar
*pszTxt
= txtBuf
.data();
1850 const wxChar
*pszMask
= mask
.wx_str();
1851 // the char currently being checked
1852 const wxChar
*pszTxt
= wx_str();
1855 // the last location where '*' matched
1856 const wxChar
*pszLastStarInText
= NULL
;
1857 const wxChar
*pszLastStarInMask
= NULL
;
1860 for ( ; *pszMask
!= wxT('\0'); pszMask
++, pszTxt
++ ) {
1861 switch ( *pszMask
) {
1863 if ( *pszTxt
== wxT('\0') )
1866 // pszTxt and pszMask will be incremented in the loop statement
1872 // remember where we started to be able to backtrack later
1873 pszLastStarInText
= pszTxt
;
1874 pszLastStarInMask
= pszMask
;
1876 // ignore special chars immediately following this one
1877 // (should this be an error?)
1878 while ( *pszMask
== wxT('*') || *pszMask
== wxT('?') )
1881 // if there is nothing more, match
1882 if ( *pszMask
== wxT('\0') )
1885 // are there any other metacharacters in the mask?
1887 const wxChar
*pEndMask
= wxStrpbrk(pszMask
, wxT("*?"));
1889 if ( pEndMask
!= NULL
) {
1890 // we have to match the string between two metachars
1891 uiLenMask
= pEndMask
- pszMask
;
1894 // we have to match the remainder of the string
1895 uiLenMask
= wxStrlen(pszMask
);
1898 wxString
strToMatch(pszMask
, uiLenMask
);
1899 const wxChar
* pMatch
= wxStrstr(pszTxt
, strToMatch
);
1900 if ( pMatch
== NULL
)
1903 // -1 to compensate "++" in the loop
1904 pszTxt
= pMatch
+ uiLenMask
- 1;
1905 pszMask
+= uiLenMask
- 1;
1910 if ( *pszMask
!= *pszTxt
)
1916 // match only if nothing left
1917 if ( *pszTxt
== wxT('\0') )
1920 // if we failed to match, backtrack if we can
1921 if ( pszLastStarInText
) {
1922 pszTxt
= pszLastStarInText
+ 1;
1923 pszMask
= pszLastStarInMask
;
1925 pszLastStarInText
= NULL
;
1927 // don't bother resetting pszLastStarInMask, it's unnecessary
1933 #endif // wxUSE_REGEX/!wxUSE_REGEX
1936 // Count the number of chars
1937 int wxString::Freq(wxUniChar ch
) const
1940 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1948 // convert to upper case, return the copy of the string
1949 wxString
wxString::Upper() const
1950 { wxString
s(*this); return s
.MakeUpper(); }
1952 // convert to lower case, return the copy of the string
1953 wxString
wxString::Lower() const { wxString
s(*this); return s
.MakeLower(); }
1955 // ----------------------------------------------------------------------------
1956 // wxUTF8StringBuffer
1957 // ----------------------------------------------------------------------------
1959 #if wxUSE_UNICODE_WCHAR
1960 wxUTF8StringBuffer::~wxUTF8StringBuffer()
1962 wxMBConvStrictUTF8 conv
;
1963 size_t wlen
= conv
.ToWChar(NULL
, 0, m_buf
);
1964 wxCHECK_RET( wlen
!= wxCONV_FAILED
, "invalid UTF-8 data in string buffer?" );
1966 wxStringInternalBuffer
wbuf(m_str
, wlen
);
1967 conv
.ToWChar(wbuf
, wlen
, m_buf
);
1970 wxUTF8StringBufferLength::~wxUTF8StringBufferLength()
1972 wxCHECK_RET(m_lenSet
, "length not set");
1974 wxMBConvStrictUTF8 conv
;
1975 size_t wlen
= conv
.ToWChar(NULL
, 0, m_buf
, m_len
);
1976 wxCHECK_RET( wlen
!= wxCONV_FAILED
, "invalid UTF-8 data in string buffer?" );
1978 wxStringInternalBufferLength
wbuf(m_str
, wlen
);
1979 conv
.ToWChar(wbuf
, wlen
, m_buf
, m_len
);
1980 wbuf
.SetLength(wlen
);
1982 #endif // wxUSE_UNICODE_WCHAR