1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
13 // ===========================================================================
14 // headers, declarations, constants
15 // ===========================================================================
17 // For compilers that support precompilation, includes "wx.h".
18 #include "wx/wxprec.h"
#include "wx/string.h"
#include "wx/wxcrtvararg.h"
#include "wx/hashmap.h"

#include <stdlib.h>
40 // string handling functions used by wxString:
41 #if wxUSE_UNICODE_UTF8
42 #define wxStringMemcpy memcpy
43 #define wxStringMemcmp memcmp
44 #define wxStringMemchr memchr
45 #define wxStringStrlen strlen
47 #define wxStringMemcpy wxTmemcpy
48 #define wxStringMemcmp wxTmemcmp
49 #define wxStringMemchr wxTmemchr
50 #define wxStringStrlen wxStrlen
54 // ---------------------------------------------------------------------------
55 // static class variables definition
56 // ---------------------------------------------------------------------------
58 //According to STL _must_ be a -1 size_t
59 const size_t wxString::npos
= (size_t) -1;
61 // ----------------------------------------------------------------------------
63 // ----------------------------------------------------------------------------
65 #if wxUSE_STD_IOSTREAM
69 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxCStrData
& str
)
71 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
72 return os
<< (const char *)str
.AsCharBuf();
74 return os
<< str
.AsInternal();
78 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxString
& str
)
80 return os
<< str
.c_str();
83 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxCharBuffer
& str
)
85 return os
<< str
.data();
89 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxWCharBuffer
& str
)
91 return os
<< str
.data();
95 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
97 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxString
& str
)
99 return wos
<< str
.wc_str();
102 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxCStrData
& str
)
104 return wos
<< str
.AsWChar();
107 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxWCharBuffer
& str
)
109 return wos
<< str
.data();
112 #endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
114 #endif // wxUSE_STD_IOSTREAM
116 // ===========================================================================
117 // wxString class core
118 // ===========================================================================
120 #if wxUSE_UNICODE_UTF8
122 void wxString::PosLenToImpl(size_t pos
, size_t len
,
123 size_t *implPos
, size_t *implLen
) const
129 const_iterator i
= begin() + pos
;
130 *implPos
= wxStringImpl::const_iterator(i
.impl()) - m_impl
.begin();
135 // too large length is interpreted as "to the end of the string"
136 // FIXME-UTF8: verify this is the case in std::string, assert
138 if ( pos
+ len
> length() )
139 len
= length() - pos
;
141 *implLen
= (i
+ len
).impl() - i
.impl();
146 #endif // wxUSE_UNICODE_UTF8
148 // ----------------------------------------------------------------------------
149 // wxCStrData converted strings caching
150 // ----------------------------------------------------------------------------
152 // FIXME-UTF8: temporarily disabled because it doesn't work with global
153 // string objects; re-enable after fixing this bug and benchmarking
154 // performance to see if using a hash is a good idea at all
157 // For backward compatibility reasons, it must be possible to assign the value
158 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
159 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
160 // because the memory would be freed immediately, but it has to be valid as long
161 // as the string is not modified, so that code like this still works:
163 // const wxChar *s = str.c_str();
164 // while ( s ) { ... }
166 // FIXME-UTF8: not thread safe!
167 // FIXME-UTF8: we currently clear the cached conversion only when the string is
168 // destroyed, but we should do it when the string is modified, to
169 // keep memory usage down
// FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
//             invalidated the cache on every change, we could keep the previous
//             conversion
173 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
174 // to use mb_str() or wc_str() instead of (const [w]char*)c_str()
177 static inline void DeleteStringFromConversionCache(T
& hash
, const wxString
*s
)
179 typename
T::iterator i
= hash
.find(wxConstCast(s
, wxString
));
180 if ( i
!= hash
.end() )
188 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
189 // so we have to use wxString* here and const-cast when used
190 WX_DECLARE_HASH_MAP(wxString
*, char*, wxPointerHash
, wxPointerEqual
,
191 wxStringCharConversionCache
);
192 static wxStringCharConversionCache gs_stringsCharCache
;
194 const char* wxCStrData::AsChar() const
196 // remove previously cache value, if any (see FIXMEs above):
197 DeleteStringFromConversionCache(gs_stringsCharCache
, m_str
);
199 // convert the string and keep it:
200 const char *s
= gs_stringsCharCache
[wxConstCast(m_str
, wxString
)] =
201 m_str
->mb_str().release();
205 #endif // wxUSE_UNICODE
207 #if !wxUSE_UNICODE_WCHAR
208 WX_DECLARE_HASH_MAP(wxString
*, wchar_t*, wxPointerHash
, wxPointerEqual
,
209 wxStringWCharConversionCache
);
210 static wxStringWCharConversionCache gs_stringsWCharCache
;
212 const wchar_t* wxCStrData::AsWChar() const
214 // remove previously cache value, if any (see FIXMEs above):
215 DeleteStringFromConversionCache(gs_stringsWCharCache
, m_str
);
217 // convert the string and keep it:
218 const wchar_t *s
= gs_stringsWCharCache
[wxConstCast(m_str
, wxString
)] =
219 m_str
->wc_str().release();
223 #endif // !wxUSE_UNICODE_WCHAR
225 wxString::~wxString()
228 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
229 DeleteStringFromConversionCache(gs_stringsCharCache
, this);
231 #if !wxUSE_UNICODE_WCHAR
232 DeleteStringFromConversionCache(gs_stringsWCharCache
, this);
237 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
238 const char* wxCStrData::AsChar() const
240 #if wxUSE_UNICODE_UTF8
241 if ( wxLocaleIsUtf8
)
244 // under non-UTF8 locales, we have to convert the internal UTF-8
245 // representation using wxConvLibc and cache the result
247 wxString
*str
= wxConstCast(m_str
, wxString
);
249 // convert the string:
251 // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
252 // have it) but it's unfortunately not obvious to implement
253 // because we don't know how big buffer do we need for the
254 // given string length (in case of multibyte encodings, e.g.
255 // ISO-2022-JP or UTF-8 when internal representation is wchar_t)
257 // One idea would be to store more than just m_convertedToChar
258 // in wxString: then we could record the length of the string
259 // which was converted the last time and try to reuse the same
260 // buffer if the current length is not greater than it (this
261 // could still fail because string could have been modified in
262 // place but it would work most of the time, so we'd do it and
263 // only allocate the new buffer if in-place conversion returned
264 // an error). We could also store a bit saying if the string
265 // was modified since the last conversion (and update it in all
266 // operation modifying the string, of course) to avoid unneeded
267 // consequential conversions. But both of these ideas require
268 // adding more fields to wxString and require profiling results
269 // to be sure that we really gain enough from them to justify
271 wxCharBuffer
buf(str
->mb_str());
273 // if it failed, return empty string and not NULL to avoid crashes in code
274 // written with either wxWidgets 2 wxString or std::string behaviour in
275 // mind: neither of them ever returns NULL and so we shouldn't neither
279 if ( str
->m_convertedToChar
&&
280 strlen(buf
) == strlen(str
->m_convertedToChar
) )
282 // keep the same buffer for as long as possible, so that several calls
283 // to c_str() in a row still work:
284 strcpy(str
->m_convertedToChar
, buf
);
288 str
->m_convertedToChar
= buf
.release();
292 return str
->m_convertedToChar
+ m_offset
;
294 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
296 #if !wxUSE_UNICODE_WCHAR
297 const wchar_t* wxCStrData::AsWChar() const
299 wxString
*str
= wxConstCast(m_str
, wxString
);
301 // convert the string:
302 wxWCharBuffer
buf(str
->wc_str());
304 // notice that here, unlike above in AsChar(), conversion can't fail as our
305 // internal UTF-8 is always well-formed -- or the string was corrupted and
306 // all bets are off anyhow
308 // FIXME-UTF8: do the conversion in-place in the existing buffer
309 if ( str
->m_convertedToWChar
&&
310 wxWcslen(buf
) == wxWcslen(str
->m_convertedToWChar
) )
312 // keep the same buffer for as long as possible, so that several calls
313 // to c_str() in a row still work:
314 memcpy(str
->m_convertedToWChar
, buf
, sizeof(wchar_t) * wxWcslen(buf
));
318 str
->m_convertedToWChar
= buf
.release();
322 return str
->m_convertedToWChar
+ m_offset
;
324 #endif // !wxUSE_UNICODE_WCHAR
326 // ===========================================================================
327 // wxString class core
328 // ===========================================================================
330 // ---------------------------------------------------------------------------
331 // construction and conversion
332 // ---------------------------------------------------------------------------
334 #if wxUSE_UNICODE_WCHAR
336 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
337 const wxMBConv
& conv
)
340 if ( !psz
|| nLength
== 0 )
341 return SubstrBufFromMB(L
"", 0);
343 if ( nLength
== npos
)
347 wxWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
349 return SubstrBufFromMB(_T(""), 0);
351 return SubstrBufFromMB(wcBuf
, wcLen
);
353 #endif // wxUSE_UNICODE_WCHAR
355 #if wxUSE_UNICODE_UTF8
357 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
358 const wxMBConv
& conv
)
361 if ( !psz
|| nLength
== 0 )
362 return SubstrBufFromMB("", 0);
364 // if psz is already in UTF-8, we don't have to do the roundtrip to
365 // wchar_t* and back:
368 // we need to validate the input because UTF8 iterators assume valid
369 // UTF-8 sequence and psz may be invalid:
370 if ( wxStringOperations::IsValidUtf8String(psz
, nLength
) )
372 return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz
), nLength
);
374 // else: do the roundtrip through wchar_t*
377 if ( nLength
== npos
)
380 // first convert to wide string:
382 wxWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
384 return SubstrBufFromMB("", 0);
386 // and then to UTF-8:
387 SubstrBufFromMB
buf(ConvertStr(wcBuf
, wcLen
, wxMBConvStrictUTF8()));
388 // widechar -> UTF-8 conversion isn't supposed to ever fail:
389 wxASSERT_MSG( buf
.data
, _T("conversion to UTF-8 failed") );
393 #endif // wxUSE_UNICODE_UTF8
395 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
397 wxString::SubstrBufFromWC
wxString::ConvertStr(const wchar_t *pwz
, size_t nLength
,
398 const wxMBConv
& conv
)
401 if ( !pwz
|| nLength
== 0 )
402 return SubstrBufFromWC("", 0);
404 if ( nLength
== npos
)
408 wxCharBuffer
mbBuf(conv
.cWC2MB(pwz
, nLength
, &mbLen
));
410 return SubstrBufFromWC("", 0);
412 return SubstrBufFromWC(mbBuf
, mbLen
);
414 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
417 #if wxUSE_UNICODE_WCHAR
419 //Convert wxString in Unicode mode to a multi-byte string
420 const wxCharBuffer
wxString::mb_str(const wxMBConv
& conv
) const
422 return conv
.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL
);
425 #elif wxUSE_UNICODE_UTF8
427 const wxWCharBuffer
wxString::wc_str() const
429 return wxMBConvStrictUTF8().cMB2WC
432 m_impl
.length() + 1, // size, not length
437 const wxCharBuffer
wxString::mb_str(const wxMBConv
& conv
) const
440 return wxCharBuffer::CreateNonOwned(m_impl
.c_str());
442 // FIXME-UTF8: use wc_str() here once we have buffers with length
445 wxWCharBuffer
wcBuf(wxMBConvStrictUTF8().cMB2WC
448 m_impl
.length() + 1, // size
452 return wxCharBuffer("");
454 return conv
.cWC2MB(wcBuf
, wcLen
+1, NULL
);
459 //Converts this string to a wide character string if unicode
460 //mode is not enabled and wxUSE_WCHAR_T is enabled
461 const wxWCharBuffer
wxString::wc_str(const wxMBConv
& conv
) const
463 return conv
.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL
);
466 #endif // Unicode/ANSI
468 // shrink to minimal size (releasing extra memory)
469 bool wxString::Shrink()
471 wxString
tmp(begin(), end());
473 return tmp
.length() == length();
476 // deprecated compatibility code:
477 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
478 wxStringCharType
*wxString::GetWriteBuf(size_t nLen
)
480 return DoGetWriteBuf(nLen
);
483 void wxString::UngetWriteBuf()
488 void wxString::UngetWriteBuf(size_t nLen
)
490 DoUngetWriteBuf(nLen
);
492 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
495 // ---------------------------------------------------------------------------
497 // ---------------------------------------------------------------------------
499 // all functions are inline in string.h
501 // ---------------------------------------------------------------------------
502 // concatenation operators
503 // ---------------------------------------------------------------------------
506 * concatenation functions come in 5 flavours:
508 * char + string and string + char
509 * C str + string and string + C str
512 wxString
operator+(const wxString
& str1
, const wxString
& str2
)
514 #if !wxUSE_STL_BASED_WXSTRING
515 wxASSERT( str1
.IsValid() );
516 wxASSERT( str2
.IsValid() );
525 wxString
operator+(const wxString
& str
, wxUniChar ch
)
527 #if !wxUSE_STL_BASED_WXSTRING
528 wxASSERT( str
.IsValid() );
537 wxString
operator+(wxUniChar ch
, const wxString
& str
)
539 #if !wxUSE_STL_BASED_WXSTRING
540 wxASSERT( str
.IsValid() );
549 wxString
operator+(const wxString
& str
, const char *psz
)
551 #if !wxUSE_STL_BASED_WXSTRING
552 wxASSERT( str
.IsValid() );
556 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
557 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
565 wxString
operator+(const wxString
& str
, const wchar_t *pwz
)
567 #if !wxUSE_STL_BASED_WXSTRING
568 wxASSERT( str
.IsValid() );
572 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
573 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
581 wxString
operator+(const char *psz
, const wxString
& str
)
583 #if !wxUSE_STL_BASED_WXSTRING
584 wxASSERT( str
.IsValid() );
588 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
589 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
597 wxString
operator+(const wchar_t *pwz
, const wxString
& str
)
599 #if !wxUSE_STL_BASED_WXSTRING
600 wxASSERT( str
.IsValid() );
604 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
605 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
613 // ---------------------------------------------------------------------------
615 // ---------------------------------------------------------------------------
617 bool wxString::IsSameAs(wxUniChar c
, bool compareWithCase
) const
619 return (length() == 1) && (compareWithCase
? GetChar(0u) == c
620 : wxToupper(GetChar(0u)) == wxToupper(c
));
623 #ifdef HAVE_STD_STRING_COMPARE
625 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
626 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
627 // sort strings in characters code point order by sorting the byte sequence
628 // in byte values order (i.e. what strcmp() and memcmp() do).
630 int wxString::compare(const wxString
& str
) const
632 return m_impl
.compare(str
.m_impl
);
635 int wxString::compare(size_t nStart
, size_t nLen
,
636 const wxString
& str
) const
639 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
640 return m_impl
.compare(pos
, len
, str
.m_impl
);
643 int wxString::compare(size_t nStart
, size_t nLen
,
645 size_t nStart2
, size_t nLen2
) const
648 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
651 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
653 return m_impl
.compare(pos
, len
, str
.m_impl
, pos2
, len2
);
656 int wxString::compare(const char* sz
) const
658 return m_impl
.compare(ImplStr(sz
));
661 int wxString::compare(const wchar_t* sz
) const
663 return m_impl
.compare(ImplStr(sz
));
666 int wxString::compare(size_t nStart
, size_t nLen
,
667 const char* sz
, size_t nCount
) const
670 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
672 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
674 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
677 int wxString::compare(size_t nStart
, size_t nLen
,
678 const wchar_t* sz
, size_t nCount
) const
681 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
683 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
685 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
688 #else // !HAVE_STD_STRING_COMPARE
690 static inline int wxDoCmp(const wxStringCharType
* s1
, size_t l1
,
691 const wxStringCharType
* s2
, size_t l2
)
694 return wxStringMemcmp(s1
, s2
, l1
);
697 int ret
= wxStringMemcmp(s1
, s2
, l1
);
698 return ret
== 0 ? -1 : ret
;
702 int ret
= wxStringMemcmp(s1
, s2
, l2
);
703 return ret
== 0 ? +1 : ret
;
707 int wxString::compare(const wxString
& str
) const
709 return ::wxDoCmp(m_impl
.data(), m_impl
.length(),
710 str
.m_impl
.data(), str
.m_impl
.length());
713 int wxString::compare(size_t nStart
, size_t nLen
,
714 const wxString
& str
) const
716 wxASSERT(nStart
<= length());
717 size_type strLen
= length() - nStart
;
718 nLen
= strLen
< nLen
? strLen
: nLen
;
721 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
723 return ::wxDoCmp(m_impl
.data() + pos
, len
,
724 str
.m_impl
.data(), str
.m_impl
.length());
727 int wxString::compare(size_t nStart
, size_t nLen
,
729 size_t nStart2
, size_t nLen2
) const
731 wxASSERT(nStart
<= length());
732 wxASSERT(nStart2
<= str
.length());
733 size_type strLen
= length() - nStart
,
734 strLen2
= str
.length() - nStart2
;
735 nLen
= strLen
< nLen
? strLen
: nLen
;
736 nLen2
= strLen2
< nLen2
? strLen2
: nLen2
;
739 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
741 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
743 return ::wxDoCmp(m_impl
.data() + pos
, len
,
744 str
.m_impl
.data() + pos2
, len2
);
747 int wxString::compare(const char* sz
) const
749 SubstrBufFromMB
str(ImplStr(sz
, npos
));
750 if ( str
.len
== npos
)
751 str
.len
= wxStringStrlen(str
.data
);
752 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
755 int wxString::compare(const wchar_t* sz
) const
757 SubstrBufFromWC
str(ImplStr(sz
, npos
));
758 if ( str
.len
== npos
)
759 str
.len
= wxStringStrlen(str
.data
);
760 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
763 int wxString::compare(size_t nStart
, size_t nLen
,
764 const char* sz
, size_t nCount
) const
766 wxASSERT(nStart
<= length());
767 size_type strLen
= length() - nStart
;
768 nLen
= strLen
< nLen
? strLen
: nLen
;
771 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
773 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
774 if ( str
.len
== npos
)
775 str
.len
= wxStringStrlen(str
.data
);
777 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
780 int wxString::compare(size_t nStart
, size_t nLen
,
781 const wchar_t* sz
, size_t nCount
) const
783 wxASSERT(nStart
<= length());
784 size_type strLen
= length() - nStart
;
785 nLen
= strLen
< nLen
? strLen
: nLen
;
788 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
790 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
791 if ( str
.len
== npos
)
792 str
.len
= wxStringStrlen(str
.data
);
794 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
797 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
800 // ---------------------------------------------------------------------------
801 // find_{first,last}_[not]_of functions
802 // ---------------------------------------------------------------------------
804 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
806 // NB: All these functions are implemented with the argument being wxChar*,
807 // i.e. widechar string in any Unicode build, even though native string
808 // representation is char* in the UTF-8 build. This is because we couldn't
809 // use memchr() to determine if a character is in a set encoded as UTF-8.
811 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
) const
813 return find_first_of(sz
, nStart
, wxStrlen(sz
));
816 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
) const
818 return find_first_not_of(sz
, nStart
, wxStrlen(sz
));
821 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
823 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
826 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
828 if ( wxTmemchr(sz
, *i
, n
) )
835 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
837 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
840 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
842 if ( !wxTmemchr(sz
, *i
, n
) )
850 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
) const
852 return find_last_of(sz
, nStart
, wxStrlen(sz
));
855 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
) const
857 return find_last_not_of(sz
, nStart
, wxStrlen(sz
));
860 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
862 size_t len
= length();
864 if ( nStart
== npos
)
870 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
874 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
875 i
!= rend(); --idx
, ++i
)
877 if ( wxTmemchr(sz
, *i
, n
) )
884 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
886 size_t len
= length();
888 if ( nStart
== npos
)
894 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
898 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
899 i
!= rend(); --idx
, ++i
)
901 if ( !wxTmemchr(sz
, *i
, n
) )
908 size_t wxString::find_first_not_of(wxUniChar ch
, size_t nStart
) const
910 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
913 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
922 size_t wxString::find_last_not_of(wxUniChar ch
, size_t nStart
) const
924 size_t len
= length();
926 if ( nStart
== npos
)
932 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
936 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
937 i
!= rend(); --idx
, ++i
)
946 // the functions above were implemented for wchar_t* arguments in Unicode
947 // build and char* in ANSI build; below are implementations for the other
950 #define wxOtherCharType char
951 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
953 #define wxOtherCharType wchar_t
954 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
957 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
) const
958 { return find_first_of(STRCONV(sz
), nStart
); }
960 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
,
962 { return find_first_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
963 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
) const
964 { return find_last_of(STRCONV(sz
), nStart
); }
965 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
,
967 { return find_last_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
968 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
969 { return find_first_not_of(STRCONV(sz
), nStart
); }
970 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
,
972 { return find_first_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
973 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
974 { return find_last_not_of(STRCONV(sz
), nStart
); }
975 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
,
977 { return find_last_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
979 #undef wxOtherCharType
982 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
984 // ===========================================================================
985 // other common string functions
986 // ===========================================================================
988 int wxString::CmpNoCase(const wxString
& s
) const
990 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
992 const_iterator i1
= begin();
993 const_iterator end1
= end();
994 const_iterator i2
= s
.begin();
995 const_iterator end2
= s
.end();
997 for ( ; i1
!= end1
&& i2
!= end2
; ++i1
, ++i2
)
999 wxUniChar lower1
= (wxChar
)wxTolower(*i1
);
1000 wxUniChar lower2
= (wxChar
)wxTolower(*i2
);
1001 if ( lower1
!= lower2
)
1002 return lower1
< lower2
? -1 : 1;
1005 size_t len1
= length();
1006 size_t len2
= s
.length();
1010 else if ( len1
> len2
)
1019 #ifndef __SCHAR_MAX__
1020 #define __SCHAR_MAX__ 127
1024 wxString
wxString::FromAscii(const char *ascii
, size_t len
)
1026 if (!ascii
|| len
== 0)
1027 return wxEmptyString
;
1032 wxStringInternalBuffer
buf(res
, len
);
1033 wxStringCharType
*dest
= buf
;
1035 for ( ; len
> 0; --len
)
1037 unsigned char c
= (unsigned char)*ascii
++;
1038 wxASSERT_MSG( c
< 0x80,
1039 _T("Non-ASCII value passed to FromAscii().") );
1041 *dest
++ = (wchar_t)c
;
1048 wxString
wxString::FromAscii(const char *ascii
)
1050 return FromAscii(ascii
, wxStrlen(ascii
));
1053 wxString
wxString::FromAscii(char ascii
)
1055 // What do we do with '\0' ?
1057 unsigned char c
= (unsigned char)ascii
;
1059 wxASSERT_MSG( c
< 0x80, _T("Non-ASCII value passed to FromAscii().") );
1061 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1062 return wxString(wxUniChar((wchar_t)c
));
1065 const wxCharBuffer
wxString::ToAscii() const
1067 // this will allocate enough space for the terminating NUL too
1068 wxCharBuffer
buffer(length());
1069 char *dest
= buffer
.data();
1071 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1074 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1075 *dest
++ = c
.IsAscii() ? (char)c
: '_';
1077 // the output string can't have embedded NULs anyhow, so we can safely
1078 // stop at first of them even if we do have any
1086 #endif // wxUSE_UNICODE
1088 // extract string of length nCount starting at nFirst
1089 wxString
wxString::Mid(size_t nFirst
, size_t nCount
) const
1091 size_t nLen
= length();
1093 // default value of nCount is npos and means "till the end"
1094 if ( nCount
== npos
)
1096 nCount
= nLen
- nFirst
;
1099 // out-of-bounds requests return sensible things
1100 if ( nFirst
+ nCount
> nLen
)
1102 nCount
= nLen
- nFirst
;
1105 if ( nFirst
> nLen
)
1107 // AllocCopy() will return empty string
1108 return wxEmptyString
;
1111 wxString
dest(*this, nFirst
, nCount
);
1112 if ( dest
.length() != nCount
)
1114 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1120 // check that the string starts with prefix and return the rest of the string
1121 // in the provided pointer if it is not NULL, otherwise return false
1122 bool wxString::StartsWith(const wxString
& prefix
, wxString
*rest
) const
1124 if ( compare(0, prefix
.length(), prefix
) != 0 )
1129 // put the rest of the string into provided pointer
1130 rest
->assign(*this, prefix
.length(), npos
);
1137 // check that the string ends with suffix and return the rest of it in the
1138 // provided pointer if it is not NULL, otherwise return false
1139 bool wxString::EndsWith(const wxString
& suffix
, wxString
*rest
) const
1141 int start
= length() - suffix
.length();
1143 if ( start
< 0 || compare(start
, npos
, suffix
) != 0 )
1148 // put the rest of the string into provided pointer
1149 rest
->assign(*this, 0, start
);
1156 // extract nCount last (rightmost) characters
1157 wxString
wxString::Right(size_t nCount
) const
1159 if ( nCount
> length() )
1162 wxString
dest(*this, length() - nCount
, nCount
);
1163 if ( dest
.length() != nCount
) {
1164 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1169 // get all characters after the last occurence of ch
1170 // (returns the whole string if ch not found)
1171 wxString
wxString::AfterLast(wxUniChar ch
) const
1174 int iPos
= Find(ch
, true);
1175 if ( iPos
== wxNOT_FOUND
)
1178 str
= wx_str() + iPos
+ 1;
1183 // extract nCount first (leftmost) characters
1184 wxString
wxString::Left(size_t nCount
) const
1186 if ( nCount
> length() )
1189 wxString
dest(*this, 0, nCount
);
1190 if ( dest
.length() != nCount
) {
1191 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1196 // get all characters before the first occurence of ch
1197 // (returns the whole string if ch not found)
1198 wxString
wxString::BeforeFirst(wxUniChar ch
) const
1200 int iPos
= Find(ch
);
1201 if ( iPos
== wxNOT_FOUND
) iPos
= length();
1202 return wxString(*this, 0, iPos
);
1205 /// get all characters before the last occurence of ch
1206 /// (returns empty string if ch not found)
1207 wxString
wxString::BeforeLast(wxUniChar ch
) const
1210 int iPos
= Find(ch
, true);
1211 if ( iPos
!= wxNOT_FOUND
&& iPos
!= 0 )
1212 str
= wxString(c_str(), iPos
);
1217 /// get all characters after the first occurence of ch
1218 /// (returns empty string if ch not found)
1219 wxString
wxString::AfterFirst(wxUniChar ch
) const
1222 int iPos
= Find(ch
);
1223 if ( iPos
!= wxNOT_FOUND
)
1224 str
= wx_str() + iPos
+ 1;
1229 // replace first (or all) occurences of some substring with another one
1230 size_t wxString::Replace(const wxString
& strOld
,
1231 const wxString
& strNew
, bool bReplaceAll
)
1233 // if we tried to replace an empty string we'd enter an infinite loop below
1234 wxCHECK_MSG( !strOld
.empty(), 0,
1235 _T("wxString::Replace(): invalid parameter") );
1237 size_t uiCount
= 0; // count of replacements made
1239 size_t uiOldLen
= strOld
.length();
1240 size_t uiNewLen
= strNew
.length();
1244 while ( (*this)[dwPos
] != wxT('\0') )
1246 //DO NOT USE STRSTR HERE
1247 //this string can contain embedded null characters,
1248 //so strstr will function incorrectly
1249 dwPos
= find(strOld
, dwPos
);
1250 if ( dwPos
== npos
)
1251 break; // exit the loop
1254 //replace this occurance of the old string with the new one
1255 replace(dwPos
, uiOldLen
, strNew
, uiNewLen
);
1257 //move up pos past the string that was replaced
1260 //increase replace count
1265 break; // exit the loop
1272 bool wxString::IsAscii() const
1274 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1276 if ( !(*i
).IsAscii() )
1283 bool wxString::IsWord() const
1285 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1287 if ( !wxIsalpha(*i
) )
1294 bool wxString::IsNumber() const
1299 const_iterator i
= begin();
1301 if ( *i
== _T('-') || *i
== _T('+') )
1304 for ( ; i
!= end(); ++i
)
1306 if ( !wxIsdigit(*i
) )
1313 wxString
wxString::Strip(stripType w
) const
1316 if ( w
& leading
) s
.Trim(false);
1317 if ( w
& trailing
) s
.Trim(true);
1321 // ---------------------------------------------------------------------------
1323 // ---------------------------------------------------------------------------
1325 wxString
& wxString::MakeUpper()
1327 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1328 *it
= (wxChar
)wxToupper(*it
);
1333 wxString
& wxString::MakeLower()
1335 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1336 *it
= (wxChar
)wxTolower(*it
);
1341 // ---------------------------------------------------------------------------
1342 // trimming and padding
1343 // ---------------------------------------------------------------------------
1345 // some compilers (VC++ 6.0 not to name them) return true for a call to
1346 // isspace('\xEA') in the C locale which seems to be broken to me, but we have
1347 // to live with this by checking that the character is a 7 bit one - even if
1348 // this may fail to detect some spaces (I don't know if Unicode doesn't have
1349 // space-like symbols somewhere except in the first 128 chars), it is arguably
1350 // still better than trimming away accented letters
1351 inline int wxSafeIsspace(wxChar ch
) { return (ch
< 127) && wxIsspace(ch
); }
1353 // trims spaces (in the sense of isspace) from left or right side
1354 wxString
& wxString::Trim(bool bFromRight
)
1356 // first check if we're going to modify the string at all
1359 (bFromRight
&& wxSafeIsspace(GetChar(length() - 1))) ||
1360 (!bFromRight
&& wxSafeIsspace(GetChar(0u)))
1366 // find last non-space character
1367 reverse_iterator psz
= rbegin();
1368 while ( (psz
!= rend()) && wxSafeIsspace(*psz
) )
1371 // truncate at trailing space start
1372 erase(psz
.base(), end());
1376 // find first non-space character
1377 iterator psz
= begin();
1378 while ( (psz
!= end()) && wxSafeIsspace(*psz
) )
1381 // fix up data and length
1382 erase(begin(), psz
);
1389 // adds nCount characters chPad to the string from either side
1390 wxString
& wxString::Pad(size_t nCount
, wxUniChar chPad
, bool bFromRight
)
1392 wxString
s(chPad
, nCount
);
1405 // truncate the string
1406 wxString
& wxString::Truncate(size_t uiLen
)
1408 if ( uiLen
< length() )
1410 erase(begin() + uiLen
, end());
1412 //else: nothing to do, string is already short enough
1417 // ---------------------------------------------------------------------------
1418 // finding (return wxNOT_FOUND if not found and index otherwise)
1419 // ---------------------------------------------------------------------------
1422 int wxString::Find(wxUniChar ch
, bool bFromEnd
) const
1424 size_type idx
= bFromEnd
? find_last_of(ch
) : find_first_of(ch
);
1426 return (idx
== npos
) ? wxNOT_FOUND
: (int)idx
;
1429 // ----------------------------------------------------------------------------
1430 // conversion to numbers
1431 // ----------------------------------------------------------------------------
1433 // The implementation of all the functions below is exactly the same so factor
1434 // it out. Note that number extraction works correctly on UTF-8 strings, so
1435 // we can use wxStringCharType and wx_str() for maximum efficiency.
// DO_IF_NOT_WINCE(x) expands to x unless __WXWINCE__ is defined, in which
// case it expands to nothing: it is used to compile out the errno handling
// below in Windows CE builds.
#ifndef __WXWINCE__
    #define DO_IF_NOT_WINCE(x) x
#else
    #define DO_IF_NOT_WINCE(x)
#endif

// Body shared by all the string to integer conversion functions.
//
//  out   pointer receiving the result, must not be NULL
//  base  numeric base, either 0 or in the 2..36 range
//  func  strtol()-like function doing the real work
//  T     type of the value returned by func
//
// The macro expands to several statements, including return ones, and so can
// only be used as the whole body of a member function returning bool.
#define WX_STRING_TO_INT_TYPE(out, base, func, T)                             \
    wxCHECK_MSG( out, false, _T("NULL output pointer") );                     \
    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );    \
                                                                              \
    DO_IF_NOT_WINCE( errno = 0; )                                             \
                                                                              \
    const wxStringCharType *start = wx_str();                                 \
    wxStringCharType *end;                                                    \
    T val = func(start, &end, base);                                          \
                                                                              \
    /* return true only if scan was stopped by the terminating NUL and */     \
    /* if the string was not empty to start with and no under/overflow */     \
    /* occurred: */                                                           \
    if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )           \
        return false;                                                         \
    *out = val;                                                               \
    return true
1461 bool wxString::ToLong(long *pVal
, int base
) const
1463 WX_STRING_TO_INT_TYPE(pVal
, base
, wxStrtol
, long);
1466 bool wxString::ToULong(unsigned long *pVal
, int base
) const
1468 WX_STRING_TO_INT_TYPE(pVal
, base
, wxStrtoul
, unsigned long);
1471 bool wxString::ToLongLong(wxLongLong_t
*pVal
, int base
) const
1473 WX_STRING_TO_INT_TYPE(pVal
, base
, wxStrtoll
, wxLongLong_t
);
1476 bool wxString::ToULongLong(wxULongLong_t
*pVal
, int base
) const
1478 WX_STRING_TO_INT_TYPE(pVal
, base
, wxStrtoull
, wxULongLong_t
);
1481 bool wxString::ToDouble(double *pVal
) const
1483 wxCHECK_MSG( pVal
, false, _T("NULL output pointer") );
1485 DO_IF_NOT_WINCE( errno
= 0; )
1487 const wxChar
*start
= c_str();
1489 double val
= wxStrtod(start
, &end
);
1491 // return true only if scan was stopped by the terminating NUL and if the
1492 // string was not empty to start with and no under/overflow occurred
1493 if ( *end
|| end
== start
DO_IF_NOT_WINCE(|| errno
== ERANGE
) )
1501 // ---------------------------------------------------------------------------
1503 // ---------------------------------------------------------------------------
1505 #if !wxUSE_UTF8_LOCALE_ONLY
1507 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1508 wxString
wxStringPrintfMixinBase::DoFormatWchar(const wxChar
*format
, ...)
1510 wxString
wxString::DoFormatWchar(const wxChar
*format
, ...)
1514 va_start(argptr
, format
);
1517 s
.PrintfV(format
, argptr
);
1523 #endif // !wxUSE_UTF8_LOCALE_ONLY
#if wxUSE_UNICODE_UTF8

// Returns a new string built by formatting the variable arguments according
// to the char format string; the work itself is done by PrintfV().
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);

    result.PrintfV(format, args);

    va_end(args);

    return result;
}

#endif // wxUSE_UNICODE_UTF8
1542 wxString
wxString::FormatV(const wxString
& format
, va_list argptr
)
1545 s
.PrintfV(format
, argptr
);
1549 #if !wxUSE_UTF8_LOCALE_ONLY
1550 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1551 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar
*format
, ...)
1553 int wxString::DoPrintfWchar(const wxChar
*format
, ...)
1557 va_start(argptr
, format
);
1559 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1560 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1561 // because it's the only cast that works safely for downcasting when
1562 // multiple inheritance is used:
1563 wxString
*str
= static_cast<wxString
*>(this);
1565 wxString
*str
= this;
1568 int iLen
= str
->PrintfV(format
, argptr
);
1574 #endif // !wxUSE_UTF8_LOCALE_ONLY
#if wxUSE_UNICODE_UTF8

// Formats the variable arguments according to the char format string into
// this string and returns the value returned by PrintfV().
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);

    const int iLen = PrintfV(format, args);

    va_end(args);

    return iLen;
}

#endif // wxUSE_UNICODE_UTF8
1591 Uses wxVsnprintf and places the result into this string.
1593 In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1594 it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in
1595 the ISO C99 (and thus SUSv3) standard the return value for the case of
1596 an undersized buffer is inconsistent. For conforming vsnprintf
1597 implementations the function must return the number of characters that
1598 would have been printed had the buffer been large enough. For conforming
1599 vswprintf implementations the function must return a negative number
1602 What vswprintf sets errno to is undefined but Darwin seems to set it to
1603 EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of
1604 those are defined in the standard and backed up by several conformance
1605 statements. Note that ENOMEM mentioned in the manual page does not
1606 apply to swprintf, only wprintf and fwprintf.
1608 Official manual page:
1609 http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1611 Some conformance statements (AIX, Solaris):
1612 http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1613 http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1615 Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1616 EILSEQ and EINVAL are specifically defined to mean the error is other than
1617 an undersized buffer and no other errno are defined we treat those two
1618 as meaning hard errors and everything else gets the old behavior which
1619 is to keep looping and increasing buffer size until the function succeeds.
1621 In practice it's impossible to determine before compilation which behavior
1622 may be used. The vswprintf function may have vsnprintf-like behavior or
1623 vice-versa. Behavior detected on one release can theoretically change
1624 with an updated release. Not to mention that configure testing for it
1625 would require the test to be run on the host system, not the build system
1626 which makes cross compilation difficult. Therefore, we make no assumptions
1627 about behavior and try our best to handle every known case, including the
1628 case where wxVsnprintf returns a negative number and fails to set errno.
1630 There is yet one more non-standard implementation and that is our own.
1631 Fortunately, that can be detected at compile-time.
1633 On top of all that, ISO C99 explicitly defines snprintf to write a null
1634 character to the last position of the specified buffer. That would be at
1635 the given buffer size minus 1. It is supposed to do this even if it
1636 turns out that the buffer is sized too small.
1638 Darwin (tested on 10.5) follows the C99 behavior exactly.
1640 Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1641 errno even when it fails. However, it only seems to ever fail due
1642 to an undersized buffer.
1644 #if wxUSE_UNICODE_UTF8
1645 template<typename BufferType
>
1647 // we only need one version in non-UTF8 builds and at least two Windows
1648 // compilers have problems with this function template, so use just one
1649 // normal function here
1651 static int DoStringPrintfV(wxString
& str
,
1652 const wxString
& format
, va_list argptr
)
1658 #if wxUSE_UNICODE_UTF8
1659 BufferType
tmp(str
, size
+ 1);
1660 typename
BufferType::CharType
*buf
= tmp
;
1662 wxStringBuffer
tmp(str
, size
+ 1);
1670 // in UTF-8 build, leaving uninitialized junk in the buffer
1671 // could result in invalid non-empty UTF-8 string, so just
1672 // reset the string to empty on failure:
1677 // wxVsnprintf() may modify the original arg pointer, so pass it
1680 wxVaCopy(argptrcopy
, argptr
);
1683 // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1686 int len
= wxVsnprintf(buf
, size
, format
, argptrcopy
);
1689 // some implementations of vsnprintf() don't NUL terminate
1690 // the string if there is not enough space for it so
1691 // always do it manually
1692 // FIXME: This really seems to be wrong and would be an off-by-one
1693 // bug except the code above allocates an extra character.
1694 buf
[size
] = _T('\0');
1696 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1697 // total number of characters which would have been written if the
1698 // buffer were large enough (newer standards such as Unix98)
1701 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1702 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1703 // is true if *both* of them use our own implementation,
1704 // otherwise we can't be sure
1705 #if wxUSE_WXVSNPRINTF
1706 // we know that our own implementation of wxVsnprintf() returns -1
1707 // only for a format error - thus there's something wrong with
1708 // the user's format string
1711 #else // possibly using system version
1712 // assume it only returns error if there is not enough space, but
1713 // as we don't know how much we need, double the current size of
1716 if( (errno
== EILSEQ
) || (errno
== EINVAL
) )
1717 // If errno was set to one of the two well-known hard errors
1718 // then fail immediately to avoid an infinite loop.
1721 #endif // __WXWINCE__
1722 // still not enough, as we don't know how much we need, double the
1723 // current size of the buffer
1725 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1727 else if ( len
>= size
)
1729 #if wxUSE_WXVSNPRINTF
1730 // we know that our own implementation of wxVsnprintf() returns
1731 // size+1 when there's not enough space but that's not the size
1732 // of the required buffer!
1733 size
*= 2; // so we just double the current size of the buffer
1735 // some vsnprintf() implementations NUL-terminate the buffer and
1736 // some don't in len == size case, to be safe always add 1
1737 // FIXME: I don't quite understand this comment. The vsnprintf
1738 // function is specifically defined to return the number of
1739 // characters printed not including the null terminator.
1740 // So OF COURSE you need to add 1 to get the right buffer size.
1741 // The following line is definitely correct, no question.
1745 else // ok, there was enough space
1751 // we could have overshot
1754 return str
.length();
1757 int wxString::PrintfV(const wxString
& format
, va_list argptr
)
1759 #if wxUSE_UNICODE_UTF8
1760 #if wxUSE_STL_BASED_WXSTRING
1761 typedef wxStringTypeBuffer
<char> Utf8Buffer
;
1763 typedef wxStringInternalBuffer Utf8Buffer
;
1767 #if wxUSE_UTF8_LOCALE_ONLY
1768 return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argptr
);
1770 #if wxUSE_UNICODE_UTF8
1771 if ( wxLocaleIsUtf8
)
1772 return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argptr
);
1775 return DoStringPrintfV
<wxStringBuffer
>(*this, format
, argptr
);
1777 return DoStringPrintfV(*this, format
, argptr
);
1778 #endif // UTF8/WCHAR
1782 // ----------------------------------------------------------------------------
1783 // misc other operations
1784 // ----------------------------------------------------------------------------
1786 // returns true if the string matches the pattern which may contain '*' and
1787 // '?' metacharacters (as usual, '?' matches any character and '*' any number of characters)
1789 bool wxString::Matches(const wxString
& mask
) const
1791 // I disable this code as it doesn't seem to be faster (in fact, it seems
1792 // to be much slower) than the old, hand-written code below and using it
1793 // here requires always linking with libregex even if the user code doesn't
1795 #if 0 // wxUSE_REGEX
1796 // first translate the shell-like mask into a regex
1798 pattern
.reserve(wxStrlen(pszMask
));
1810 pattern
+= _T(".*");
1821 // these characters are special in a RE, quote them
1822 // (however note that we don't quote '[' and ']' to allow
1823 // using them for Unix shell like matching)
1824 pattern
+= _T('\\');
1828 pattern
+= *pszMask
;
1836 return wxRegEx(pattern
, wxRE_NOSUB
| wxRE_EXTENDED
).Matches(c_str());
1837 #else // !wxUSE_REGEX
1838 // TODO: this is, of course, awfully inefficient...
1840 // FIXME-UTF8: implement using iterators, remove #if
1841 #if wxUSE_UNICODE_UTF8
1842 wxWCharBuffer maskBuf
= mask
.wc_str();
1843 wxWCharBuffer txtBuf
= wc_str();
1844 const wxChar
*pszMask
= maskBuf
.data();
1845 const wxChar
*pszTxt
= txtBuf
.data();
1847 const wxChar
*pszMask
= mask
.wx_str();
1848 // the char currently being checked
1849 const wxChar
*pszTxt
= wx_str();
1852 // the last location where '*' matched
1853 const wxChar
*pszLastStarInText
= NULL
;
1854 const wxChar
*pszLastStarInMask
= NULL
;
1857 for ( ; *pszMask
!= wxT('\0'); pszMask
++, pszTxt
++ ) {
1858 switch ( *pszMask
) {
1860 if ( *pszTxt
== wxT('\0') )
1863 // pszTxt and pszMask will be incremented in the loop statement
1869 // remember where we started to be able to backtrack later
1870 pszLastStarInText
= pszTxt
;
1871 pszLastStarInMask
= pszMask
;
1873 // ignore special chars immediately following this one
1874 // (should this be an error?)
1875 while ( *pszMask
== wxT('*') || *pszMask
== wxT('?') )
1878 // if there is nothing more, match
1879 if ( *pszMask
== wxT('\0') )
1882 // are there any other metacharacters in the mask?
1884 const wxChar
*pEndMask
= wxStrpbrk(pszMask
, wxT("*?"));
1886 if ( pEndMask
!= NULL
) {
1887 // we have to match the string between two metachars
1888 uiLenMask
= pEndMask
- pszMask
;
1891 // we have to match the remainder of the string
1892 uiLenMask
= wxStrlen(pszMask
);
1895 wxString
strToMatch(pszMask
, uiLenMask
);
1896 const wxChar
* pMatch
= wxStrstr(pszTxt
, strToMatch
);
1897 if ( pMatch
== NULL
)
1900 // -1 to compensate "++" in the loop
1901 pszTxt
= pMatch
+ uiLenMask
- 1;
1902 pszMask
+= uiLenMask
- 1;
1907 if ( *pszMask
!= *pszTxt
)
1913 // match only if nothing left
1914 if ( *pszTxt
== wxT('\0') )
1917 // if we failed to match, backtrack if we can
1918 if ( pszLastStarInText
) {
1919 pszTxt
= pszLastStarInText
+ 1;
1920 pszMask
= pszLastStarInMask
;
1922 pszLastStarInText
= NULL
;
1924 // don't bother resetting pszLastStarInMask, it's unnecessary
1930 #endif // wxUSE_REGEX/!wxUSE_REGEX
1933 // Count the number of chars
1934 int wxString::Freq(wxUniChar ch
) const
1937 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1945 // convert to upper case, return the copy of the string
1946 wxString
wxString::Upper() const
1947 { wxString
s(*this); return s
.MakeUpper(); }
1949 // convert to lower case, return the copy of the string
1950 wxString
wxString::Lower() const { wxString
s(*this); return s
.MakeLower(); }
1952 // ----------------------------------------------------------------------------
1953 // wxUTF8StringBuffer
1954 // ----------------------------------------------------------------------------
1956 #if wxUSE_UNICODE_WCHAR
1957 wxUTF8StringBuffer::~wxUTF8StringBuffer()
1959 wxMBConvStrictUTF8 conv
;
1960 size_t wlen
= conv
.ToWChar(NULL
, 0, m_buf
);
1961 wxCHECK_RET( wlen
!= wxCONV_FAILED
, "invalid UTF-8 data in string buffer?" );
1963 wxStringInternalBuffer
wbuf(m_str
, wlen
);
1964 conv
.ToWChar(wbuf
, wlen
, m_buf
);
1967 wxUTF8StringBufferLength::~wxUTF8StringBufferLength()
1969 wxCHECK_RET(m_lenSet
, "length not set");
1971 wxMBConvStrictUTF8 conv
;
1972 size_t wlen
= conv
.ToWChar(NULL
, 0, m_buf
, m_len
);
1973 wxCHECK_RET( wlen
!= wxCONV_FAILED
, "invalid UTF-8 data in string buffer?" );
1975 wxStringInternalBufferLength
wbuf(m_str
, wlen
);
1976 conv
.ToWChar(wbuf
, wlen
, m_buf
, m_len
);
1977 wbuf
.SetLength(wlen
);
1979 #endif // wxUSE_UNICODE_WCHAR