1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
13 // ===========================================================================
14 // headers, declarations, constants
15 // ===========================================================================
17 // For compilers that support precompilation, includes "wx.h".
18 #include "wx/wxprec.h"
25 #include "wx/string.h"
41 #include "wx/hashmap.h"
43 // string handling functions used by wxString:
44 #if wxUSE_UNICODE_UTF8
45 #define wxStringMemcpy memcpy
46 #define wxStringMemcmp memcmp
47 #define wxStringMemchr memchr
48 #define wxStringStrlen strlen
50 #define wxStringMemcpy wxTmemcpy
51 #define wxStringMemcmp wxTmemcmp
52 #define wxStringMemchr wxTmemchr
53 #define wxStringStrlen wxStrlen
57 // ---------------------------------------------------------------------------
58 // static class variables definition
59 // ---------------------------------------------------------------------------
61 //According to STL _must_ be a -1 size_t
62 const size_t wxString::npos
= (size_t) -1;
64 // ----------------------------------------------------------------------------
66 // ----------------------------------------------------------------------------
68 #if wxUSE_STD_IOSTREAM
72 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxCStrData
& str
)
74 // FIXME-UTF8: always, not only if wxUSE_UNICODE
75 #if wxUSE_UNICODE && !defined(__BORLANDC__)
76 return os
<< (const wchar_t*)str
.AsWCharBuf();
78 return os
<< (const char*)str
.AsCharBuf();
82 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxString
& str
)
84 return os
<< str
.c_str();
87 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxCharBuffer
& str
)
89 return os
<< str
.data();
93 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxWCharBuffer
& str
)
95 return os
<< str
.data();
99 #endif // wxUSE_STD_IOSTREAM
101 // ===========================================================================
102 // wxString class core
103 // ===========================================================================
105 #if wxUSE_UNICODE_UTF8
107 void wxString::PosLenToImpl(size_t pos
, size_t len
,
108 size_t *implPos
, size_t *implLen
) const
114 const_iterator i
= begin() + pos
;
115 *implPos
= wxStringImpl::const_iterator(i
.impl()) - m_impl
.begin();
120 // too large length is interpreted as "to the end of the string"
121 // FIXME-UTF8: verify this is the case in std::string, assert
123 if ( pos
+ len
> length() )
124 len
= length() - pos
;
126 *implLen
= (i
+ len
).impl() - i
.impl();
131 #endif // wxUSE_UNICODE_UTF8
133 // ----------------------------------------------------------------------------
134 // wxCStrData converted strings caching
135 // ----------------------------------------------------------------------------
137 // FIXME-UTF8: temporarily disabled because it doesn't work with global
138 // string objects; re-enable after fixing this bug and benchmarking
139 // performance to see if using a hash is a good idea at all
142 // For backward compatibility reasons, it must be possible to assign the value
143 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
144 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
145 // because the memory would be freed immediately, but it has to be valid as long
146 // as the string is not modified, so that code like this still works:
148 // const wxChar *s = str.c_str();
149 // while ( s ) { ... }
151 // FIXME-UTF8: not thread safe!
152 // FIXME-UTF8: we currently clear the cached conversion only when the string is
153 // destroyed, but we should do it when the string is modified, to
154 // keep memory usage down
155 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
156 // invalidated the cache on every change, we could keep the previous
158 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
159 // to use mb_str() or wc_str() instead of (const [w]char*)c_str()
162 static inline void DeleteStringFromConversionCache(T
& hash
, const wxString
*s
)
164 typename
T::iterator i
= hash
.find(wxConstCast(s
, wxString
));
165 if ( i
!= hash
.end() )
173 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
174 // so we have to use wxString* here and const-cast when used
175 WX_DECLARE_HASH_MAP(wxString
*, char*, wxPointerHash
, wxPointerEqual
,
176 wxStringCharConversionCache
);
177 static wxStringCharConversionCache gs_stringsCharCache
;
179 const char* wxCStrData::AsChar() const
181 // remove previously cache value, if any (see FIXMEs above):
182 DeleteStringFromConversionCache(gs_stringsCharCache
, m_str
);
184 // convert the string and keep it:
185 const char *s
= gs_stringsCharCache
[wxConstCast(m_str
, wxString
)] =
186 m_str
->mb_str().release();
190 #endif // wxUSE_UNICODE
192 #if !wxUSE_UNICODE_WCHAR
193 WX_DECLARE_HASH_MAP(wxString
*, wchar_t*, wxPointerHash
, wxPointerEqual
,
194 wxStringWCharConversionCache
);
195 static wxStringWCharConversionCache gs_stringsWCharCache
;
197 const wchar_t* wxCStrData::AsWChar() const
199 // remove previously cache value, if any (see FIXMEs above):
200 DeleteStringFromConversionCache(gs_stringsWCharCache
, m_str
);
202 // convert the string and keep it:
203 const wchar_t *s
= gs_stringsWCharCache
[wxConstCast(m_str
, wxString
)] =
204 m_str
->wc_str().release();
208 #endif // !wxUSE_UNICODE_WCHAR
210 wxString::~wxString()
213 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
214 DeleteStringFromConversionCache(gs_stringsCharCache
, this);
216 #if !wxUSE_UNICODE_WCHAR
217 DeleteStringFromConversionCache(gs_stringsWCharCache
, this);
223 const char* wxCStrData::AsChar() const
225 wxString
*str
= wxConstCast(m_str
, wxString
);
227 // convert the string:
228 wxCharBuffer
buf(str
->mb_str());
230 // FIXME-UTF8: do the conversion in-place in the existing buffer
231 if ( str
->m_convertedToChar
&&
232 strlen(buf
) == strlen(str
->m_convertedToChar
) )
234 // keep the same buffer for as long as possible, so that several calls
235 // to c_str() in a row still work:
236 strcpy(str
->m_convertedToChar
, buf
);
240 str
->m_convertedToChar
= buf
.release();
244 return str
->m_convertedToChar
+ m_offset
;
246 #endif // wxUSE_UNICODE
248 #if !wxUSE_UNICODE_WCHAR
249 const wchar_t* wxCStrData::AsWChar() const
251 wxString
*str
= wxConstCast(m_str
, wxString
);
253 // convert the string:
254 wxWCharBuffer
buf(str
->wc_str());
256 // FIXME-UTF8: do the conversion in-place in the existing buffer
257 if ( str
->m_convertedToWChar
&&
258 wxWcslen(buf
) == wxWcslen(str
->m_convertedToWChar
) )
260 // keep the same buffer for as long as possible, so that several calls
261 // to c_str() in a row still work:
262 memcpy(str
->m_convertedToWChar
, buf
, sizeof(wchar_t) * wxWcslen(buf
));
266 str
->m_convertedToWChar
= buf
.release();
270 return str
->m_convertedToWChar
+ m_offset
;
272 #endif // !wxUSE_UNICODE_WCHAR
274 // ===========================================================================
275 // wxString class core
276 // ===========================================================================
278 // ---------------------------------------------------------------------------
279 // construction and conversion
280 // ---------------------------------------------------------------------------
282 #if wxUSE_UNICODE_WCHAR
284 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
285 const wxMBConv
& conv
)
288 if ( !psz
|| nLength
== 0 )
289 return SubstrBufFromMB(L
"", 0);
291 if ( nLength
== npos
)
295 wxWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
297 return SubstrBufFromMB(_T(""), 0);
299 return SubstrBufFromMB(wcBuf
, wcLen
);
301 #endif // wxUSE_UNICODE_WCHAR
303 #if wxUSE_UNICODE_UTF8
305 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
306 const wxMBConv
& conv
)
308 // FIXME-UTF8: return as-is without copying under UTF8 locale, return
309 // converted string under other locales - needs wxCharBuffer
313 if ( !psz
|| nLength
== 0 )
314 return SubstrBufFromMB("", 0);
316 if ( nLength
== npos
)
319 // first convert to wide string:
321 wxWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
323 return SubstrBufFromMB("", 0);
325 // and then to UTF-8:
326 SubstrBufFromMB
buf(ConvertStr(wcBuf
, wcLen
, wxConvUTF8
));
327 // widechar -> UTF-8 conversion isn't supposed to ever fail:
328 wxASSERT_MSG( buf
.data
, _T("conversion to UTF-8 failed") );
332 #endif // wxUSE_UNICODE_UTF8
334 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
336 wxString::SubstrBufFromWC
wxString::ConvertStr(const wchar_t *pwz
, size_t nLength
,
337 const wxMBConv
& conv
)
340 if ( !pwz
|| nLength
== 0 )
341 return SubstrBufFromWC("", 0);
343 if ( nLength
== npos
)
347 wxCharBuffer
mbBuf(conv
.cWC2MB(pwz
, nLength
, &mbLen
));
349 return SubstrBufFromWC("", 0);
351 return SubstrBufFromWC(mbBuf
, mbLen
);
353 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
356 #if wxUSE_UNICODE_WCHAR
358 //Convert wxString in Unicode mode to a multi-byte string
359 const wxCharBuffer
wxString::mb_str(const wxMBConv
& conv
) const
361 return conv
.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL
);
364 #elif wxUSE_UNICODE_UTF8
366 const wxWCharBuffer
wxString::wc_str() const
368 return wxConvUTF8
.cMB2WC(m_impl
.c_str(),
369 m_impl
.length() + 1 /* size, not length */,
373 const wxCharBuffer
wxString::mb_str(const wxMBConv
& conv
) const
375 // FIXME-UTF8: optimize the case when conv==wxConvUTF8 or wxConvLibc
377 // FIXME-UTF8: use wc_str() here once we have buffers with length
381 wxConvUTF8
.cMB2WC(m_impl
.c_str(),
382 m_impl
.length() + 1 /* size, not length */,
385 return wxCharBuffer("");
387 return conv
.cWC2MB(wcBuf
, wcLen
, NULL
);
392 //Converts this string to a wide character string if unicode
393 //mode is not enabled and wxUSE_WCHAR_T is enabled
394 const wxWCharBuffer
wxString::wc_str(const wxMBConv
& conv
) const
396 return conv
.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL
);
399 #endif // Unicode/ANSI
401 // shrink to minimal size (releasing extra memory)
402 bool wxString::Shrink()
404 wxString
tmp(begin(), end());
406 return tmp
.length() == length();
409 // deprecated compatibility code:
410 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
411 wxChar
*wxString::GetWriteBuf(size_t nLen
)
413 return DoGetWriteBuf(nLen
);
416 void wxString::UngetWriteBuf()
421 void wxString::UngetWriteBuf(size_t nLen
)
423 DoUngetWriteBuf(nLen
);
425 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
428 // ---------------------------------------------------------------------------
430 // ---------------------------------------------------------------------------
432 // all functions are inline in string.h
434 // ---------------------------------------------------------------------------
435 // concatenation operators
436 // ---------------------------------------------------------------------------
439 * concatenation functions come in 5 flavours:
441 * char + string and string + char
442 * C str + string and string + C str
445 wxString
operator+(const wxString
& str1
, const wxString
& str2
)
447 #if !wxUSE_STL_BASED_WXSTRING
448 wxASSERT( str1
.IsValid() );
449 wxASSERT( str2
.IsValid() );
458 wxString
operator+(const wxString
& str
, wxUniChar ch
)
460 #if !wxUSE_STL_BASED_WXSTRING
461 wxASSERT( str
.IsValid() );
470 wxString
operator+(wxUniChar ch
, const wxString
& str
)
472 #if !wxUSE_STL_BASED_WXSTRING
473 wxASSERT( str
.IsValid() );
482 wxString
operator+(const wxString
& str
, const char *psz
)
484 #if !wxUSE_STL_BASED_WXSTRING
485 wxASSERT( str
.IsValid() );
489 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
490 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
498 wxString
operator+(const wxString
& str
, const wchar_t *pwz
)
500 #if !wxUSE_STL_BASED_WXSTRING
501 wxASSERT( str
.IsValid() );
505 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
506 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
514 wxString
operator+(const char *psz
, const wxString
& str
)
516 #if !wxUSE_STL_BASED_WXSTRING
517 wxASSERT( str
.IsValid() );
521 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
522 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
530 wxString
operator+(const wchar_t *pwz
, const wxString
& str
)
532 #if !wxUSE_STL_BASED_WXSTRING
533 wxASSERT( str
.IsValid() );
537 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
538 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
546 // ---------------------------------------------------------------------------
548 // ---------------------------------------------------------------------------
550 #ifdef HAVE_STD_STRING_COMPARE
552 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
553 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
554 // sort strings in characters code point order by sorting the byte sequence
555 // in byte values order (i.e. what strcmp() and memcmp() do).
557 int wxString::compare(const wxString
& str
) const
559 return m_impl
.compare(str
.m_impl
);
562 int wxString::compare(size_t nStart
, size_t nLen
,
563 const wxString
& str
) const
566 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
567 return m_impl
.compare(pos
, len
, str
.m_impl
);
570 int wxString::compare(size_t nStart
, size_t nLen
,
572 size_t nStart2
, size_t nLen2
) const
575 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
578 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
580 return m_impl
.compare(pos
, len
, str
.m_impl
, pos2
, len2
);
583 int wxString::compare(const char* sz
) const
585 return m_impl
.compare(ImplStr(sz
));
588 int wxString::compare(const wchar_t* sz
) const
590 return m_impl
.compare(ImplStr(sz
));
593 int wxString::compare(size_t nStart
, size_t nLen
,
594 const char* sz
, size_t nCount
) const
597 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
599 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
601 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
604 int wxString::compare(size_t nStart
, size_t nLen
,
605 const wchar_t* sz
, size_t nCount
) const
608 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
610 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
612 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
615 #else // !HAVE_STD_STRING_COMPARE
617 static inline int wxDoCmp(const wxStringCharType
* s1
, size_t l1
,
618 const wxStringCharType
* s2
, size_t l2
)
621 return wxStringMemcmp(s1
, s2
, l1
);
624 int ret
= wxStringMemcmp(s1
, s2
, l1
);
625 return ret
== 0 ? -1 : ret
;
629 int ret
= wxStringMemcmp(s1
, s2
, l2
);
630 return ret
== 0 ? +1 : ret
;
634 int wxString::compare(const wxString
& str
) const
636 return ::wxDoCmp(m_impl
.data(), m_impl
.length(),
637 str
.m_impl
.data(), str
.m_impl
.length());
640 int wxString::compare(size_t nStart
, size_t nLen
,
641 const wxString
& str
) const
643 wxASSERT(nStart
<= length());
644 size_type strLen
= length() - nStart
;
645 nLen
= strLen
< nLen
? strLen
: nLen
;
648 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
650 return ::wxDoCmp(m_impl
.data() + pos
, len
,
651 str
.m_impl
.data(), str
.m_impl
.length());
654 int wxString::compare(size_t nStart
, size_t nLen
,
656 size_t nStart2
, size_t nLen2
) const
658 wxASSERT(nStart
<= length());
659 wxASSERT(nStart2
<= str
.length());
660 size_type strLen
= length() - nStart
,
661 strLen2
= str
.length() - nStart2
;
662 nLen
= strLen
< nLen
? strLen
: nLen
;
663 nLen2
= strLen2
< nLen2
? strLen2
: nLen2
;
666 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
668 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
670 return ::wxDoCmp(m_impl
.data() + pos
, len
,
671 str
.m_impl
.data() + pos2
, len2
);
674 int wxString::compare(const char* sz
) const
676 SubstrBufFromMB
str(ImplStr(sz
, npos
));
677 if ( str
.len
== npos
)
678 str
.len
= wxStringStrlen(str
.data
);
679 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
682 int wxString::compare(const wchar_t* sz
) const
684 SubstrBufFromWC
str(ImplStr(sz
, npos
));
685 if ( str
.len
== npos
)
686 str
.len
= wxStringStrlen(str
.data
);
687 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
690 int wxString::compare(size_t nStart
, size_t nLen
,
691 const char* sz
, size_t nCount
) const
693 wxASSERT(nStart
<= length());
694 size_type strLen
= length() - nStart
;
695 nLen
= strLen
< nLen
? strLen
: nLen
;
698 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
700 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
701 if ( str
.len
== npos
)
702 str
.len
= wxStringStrlen(str
.data
);
704 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
707 int wxString::compare(size_t nStart
, size_t nLen
,
708 const wchar_t* sz
, size_t nCount
) const
710 wxASSERT(nStart
<= length());
711 size_type strLen
= length() - nStart
;
712 nLen
= strLen
< nLen
? strLen
: nLen
;
715 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
717 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
718 if ( str
.len
== npos
)
719 str
.len
= wxStringStrlen(str
.data
);
721 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
724 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
727 // ---------------------------------------------------------------------------
728 // find_{first,last}_[not]_of functions
729 // ---------------------------------------------------------------------------
731 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
733 // NB: All these functions are implemented with the argument being wxChar*,
734 // i.e. widechar string in any Unicode build, even though native string
735 // representation is char* in the UTF-8 build. This is because we couldn't
736 // use memchr() to determine if a character is in a set encoded as UTF-8.
738 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
) const
740 return find_first_of(sz
, nStart
, wxStrlen(sz
));
743 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
) const
745 return find_first_not_of(sz
, nStart
, wxStrlen(sz
));
748 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
750 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
753 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
755 if ( wxTmemchr(sz
, *i
, n
) )
762 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
764 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
767 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
769 if ( !wxTmemchr(sz
, *i
, n
) )
777 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
) const
779 return find_last_of(sz
, nStart
, wxStrlen(sz
));
782 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
) const
784 return find_last_not_of(sz
, nStart
, wxStrlen(sz
));
787 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
789 size_t len
= length();
791 if ( nStart
== npos
)
797 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
801 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
802 i
!= rend(); --idx
, ++i
)
804 if ( wxTmemchr(sz
, *i
, n
) )
811 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
813 size_t len
= length();
815 if ( nStart
== npos
)
821 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
825 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
826 i
!= rend(); --idx
, ++i
)
828 if ( !wxTmemchr(sz
, *i
, n
) )
835 size_t wxString::find_first_not_of(wxUniChar ch
, size_t nStart
) const
837 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
840 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
849 size_t wxString::find_last_not_of(wxUniChar ch
, size_t nStart
) const
851 size_t len
= length();
853 if ( nStart
== npos
)
859 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
863 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
864 i
!= rend(); --idx
, ++i
)
873 // the functions above were implemented for wchar_t* arguments in Unicode
874 // build and char* in ANSI build; below are implementations for the other
877 #define wxOtherCharType char
878 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
880 #define wxOtherCharType wchar_t
881 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
884 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
) const
885 { return find_first_of(STRCONV(sz
), nStart
); }
887 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
,
889 { return find_first_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
890 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
) const
891 { return find_last_of(STRCONV(sz
), nStart
); }
892 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
,
894 { return find_last_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
895 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
896 { return find_first_not_of(STRCONV(sz
), nStart
); }
897 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
,
899 { return find_first_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
900 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
901 { return find_last_not_of(STRCONV(sz
), nStart
); }
902 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
,
904 { return find_last_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
906 #undef wxOtherCharType
909 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
911 // ===========================================================================
912 // other common string functions
913 // ===========================================================================
915 int wxString::CmpNoCase(const wxString
& s
) const
917 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
920 const_iterator i1
= begin();
921 const_iterator end1
= end();
922 const_iterator i2
= s
.begin();
923 const_iterator end2
= s
.end();
925 for ( ; i1
!= end1
&& i2
!= end2
; ++idx
, ++i1
, ++i2
)
927 wxUniChar lower1
= (wxChar
)wxTolower(*i1
);
928 wxUniChar lower2
= (wxChar
)wxTolower(*i2
);
929 if ( lower1
!= lower2
)
930 return lower1
< lower2
? -1 : 1;
933 size_t len1
= length();
934 size_t len2
= s
.length();
938 else if ( len1
> len2
)
947 #ifndef __SCHAR_MAX__
948 #define __SCHAR_MAX__ 127
952 wxString
wxString::FromAscii(const char *ascii
)
955 return wxEmptyString
;
957 size_t len
= strlen( ascii
);
962 wxStringBuffer
buf(res
, len
);
968 if ( (*dest
++ = (wchar_t)(unsigned char)*ascii
++) == L
'\0' )
976 wxString
wxString::FromAscii(const char ascii
)
978 // What do we do with '\0' ?
981 res
+= (wchar_t)(unsigned char) ascii
;
986 const wxCharBuffer
wxString::ToAscii() const
988 // this will allocate enough space for the terminating NUL too
989 wxCharBuffer
buffer(length());
992 char *dest
= buffer
.data();
994 const wchar_t *pwc
= c_str();
997 *dest
++ = (char)(*pwc
> SCHAR_MAX
? wxT('_') : *pwc
);
999 // the output string can't have embedded NULs anyhow, so we can safely
1000 // stop at first of them even if we do have any
1010 // extract string of length nCount starting at nFirst
1011 wxString
wxString::Mid(size_t nFirst
, size_t nCount
) const
1013 size_t nLen
= length();
1015 // default value of nCount is npos and means "till the end"
1016 if ( nCount
== npos
)
1018 nCount
= nLen
- nFirst
;
1021 // out-of-bounds requests return sensible things
1022 if ( nFirst
+ nCount
> nLen
)
1024 nCount
= nLen
- nFirst
;
1027 if ( nFirst
> nLen
)
1029 // AllocCopy() will return empty string
1030 return wxEmptyString
;
1033 wxString
dest(*this, nFirst
, nCount
);
1034 if ( dest
.length() != nCount
)
1036 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1042 // check that the string starts with prefix and return the rest of the string
1043 // in the provided pointer if it is not NULL, otherwise return false
1044 bool wxString::StartsWith(const wxChar
*prefix
, wxString
*rest
) const
1046 wxASSERT_MSG( prefix
, _T("invalid parameter in wxString::StartsWith") );
1048 // first check if the beginning of the string matches the prefix: note
1049 // that we don't have to check that we don't run out of this string as
1050 // when we reach the terminating NUL, either prefix string ends too (and
1051 // then it's ok) or we break out of the loop because there is no match
1052 const wxChar
*p
= c_str();
1055 if ( *prefix
++ != *p
++ )
1064 // put the rest of the string into provided pointer
1072 // check that the string ends with suffix and return the rest of it in the
1073 // provided pointer if it is not NULL, otherwise return false
1074 bool wxString::EndsWith(const wxChar
*suffix
, wxString
*rest
) const
1076 wxASSERT_MSG( suffix
, _T("invalid parameter in wxString::EndssWith") );
1078 int start
= length() - wxStrlen(suffix
);
1080 if ( start
< 0 || compare(start
, npos
, suffix
) != 0 )
1085 // put the rest of the string into provided pointer
1086 rest
->assign(*this, 0, start
);
1093 // extract nCount last (rightmost) characters
1094 wxString
wxString::Right(size_t nCount
) const
1096 if ( nCount
> length() )
1099 wxString
dest(*this, length() - nCount
, nCount
);
1100 if ( dest
.length() != nCount
) {
1101 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1106 // get all characters after the last occurrence of ch
1107 // (returns the whole string if ch not found)
1108 wxString
wxString::AfterLast(wxUniChar ch
) const
1111 int iPos
= Find(ch
, true);
1112 if ( iPos
== wxNOT_FOUND
)
1115 str
= wx_str() + iPos
+ 1;
1120 // extract nCount first (leftmost) characters
1121 wxString
wxString::Left(size_t nCount
) const
1123 if ( nCount
> length() )
1126 wxString
dest(*this, 0, nCount
);
1127 if ( dest
.length() != nCount
) {
1128 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1133 // get all characters before the first occurence of ch
1134 // (returns the whole string if ch not found)
1135 wxString
wxString::BeforeFirst(wxUniChar ch
) const
1137 int iPos
= Find(ch
);
1138 if ( iPos
== wxNOT_FOUND
) iPos
= length();
1139 return wxString(*this, 0, iPos
);
1142 /// get all characters before the last occurrence of ch
1143 /// (returns empty string if ch not found)
1144 wxString
wxString::BeforeLast(wxUniChar ch
) const
1147 int iPos
= Find(ch
, true);
1148 if ( iPos
!= wxNOT_FOUND
&& iPos
!= 0 )
1149 str
= wxString(c_str(), iPos
);
1154 /// get all characters after the first occurrence of ch
1155 /// (returns empty string if ch not found)
1156 wxString
wxString::AfterFirst(wxUniChar ch
) const
1159 int iPos
= Find(ch
);
1160 if ( iPos
!= wxNOT_FOUND
)
1161 str
= wx_str() + iPos
+ 1;
1166 // replace first (or all) occurrences of some substring with another one
1167 size_t wxString::Replace(const wxString
& strOld
,
1168 const wxString
& strNew
, bool bReplaceAll
)
1170 // if we tried to replace an empty string we'd enter an infinite loop below
1171 wxCHECK_MSG( !strOld
.empty(), 0,
1172 _T("wxString::Replace(): invalid parameter") );
1174 size_t uiCount
= 0; // count of replacements made
1176 size_t uiOldLen
= strOld
.length();
1177 size_t uiNewLen
= strNew
.length();
1181 while ( (*this)[dwPos
] != wxT('\0') )
1183 //DO NOT USE STRSTR HERE
1184 //this string can contain embedded null characters,
1185 //so strstr will function incorrectly
1186 dwPos
= find(strOld
, dwPos
);
1187 if ( dwPos
== npos
)
1188 break; // exit the loop
1191 //replace this occurance of the old string with the new one
1192 replace(dwPos
, uiOldLen
, strNew
, uiNewLen
);
1194 //move up pos past the string that was replaced
1197 //increase replace count
1202 break; // exit the loop
1209 bool wxString::IsAscii() const
1211 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1213 if ( !(*i
).IsAscii() )
1220 bool wxString::IsWord() const
1222 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1224 if ( !wxIsalpha(*i
) )
1231 bool wxString::IsNumber() const
1236 const_iterator i
= begin();
1238 if ( *i
== _T('-') || *i
== _T('+') )
1241 for ( ; i
!= end(); ++i
)
1243 if ( !wxIsdigit(*i
) )
1250 wxString
wxString::Strip(stripType w
) const
1253 if ( w
& leading
) s
.Trim(false);
1254 if ( w
& trailing
) s
.Trim(true);
1258 // ---------------------------------------------------------------------------
1260 // ---------------------------------------------------------------------------
1262 wxString
& wxString::MakeUpper()
1264 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1265 *it
= (wxChar
)wxToupper(*it
);
1270 wxString
& wxString::MakeLower()
1272 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1273 *it
= (wxChar
)wxTolower(*it
);
1278 // ---------------------------------------------------------------------------
1279 // trimming and padding
1280 // ---------------------------------------------------------------------------
1282 // some compilers (VC++ 6.0 not to name them) return true for a call to
1283 // isspace('ê') in the C locale which seems to be broken to me, but we have to
1284 // live with this by checking that the character is a 7 bit one - even if this
1285 // may fail to detect some spaces (I don't know if Unicode doesn't have
1286 // space-like symbols somewhere except in the first 128 chars), it is arguably
1287 // still better than trimming away accented letters
1288 inline int wxSafeIsspace(wxChar ch
) { return (ch
< 127) && wxIsspace(ch
); }
1290 // trims spaces (in the sense of isspace) from left or right side
1291 wxString
& wxString::Trim(bool bFromRight
)
1293 // first check if we're going to modify the string at all
1296 (bFromRight
&& wxSafeIsspace(GetChar(length() - 1))) ||
1297 (!bFromRight
&& wxSafeIsspace(GetChar(0u)))
1303 // find last non-space character
1304 reverse_iterator psz
= rbegin();
1305 while ( (psz
!= rend()) && wxSafeIsspace(*psz
) )
1308 // truncate at trailing space start
1309 erase(psz
.base(), end());
1313 // find first non-space character
1314 iterator psz
= begin();
1315 while ( (psz
!= end()) && wxSafeIsspace(*psz
) )
1318 // fix up data and length
1319 erase(begin(), psz
);
1326 // adds nCount characters chPad to the string from either side
1327 wxString
& wxString::Pad(size_t nCount
, wxUniChar chPad
, bool bFromRight
)
1329 wxString
s(chPad
, nCount
);
1342 // truncate the string
1343 wxString
& wxString::Truncate(size_t uiLen
)
1345 if ( uiLen
< length() )
1347 erase(begin() + uiLen
, end());
1349 //else: nothing to do, string is already short enough
1354 // ---------------------------------------------------------------------------
1355 // finding (return wxNOT_FOUND if not found and index otherwise)
1356 // ---------------------------------------------------------------------------
1359 int wxString::Find(wxUniChar ch
, bool bFromEnd
) const
1361 size_type idx
= bFromEnd
? find_last_of(ch
) : find_first_of(ch
);
1363 return (idx
== npos
) ? wxNOT_FOUND
: (int)idx
;
1366 // ----------------------------------------------------------------------------
1367 // conversion to numbers
1368 // ----------------------------------------------------------------------------
1370 // the implementation of all the functions below is exactly the same so factor
1373 template <typename T
, typename F
>
1374 bool wxStringToIntType(const wxChar
*start
,
1379 wxCHECK_MSG( val
, false, _T("NULL output pointer") );
1380 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), _T("invalid base") );
1387 *val
= (*func
)(start
, &end
, base
);
1389 // return true only if scan was stopped by the terminating NUL and if the
1390 // string was not empty to start with and no under/overflow occurred
1391 return !*end
&& (end
!= start
)
1393 && (errno
!= ERANGE
)
1398 bool wxString::ToLong(long *val
, int base
) const
1400 return wxStringToIntType((const wxChar
*)c_str(), val
, base
, wxStrtol
);
1403 bool wxString::ToULong(unsigned long *val
, int base
) const
1405 return wxStringToIntType((const wxChar
*)c_str(), val
, base
, wxStrtoul
);
1408 bool wxString::ToLongLong(wxLongLong_t
*val
, int base
) const
1410 return wxStringToIntType((const wxChar
*)c_str(), val
, base
, wxStrtoll
);
1413 bool wxString::ToULongLong(wxULongLong_t
*val
, int base
) const
1415 return wxStringToIntType((const wxChar
*)c_str(), val
, base
, wxStrtoull
);
1418 bool wxString::ToDouble(double *val
) const
1420 wxCHECK_MSG( val
, false, _T("NULL pointer in wxString::ToDouble") );
1426 const wxChar
*start
= c_str();
1428 *val
= wxStrtod(start
, &end
);
1430 // return true only if scan was stopped by the terminating NUL and if the
1431 // string was not empty to start with and no under/overflow occurred
1432 return !*end
&& (end
!= start
)
1434 && (errno
!= ERANGE
)
// ---------------------------------------------------------------------------
// formatted output
// ---------------------------------------------------------------------------
1444 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1445 wxString
wxStringPrintfMixinBase::DoFormat(const wxChar
*format
, ...)
1447 wxString
wxString::DoFormat(const wxChar
*format
, ...)
1451 va_start(argptr
, format
);
1454 s
.PrintfV(format
, argptr
);
1462 wxString
wxString::FormatV(const wxString
& format
, va_list argptr
)
1465 s
.PrintfV(format
, argptr
);
1469 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1470 int wxStringPrintfMixinBase::DoPrintf(const wxChar
*format
, ...)
1472 int wxString::DoPrintf(const wxChar
*format
, ...)
1476 va_start(argptr
, format
);
1478 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1479 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1480 // because it's the only cast that works safely for downcasting when
1481 // multiple inheritance is used:
1482 wxString
*str
= static_cast<wxString
*>(this);
1484 wxString
*str
= this;
1487 int iLen
= str
->PrintfV(format
, argptr
);
1494 int wxString::PrintfV(const wxString
& format
, va_list argptr
)
1500 wxStringBuffer
tmp(*this, size
+ 1);
1509 // wxVsnprintf() may modify the original arg pointer, so pass it
1512 wxVaCopy(argptrcopy
, argptr
);
1513 int len
= wxVsnprintf(buf
, size
, (const wxChar
*)/*FIXME-UTF8*/format
, argptrcopy
);
1516 // some implementations of vsnprintf() don't NUL terminate
1517 // the string if there is not enough space for it so
1518 // always do it manually
1519 buf
[size
] = _T('\0');
1521 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1522 // total number of characters which would have been written if the
1523 // buffer were large enough (newer standards such as Unix98)
1526 #if wxUSE_WXVSNPRINTF
1527 // we know that our own implementation of wxVsnprintf() returns -1
1528 // only for a format error - thus there's something wrong with
1529 // the user's format string
1531 #else // assume that system version only returns error if not enough space
1532 // still not enough, as we don't know how much we need, double the
1533 // current size of the buffer
1535 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1537 else if ( len
>= size
)
1539 #if wxUSE_WXVSNPRINTF
1540 // we know that our own implementation of wxVsnprintf() returns
1541 // size+1 when there's not enough space but that's not the size
1542 // of the required buffer!
1543 size
*= 2; // so we just double the current size of the buffer
1545 // some vsnprintf() implementations NUL-terminate the buffer and
1546 // some don't in len == size case, to be safe always add 1
1550 else // ok, there was enough space
1556 // we could have overshot
// ----------------------------------------------------------------------------
// misc other operations
// ----------------------------------------------------------------------------
1566 // returns true if the string matches the pattern which may contain '*' and
1567 // '?' metacharacters (as usual, '?' matches any character and '*' any number
1569 bool wxString::Matches(const wxString
& mask
) const
1571 // I disable this code as it doesn't seem to be faster (in fact, it seems
1572 // to be much slower) than the old, hand-written code below and using it
1573 // here requires always linking with libregex even if the user code doesn't
1575 #if 0 // wxUSE_REGEX
1576 // first translate the shell-like mask into a regex
1578 pattern
.reserve(wxStrlen(pszMask
));
1590 pattern
+= _T(".*");
1601 // these characters are special in a RE, quote them
1602 // (however note that we don't quote '[' and ']' to allow
1603 // using them for Unix shell like matching)
1604 pattern
+= _T('\\');
1608 pattern
+= *pszMask
;
1616 return wxRegEx(pattern
, wxRE_NOSUB
| wxRE_EXTENDED
).Matches(c_str());
1617 #else // !wxUSE_REGEX
1618 // TODO: this is, of course, awfully inefficient...
1620 // FIXME-UTF8: implement using iterators, remove #if
1621 #if wxUSE_UNICODE_UTF8
1622 wxWCharBuffer maskBuf
= mask
.wc_str();
1623 wxWCharBuffer txtBuf
= wc_str();
1624 const wxChar
*pszMask
= maskBuf
.data();
1625 const wxChar
*pszTxt
= txtBuf
.data();
1627 const wxChar
*pszMask
= mask
.wx_str();
1628 // the char currently being checked
1629 const wxChar
*pszTxt
= wx_str();
1632 // the last location where '*' matched
1633 const wxChar
*pszLastStarInText
= NULL
;
1634 const wxChar
*pszLastStarInMask
= NULL
;
1637 for ( ; *pszMask
!= wxT('\0'); pszMask
++, pszTxt
++ ) {
1638 switch ( *pszMask
) {
1640 if ( *pszTxt
== wxT('\0') )
1643 // pszTxt and pszMask will be incremented in the loop statement
1649 // remember where we started to be able to backtrack later
1650 pszLastStarInText
= pszTxt
;
1651 pszLastStarInMask
= pszMask
;
1653 // ignore special chars immediately following this one
1654 // (should this be an error?)
1655 while ( *pszMask
== wxT('*') || *pszMask
== wxT('?') )
1658 // if there is nothing more, match
1659 if ( *pszMask
== wxT('\0') )
1662 // are there any other metacharacters in the mask?
1664 const wxChar
*pEndMask
= wxStrpbrk(pszMask
, wxT("*?"));
1666 if ( pEndMask
!= NULL
) {
1667 // we have to match the string between two metachars
1668 uiLenMask
= pEndMask
- pszMask
;
1671 // we have to match the remainder of the string
1672 uiLenMask
= wxStrlen(pszMask
);
1675 wxString
strToMatch(pszMask
, uiLenMask
);
1676 const wxChar
* pMatch
= wxStrstr(pszTxt
, strToMatch
);
1677 if ( pMatch
== NULL
)
1680 // -1 to compensate "++" in the loop
1681 pszTxt
= pMatch
+ uiLenMask
- 1;
1682 pszMask
+= uiLenMask
- 1;
1687 if ( *pszMask
!= *pszTxt
)
1693 // match only if nothing left
1694 if ( *pszTxt
== wxT('\0') )
1697 // if we failed to match, backtrack if we can
1698 if ( pszLastStarInText
) {
1699 pszTxt
= pszLastStarInText
+ 1;
1700 pszMask
= pszLastStarInMask
;
1702 pszLastStarInText
= NULL
;
1704 // don't bother resetting pszLastStarInMask, it's unnecessary
1710 #endif // wxUSE_REGEX/!wxUSE_REGEX
1713 // Count the number of chars
1714 int wxString::Freq(wxUniChar ch
) const
1717 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1725 // convert to upper case, return the copy of the string
1726 wxString
wxString::Upper() const
1727 { wxString
s(*this); return s
.MakeUpper(); }
1729 // convert to lower case, return the copy of the string
1730 wxString
wxString::Lower() const { wxString
s(*this); return s
.MakeLower(); }