1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
13 // ===========================================================================
14 // headers, declarations, constants
15 // ===========================================================================
17 // For compilers that support precompilation, includes "wx.h".
18 #include "wx/wxprec.h"
25 #include "wx/string.h"
26 #include "wx/wxcrtvararg.h"
42 #include "wx/hashmap.h"
44 // string handling functions used by wxString:
45 #if wxUSE_UNICODE_UTF8
46 #define wxStringMemcpy memcpy
47 #define wxStringMemcmp memcmp
48 #define wxStringMemchr memchr
49 #define wxStringStrlen strlen
51 #define wxStringMemcpy wxTmemcpy
52 #define wxStringMemcmp wxTmemcmp
53 #define wxStringMemchr wxTmemchr
54 #define wxStringStrlen wxStrlen
58 // ---------------------------------------------------------------------------
59 // static class variables definition
60 // ---------------------------------------------------------------------------
62 //According to STL _must_ be a -1 size_t
63 const size_t wxString::npos
= (size_t) -1;
65 // ----------------------------------------------------------------------------
67 // ----------------------------------------------------------------------------
69 #if wxUSE_STD_IOSTREAM
73 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxCStrData
& str
)
75 // FIXME-UTF8: always, not only if wxUSE_UNICODE
76 #if wxUSE_UNICODE && !defined(__BORLANDC__)
77 return os
<< (const wchar_t*)str
.AsWCharBuf();
79 return os
<< (const char*)str
.AsCharBuf();
83 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxString
& str
)
85 return os
<< str
.c_str();
88 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxCharBuffer
& str
)
90 return os
<< str
.data();
94 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxWCharBuffer
& str
)
96 return os
<< str
.data();
100 #endif // wxUSE_STD_IOSTREAM
102 // ===========================================================================
103 // wxString class core
104 // ===========================================================================
106 #if wxUSE_UNICODE_UTF8
108 void wxString::PosLenToImpl(size_t pos
, size_t len
,
109 size_t *implPos
, size_t *implLen
) const
115 const_iterator i
= begin() + pos
;
116 *implPos
= wxStringImpl::const_iterator(i
.impl()) - m_impl
.begin();
121 // too large length is interpreted as "to the end of the string"
122 // FIXME-UTF8: verify this is the case in std::string, assert
124 if ( pos
+ len
> length() )
125 len
= length() - pos
;
127 *implLen
= (i
+ len
).impl() - i
.impl();
132 #endif // wxUSE_UNICODE_UTF8
134 // ----------------------------------------------------------------------------
135 // wxCStrData converted strings caching
136 // ----------------------------------------------------------------------------
138 // FIXME-UTF8: temporarily disabled because it doesn't work with global
139 // string objects; re-enable after fixing this bug and benchmarking
140 // performance to see if using a hash is a good idea at all
143 // For backward compatibility reasons, it must be possible to assign the value
144 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
145 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
146 // because the memory would be freed immediately, but it has to be valid as long
147 // as the string is not modified, so that code like this still works:
149 // const wxChar *s = str.c_str();
150 // while ( s ) { ... }
152 // FIXME-UTF8: not thread safe!
153 // FIXME-UTF8: we currently clear the cached conversion only when the string is
154 // destroyed, but we should do it when the string is modified, to
155 // keep memory usage down
156 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
157 // invalidated the cache on every change, we could keep the previous conversion
159 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
160 // to use mb_str() or wc_str() instead of (const [w]char*)c_str()
163 static inline void DeleteStringFromConversionCache(T
& hash
, const wxString
*s
)
165 typename
T::iterator i
= hash
.find(wxConstCast(s
, wxString
));
166 if ( i
!= hash
.end() )
174 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
175 // so we have to use wxString* here and const-cast when used
176 WX_DECLARE_HASH_MAP(wxString
*, char*, wxPointerHash
, wxPointerEqual
,
177 wxStringCharConversionCache
);
178 static wxStringCharConversionCache gs_stringsCharCache
;
180 const char* wxCStrData::AsChar() const
182 // remove previously cached value, if any (see FIXMEs above):
183 DeleteStringFromConversionCache(gs_stringsCharCache
, m_str
);
185 // convert the string and keep it:
186 const char *s
= gs_stringsCharCache
[wxConstCast(m_str
, wxString
)] =
187 m_str
->mb_str().release();
191 #endif // wxUSE_UNICODE
193 #if !wxUSE_UNICODE_WCHAR
194 WX_DECLARE_HASH_MAP(wxString
*, wchar_t*, wxPointerHash
, wxPointerEqual
,
195 wxStringWCharConversionCache
);
196 static wxStringWCharConversionCache gs_stringsWCharCache
;
198 const wchar_t* wxCStrData::AsWChar() const
200 // remove previously cached value, if any (see FIXMEs above):
201 DeleteStringFromConversionCache(gs_stringsWCharCache
, m_str
);
203 // convert the string and keep it:
204 const wchar_t *s
= gs_stringsWCharCache
[wxConstCast(m_str
, wxString
)] =
205 m_str
->wc_str().release();
209 #endif // !wxUSE_UNICODE_WCHAR
211 wxString::~wxString()
214 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
215 DeleteStringFromConversionCache(gs_stringsCharCache
, this);
217 #if !wxUSE_UNICODE_WCHAR
218 DeleteStringFromConversionCache(gs_stringsWCharCache
, this);
224 const char* wxCStrData::AsChar() const
226 wxString
*str
= wxConstCast(m_str
, wxString
);
228 // convert the string:
229 wxCharBuffer
buf(str
->mb_str());
231 // FIXME-UTF8: do the conversion in-place in the existing buffer
232 if ( str
->m_convertedToChar
&&
233 strlen(buf
) == strlen(str
->m_convertedToChar
) )
235 // keep the same buffer for as long as possible, so that several calls
236 // to c_str() in a row still work:
237 strcpy(str
->m_convertedToChar
, buf
);
241 str
->m_convertedToChar
= buf
.release();
245 return str
->m_convertedToChar
+ m_offset
;
247 #endif // wxUSE_UNICODE
249 #if !wxUSE_UNICODE_WCHAR
250 const wchar_t* wxCStrData::AsWChar() const
252 wxString
*str
= wxConstCast(m_str
, wxString
);
254 // convert the string:
255 wxWCharBuffer
buf(str
->wc_str());
257 // FIXME-UTF8: do the conversion in-place in the existing buffer
258 if ( str
->m_convertedToWChar
&&
259 wxWcslen(buf
) == wxWcslen(str
->m_convertedToWChar
) )
261 // keep the same buffer for as long as possible, so that several calls
262 // to c_str() in a row still work:
263 memcpy(str
->m_convertedToWChar
, buf
, sizeof(wchar_t) * wxWcslen(buf
));
267 str
->m_convertedToWChar
= buf
.release();
271 return str
->m_convertedToWChar
+ m_offset
;
273 #endif // !wxUSE_UNICODE_WCHAR
275 // ===========================================================================
276 // wxString class core
277 // ===========================================================================
279 // ---------------------------------------------------------------------------
280 // construction and conversion
281 // ---------------------------------------------------------------------------
283 #if wxUSE_UNICODE_WCHAR
285 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
286 const wxMBConv
& conv
)
289 if ( !psz
|| nLength
== 0 )
290 return SubstrBufFromMB(L
"", 0);
292 if ( nLength
== npos
)
296 wxWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
298 return SubstrBufFromMB(_T(""), 0);
300 return SubstrBufFromMB(wcBuf
, wcLen
);
302 #endif // wxUSE_UNICODE_WCHAR
304 #if wxUSE_UNICODE_UTF8
306 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
307 const wxMBConv
& conv
)
309 // FIXME-UTF8: return as-is without copying under UTF8 locale, return
310 // converted string under other locales - needs wxCharBuffer
314 if ( !psz
|| nLength
== 0 )
315 return SubstrBufFromMB("", 0);
317 if ( nLength
== npos
)
320 // first convert to wide string:
322 wxWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
324 return SubstrBufFromMB("", 0);
326 // and then to UTF-8:
327 SubstrBufFromMB
buf(ConvertStr(wcBuf
, wcLen
, wxConvUTF8
));
328 // widechar -> UTF-8 conversion isn't supposed to ever fail:
329 wxASSERT_MSG( buf
.data
, _T("conversion to UTF-8 failed") );
333 #endif // wxUSE_UNICODE_UTF8
335 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
337 wxString::SubstrBufFromWC
wxString::ConvertStr(const wchar_t *pwz
, size_t nLength
,
338 const wxMBConv
& conv
)
341 if ( !pwz
|| nLength
== 0 )
342 return SubstrBufFromWC("", 0);
344 if ( nLength
== npos
)
348 wxCharBuffer
mbBuf(conv
.cWC2MB(pwz
, nLength
, &mbLen
));
350 return SubstrBufFromWC("", 0);
352 return SubstrBufFromWC(mbBuf
, mbLen
);
354 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
357 #if wxUSE_UNICODE_WCHAR
359 //Convert wxString in Unicode mode to a multi-byte string
360 const wxCharBuffer
wxString::mb_str(const wxMBConv
& conv
) const
362 return conv
.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL
);
365 #elif wxUSE_UNICODE_UTF8
367 const wxWCharBuffer
wxString::wc_str() const
369 return wxConvUTF8
.cMB2WC(m_impl
.c_str(),
370 m_impl
.length() + 1 /* size, not length */,
374 const wxCharBuffer
wxString::mb_str(const wxMBConv
& conv
) const
376 // FIXME-UTF8: optimize the case when conv==wxConvUTF8 or wxConvLibc
378 // FIXME-UTF8: use wc_str() here once we have buffers with length
382 wxConvUTF8
.cMB2WC(m_impl
.c_str(),
383 m_impl
.length() + 1 /* size, not length */,
386 return wxCharBuffer("");
388 return conv
.cWC2MB(wcBuf
, wcLen
, NULL
);
393 //Converts this string to a wide character string if unicode
394 //mode is not enabled and wxUSE_WCHAR_T is enabled
395 const wxWCharBuffer
wxString::wc_str(const wxMBConv
& conv
) const
397 return conv
.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL
);
400 #endif // Unicode/ANSI
402 // shrink to minimal size (releasing extra memory)
403 bool wxString::Shrink()
405 wxString
tmp(begin(), end());
407 return tmp
.length() == length();
410 // deprecated compatibility code:
411 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
412 wxStringCharType
*wxString::GetWriteBuf(size_t nLen
)
414 return DoGetWriteBuf(nLen
);
417 void wxString::UngetWriteBuf()
422 void wxString::UngetWriteBuf(size_t nLen
)
424 DoUngetWriteBuf(nLen
);
426 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
429 // ---------------------------------------------------------------------------
431 // ---------------------------------------------------------------------------
433 // all functions are inline in string.h
435 // ---------------------------------------------------------------------------
436 // concatenation operators
437 // ---------------------------------------------------------------------------
440 * concatenation functions come in 5 flavours:
442 * char + string and string + char
443 * C str + string and string + C str
446 wxString
operator+(const wxString
& str1
, const wxString
& str2
)
448 #if !wxUSE_STL_BASED_WXSTRING
449 wxASSERT( str1
.IsValid() );
450 wxASSERT( str2
.IsValid() );
459 wxString
operator+(const wxString
& str
, wxUniChar ch
)
461 #if !wxUSE_STL_BASED_WXSTRING
462 wxASSERT( str
.IsValid() );
471 wxString
operator+(wxUniChar ch
, const wxString
& str
)
473 #if !wxUSE_STL_BASED_WXSTRING
474 wxASSERT( str
.IsValid() );
483 wxString
operator+(const wxString
& str
, const char *psz
)
485 #if !wxUSE_STL_BASED_WXSTRING
486 wxASSERT( str
.IsValid() );
490 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
491 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
499 wxString
operator+(const wxString
& str
, const wchar_t *pwz
)
501 #if !wxUSE_STL_BASED_WXSTRING
502 wxASSERT( str
.IsValid() );
506 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
507 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
515 wxString
operator+(const char *psz
, const wxString
& str
)
517 #if !wxUSE_STL_BASED_WXSTRING
518 wxASSERT( str
.IsValid() );
522 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
523 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
531 wxString
operator+(const wchar_t *pwz
, const wxString
& str
)
533 #if !wxUSE_STL_BASED_WXSTRING
534 wxASSERT( str
.IsValid() );
538 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
539 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
547 // ---------------------------------------------------------------------------
549 // ---------------------------------------------------------------------------
551 #ifdef HAVE_STD_STRING_COMPARE
553 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
554 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
555 // sort strings in characters code point order by sorting the byte sequence
556 // in byte values order (i.e. what strcmp() and memcmp() do).
558 int wxString::compare(const wxString
& str
) const
560 return m_impl
.compare(str
.m_impl
);
563 int wxString::compare(size_t nStart
, size_t nLen
,
564 const wxString
& str
) const
567 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
568 return m_impl
.compare(pos
, len
, str
.m_impl
);
571 int wxString::compare(size_t nStart
, size_t nLen
,
573 size_t nStart2
, size_t nLen2
) const
576 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
579 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
581 return m_impl
.compare(pos
, len
, str
.m_impl
, pos2
, len2
);
584 int wxString::compare(const char* sz
) const
586 return m_impl
.compare(ImplStr(sz
));
589 int wxString::compare(const wchar_t* sz
) const
591 return m_impl
.compare(ImplStr(sz
));
594 int wxString::compare(size_t nStart
, size_t nLen
,
595 const char* sz
, size_t nCount
) const
598 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
600 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
602 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
605 int wxString::compare(size_t nStart
, size_t nLen
,
606 const wchar_t* sz
, size_t nCount
) const
609 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
611 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
613 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
616 #else // !HAVE_STD_STRING_COMPARE
618 static inline int wxDoCmp(const wxStringCharType
* s1
, size_t l1
,
619 const wxStringCharType
* s2
, size_t l2
)
622 return wxStringMemcmp(s1
, s2
, l1
);
625 int ret
= wxStringMemcmp(s1
, s2
, l1
);
626 return ret
== 0 ? -1 : ret
;
630 int ret
= wxStringMemcmp(s1
, s2
, l2
);
631 return ret
== 0 ? +1 : ret
;
635 int wxString::compare(const wxString
& str
) const
637 return ::wxDoCmp(m_impl
.data(), m_impl
.length(),
638 str
.m_impl
.data(), str
.m_impl
.length());
641 int wxString::compare(size_t nStart
, size_t nLen
,
642 const wxString
& str
) const
644 wxASSERT(nStart
<= length());
645 size_type strLen
= length() - nStart
;
646 nLen
= strLen
< nLen
? strLen
: nLen
;
649 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
651 return ::wxDoCmp(m_impl
.data() + pos
, len
,
652 str
.m_impl
.data(), str
.m_impl
.length());
655 int wxString::compare(size_t nStart
, size_t nLen
,
657 size_t nStart2
, size_t nLen2
) const
659 wxASSERT(nStart
<= length());
660 wxASSERT(nStart2
<= str
.length());
661 size_type strLen
= length() - nStart
,
662 strLen2
= str
.length() - nStart2
;
663 nLen
= strLen
< nLen
? strLen
: nLen
;
664 nLen2
= strLen2
< nLen2
? strLen2
: nLen2
;
667 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
669 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
671 return ::wxDoCmp(m_impl
.data() + pos
, len
,
672 str
.m_impl
.data() + pos2
, len2
);
675 int wxString::compare(const char* sz
) const
677 SubstrBufFromMB
str(ImplStr(sz
, npos
));
678 if ( str
.len
== npos
)
679 str
.len
= wxStringStrlen(str
.data
);
680 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
683 int wxString::compare(const wchar_t* sz
) const
685 SubstrBufFromWC
str(ImplStr(sz
, npos
));
686 if ( str
.len
== npos
)
687 str
.len
= wxStringStrlen(str
.data
);
688 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
691 int wxString::compare(size_t nStart
, size_t nLen
,
692 const char* sz
, size_t nCount
) const
694 wxASSERT(nStart
<= length());
695 size_type strLen
= length() - nStart
;
696 nLen
= strLen
< nLen
? strLen
: nLen
;
699 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
701 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
702 if ( str
.len
== npos
)
703 str
.len
= wxStringStrlen(str
.data
);
705 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
708 int wxString::compare(size_t nStart
, size_t nLen
,
709 const wchar_t* sz
, size_t nCount
) const
711 wxASSERT(nStart
<= length());
712 size_type strLen
= length() - nStart
;
713 nLen
= strLen
< nLen
? strLen
: nLen
;
716 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
718 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
719 if ( str
.len
== npos
)
720 str
.len
= wxStringStrlen(str
.data
);
722 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
725 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
728 // ---------------------------------------------------------------------------
729 // find_{first,last}_[not]_of functions
730 // ---------------------------------------------------------------------------
732 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
734 // NB: All these functions are implemented with the argument being wxChar*,
735 // i.e. widechar string in any Unicode build, even though native string
736 // representation is char* in the UTF-8 build. This is because we couldn't
737 // use memchr() to determine if a character is in a set encoded as UTF-8.
739 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
) const
741 return find_first_of(sz
, nStart
, wxStrlen(sz
));
744 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
) const
746 return find_first_not_of(sz
, nStart
, wxStrlen(sz
));
749 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
751 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
754 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
756 if ( wxTmemchr(sz
, *i
, n
) )
763 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
765 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
768 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
770 if ( !wxTmemchr(sz
, *i
, n
) )
778 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
) const
780 return find_last_of(sz
, nStart
, wxStrlen(sz
));
783 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
) const
785 return find_last_not_of(sz
, nStart
, wxStrlen(sz
));
788 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
790 size_t len
= length();
792 if ( nStart
== npos
)
798 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
802 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
803 i
!= rend(); --idx
, ++i
)
805 if ( wxTmemchr(sz
, *i
, n
) )
812 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
814 size_t len
= length();
816 if ( nStart
== npos
)
822 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
826 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
827 i
!= rend(); --idx
, ++i
)
829 if ( !wxTmemchr(sz
, *i
, n
) )
836 size_t wxString::find_first_not_of(wxUniChar ch
, size_t nStart
) const
838 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
841 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
850 size_t wxString::find_last_not_of(wxUniChar ch
, size_t nStart
) const
852 size_t len
= length();
854 if ( nStart
== npos
)
860 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
864 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
865 i
!= rend(); --idx
, ++i
)
874 // the functions above were implemented for wchar_t* arguments in Unicode
875 // build and char* in ANSI build; below are implementations for the other character type
878 #define wxOtherCharType char
879 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
881 #define wxOtherCharType wchar_t
882 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
885 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
) const
886 { return find_first_of(STRCONV(sz
), nStart
); }
888 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
,
890 { return find_first_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
891 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
) const
892 { return find_last_of(STRCONV(sz
), nStart
); }
893 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
,
895 { return find_last_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
896 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
897 { return find_first_not_of(STRCONV(sz
), nStart
); }
898 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
,
900 { return find_first_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
901 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
902 { return find_last_not_of(STRCONV(sz
), nStart
); }
903 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
,
905 { return find_last_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
907 #undef wxOtherCharType
910 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
912 // ===========================================================================
913 // other common string functions
914 // ===========================================================================
916 int wxString::CmpNoCase(const wxString
& s
) const
918 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
921 const_iterator i1
= begin();
922 const_iterator end1
= end();
923 const_iterator i2
= s
.begin();
924 const_iterator end2
= s
.end();
926 for ( ; i1
!= end1
&& i2
!= end2
; ++idx
, ++i1
, ++i2
)
928 wxUniChar lower1
= (wxChar
)wxTolower(*i1
);
929 wxUniChar lower2
= (wxChar
)wxTolower(*i2
);
930 if ( lower1
!= lower2
)
931 return lower1
< lower2
? -1 : 1;
934 size_t len1
= length();
935 size_t len2
= s
.length();
939 else if ( len1
> len2
)
948 #ifndef __SCHAR_MAX__
949 #define __SCHAR_MAX__ 127
953 wxString
wxString::FromAscii(const char *ascii
)
956 return wxEmptyString
;
958 size_t len
= strlen(ascii
);
963 wxImplStringBuffer
buf(res
, len
);
964 wxStringCharType
*dest
= buf
;
968 unsigned char c
= (unsigned char)*ascii
++;
969 wxASSERT_MSG( c
< 0x80,
970 _T("Non-ASCII value passed to FromAscii().") );
972 *dest
++ = (wchar_t)c
;
982 wxString
wxString::FromAscii(const char ascii
)
984 // What do we do with '\0' ?
986 unsigned char c
= (unsigned char)ascii
;
988 wxASSERT_MSG( c
< 0x80, _T("Non-ASCII value passed to FromAscii().") );
990 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
991 return wxString(wxUniChar((wchar_t)c
));
994 const wxCharBuffer
wxString::ToAscii() const
996 // this will allocate enough space for the terminating NUL too
997 wxCharBuffer
buffer(length());
998 char *dest
= buffer
.data();
1000 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1003 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1004 *dest
++ = c
.IsAscii() ? (char)c
: '_';
1006 // the output string can't have embedded NULs anyhow, so we can safely
1007 // stop at first of them even if we do have any
1015 #endif // wxUSE_UNICODE
1017 // extract string of length nCount starting at nFirst
1018 wxString
wxString::Mid(size_t nFirst
, size_t nCount
) const
1020 size_t nLen
= length();
1022 // default value of nCount is npos and means "till the end"
1023 if ( nCount
== npos
)
1025 nCount
= nLen
- nFirst
;
1028 // out-of-bounds requests return sensible things
1029 if ( nFirst
+ nCount
> nLen
)
1031 nCount
= nLen
- nFirst
;
1034 if ( nFirst
> nLen
)
1036 // AllocCopy() will return empty string
1037 return wxEmptyString
;
1040 wxString
dest(*this, nFirst
, nCount
);
1041 if ( dest
.length() != nCount
)
1043 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1049 // check that the string starts with prefix and return the rest of the string
1050 // in the provided pointer if it is not NULL, otherwise return false
1051 bool wxString::StartsWith(const wxChar
*prefix
, wxString
*rest
) const
1053 wxASSERT_MSG( prefix
, _T("invalid parameter in wxString::StartsWith") );
1055 // first check if the beginning of the string matches the prefix: note
1056 // that we don't have to check that we don't run out of this string as
1057 // when we reach the terminating NUL, either prefix string ends too (and
1058 // then it's ok) or we break out of the loop because there is no match
1059 const wxChar
*p
= c_str();
1062 if ( *prefix
++ != *p
++ )
1071 // put the rest of the string into provided pointer
1079 // check that the string ends with suffix and return the rest of it in the
1080 // provided pointer if it is not NULL, otherwise return false
1081 bool wxString::EndsWith(const wxChar
*suffix
, wxString
*rest
) const
1083 wxASSERT_MSG( suffix
, _T("invalid parameter in wxString::EndssWith") );
1085 int start
= length() - wxStrlen(suffix
);
1087 if ( start
< 0 || compare(start
, npos
, suffix
) != 0 )
1092 // put the rest of the string into provided pointer
1093 rest
->assign(*this, 0, start
);
1100 // extract nCount last (rightmost) characters
1101 wxString
wxString::Right(size_t nCount
) const
1103 if ( nCount
> length() )
1106 wxString
dest(*this, length() - nCount
, nCount
);
1107 if ( dest
.length() != nCount
) {
1108 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1113 // get all characters after the last occurrence of ch
1114 // (returns the whole string if ch not found)
1115 wxString
wxString::AfterLast(wxUniChar ch
) const
1118 int iPos
= Find(ch
, true);
1119 if ( iPos
== wxNOT_FOUND
)
1122 str
= wx_str() + iPos
+ 1;
1127 // extract nCount first (leftmost) characters
1128 wxString
wxString::Left(size_t nCount
) const
1130 if ( nCount
> length() )
1133 wxString
dest(*this, 0, nCount
);
1134 if ( dest
.length() != nCount
) {
1135 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1140 // get all characters before the first occurrence of ch
1141 // (returns the whole string if ch not found)
1142 wxString
wxString::BeforeFirst(wxUniChar ch
) const
1144 int iPos
= Find(ch
);
1145 if ( iPos
== wxNOT_FOUND
) iPos
= length();
1146 return wxString(*this, 0, iPos
);
1149 /// get all characters before the last occurrence of ch
1150 /// (returns empty string if ch not found)
1151 wxString
wxString::BeforeLast(wxUniChar ch
) const
1154 int iPos
= Find(ch
, true);
1155 if ( iPos
!= wxNOT_FOUND
&& iPos
!= 0 )
1156 str
= wxString(c_str(), iPos
);
1161 /// get all characters after the first occurrence of ch
1162 /// (returns empty string if ch not found)
1163 wxString
wxString::AfterFirst(wxUniChar ch
) const
1166 int iPos
= Find(ch
);
1167 if ( iPos
!= wxNOT_FOUND
)
1168 str
= wx_str() + iPos
+ 1;
1173 // replace first (or all) occurrences of some substring with another one
1174 size_t wxString::Replace(const wxString
& strOld
,
1175 const wxString
& strNew
, bool bReplaceAll
)
1177 // if we tried to replace an empty string we'd enter an infinite loop below
1178 wxCHECK_MSG( !strOld
.empty(), 0,
1179 _T("wxString::Replace(): invalid parameter") );
1181 size_t uiCount
= 0; // count of replacements made
1183 size_t uiOldLen
= strOld
.length();
1184 size_t uiNewLen
= strNew
.length();
1188 while ( (*this)[dwPos
] != wxT('\0') )
1190 //DO NOT USE STRSTR HERE
1191 //this string can contain embedded null characters,
1192 //so strstr will function incorrectly
1193 dwPos
= find(strOld
, dwPos
);
1194 if ( dwPos
== npos
)
1195 break; // exit the loop
1198 //replace this occurrence of the old string with the new one
1199 replace(dwPos
, uiOldLen
, strNew
, uiNewLen
);
1201 //move up pos past the string that was replaced
1204 //increase replace count
1209 break; // exit the loop
1216 bool wxString::IsAscii() const
1218 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1220 if ( !(*i
).IsAscii() )
1227 bool wxString::IsWord() const
1229 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1231 if ( !wxIsalpha(*i
) )
1238 bool wxString::IsNumber() const
1243 const_iterator i
= begin();
1245 if ( *i
== _T('-') || *i
== _T('+') )
1248 for ( ; i
!= end(); ++i
)
1250 if ( !wxIsdigit(*i
) )
1257 wxString
wxString::Strip(stripType w
) const
1260 if ( w
& leading
) s
.Trim(false);
1261 if ( w
& trailing
) s
.Trim(true);
1265 // ---------------------------------------------------------------------------
1267 // ---------------------------------------------------------------------------
1269 wxString
& wxString::MakeUpper()
1271 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1272 *it
= (wxChar
)wxToupper(*it
);
1277 wxString
& wxString::MakeLower()
1279 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1280 *it
= (wxChar
)wxTolower(*it
);
1285 // ---------------------------------------------------------------------------
1286 // trimming and padding
1287 // ---------------------------------------------------------------------------
1289 // some compilers (VC++ 6.0 not to name them) return true for a call to
1290 // isspace('ê') in the C locale which seems to be broken to me, but we have to
1291 // live with this by checking that the character is a 7 bit one - even if this
1292 // may fail to detect some spaces (I don't know if Unicode doesn't have
1293 // space-like symbols somewhere except in the first 128 chars), it is arguably
1294 // still better than trimming away accented letters
1295 inline int wxSafeIsspace(wxChar ch
) { return (ch
< 127) && wxIsspace(ch
); }
1297 // trims spaces (in the sense of isspace) from left or right side
1298 wxString
& wxString::Trim(bool bFromRight
)
1300 // first check if we're going to modify the string at all
1303 (bFromRight
&& wxSafeIsspace(GetChar(length() - 1))) ||
1304 (!bFromRight
&& wxSafeIsspace(GetChar(0u)))
1310 // find last non-space character
1311 reverse_iterator psz
= rbegin();
1312 while ( (psz
!= rend()) && wxSafeIsspace(*psz
) )
1315 // truncate at trailing space start
1316 erase(psz
.base(), end());
1320 // find first non-space character
1321 iterator psz
= begin();
1322 while ( (psz
!= end()) && wxSafeIsspace(*psz
) )
1325 // fix up data and length
1326 erase(begin(), psz
);
1333 // adds nCount characters chPad to the string from either side
1334 wxString
& wxString::Pad(size_t nCount
, wxUniChar chPad
, bool bFromRight
)
1336 wxString
s(chPad
, nCount
);
1349 // truncate the string
1350 wxString
& wxString::Truncate(size_t uiLen
)
1352 if ( uiLen
< length() )
1354 erase(begin() + uiLen
, end());
1356 //else: nothing to do, string is already short enough
1361 // ---------------------------------------------------------------------------
1362 // finding (return wxNOT_FOUND if not found and index otherwise)
1363 // ---------------------------------------------------------------------------
1366 int wxString::Find(wxUniChar ch
, bool bFromEnd
) const
1368 size_type idx
= bFromEnd
? find_last_of(ch
) : find_first_of(ch
);
1370 return (idx
== npos
) ? wxNOT_FOUND
: (int)idx
;
1373 // ----------------------------------------------------------------------------
1374 // conversion to numbers
1375 // ----------------------------------------------------------------------------
1377 // the implementation of all the functions below is exactly the same so factor it out
1380 template <typename T
, typename F
>
1381 bool wxStringToIntType(const wxChar
*start
,
1386 wxCHECK_MSG( val
, false, _T("NULL output pointer") );
1387 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), _T("invalid base") );
1394 *val
= (*func
)(start
, &end
, base
);
1396 // return true only if scan was stopped by the terminating NUL and if the
1397 // string was not empty to start with and no under/overflow occurred
1398 return !*end
&& (end
!= start
)
1400 && (errno
!= ERANGE
)
1405 bool wxString::ToLong(long *val
, int base
) const
1407 return wxStringToIntType((const wxChar
*)c_str(), val
, base
, wxStrtol
);
1410 bool wxString::ToULong(unsigned long *val
, int base
) const
1412 return wxStringToIntType((const wxChar
*)c_str(), val
, base
, wxStrtoul
);
1415 bool wxString::ToLongLong(wxLongLong_t
*val
, int base
) const
1417 return wxStringToIntType((const wxChar
*)c_str(), val
, base
, wxStrtoll
);
1420 bool wxString::ToULongLong(wxULongLong_t
*val
, int base
) const
1422 return wxStringToIntType((const wxChar
*)c_str(), val
, base
, wxStrtoull
);
1425 bool wxString::ToDouble(double *val
) const
1427 wxCHECK_MSG( val
, false, _T("NULL pointer in wxString::ToDouble") );
1433 const wxChar
*start
= c_str();
1435 *val
= wxStrtod(start
, &end
);
1437 // return true only if scan was stopped by the terminating NUL and if the
1438 // string was not empty to start with and no under/overflow occurred
1439 return !*end
&& (end
!= start
)
1441 && (errno
!= ERANGE
)
1446 // ---------------------------------------------------------------------------
1448 // ---------------------------------------------------------------------------
1451 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1452 wxString
wxStringPrintfMixinBase::DoFormat(const wxString
& format
, ...)
1454 wxString
wxString::DoFormat(const wxString
& format
, ...)
1458 va_start(argptr
, format
);
1461 s
.PrintfV(format
, argptr
);
1469 wxString
wxString::FormatV(const wxString
& format
, va_list argptr
)
1472 s
.PrintfV(format
, argptr
);
1476 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1477 int wxStringPrintfMixinBase::DoPrintf(const wxString
& format
, ...)
1479 int wxString::DoPrintf(const wxString
& format
, ...)
1483 va_start(argptr
, format
);
1485 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1486 // get a pointer to the wxString instance; static_cast<> is used for the
1487 // downcast because it adjusts the pointer correctly when multiple
1488 // inheritance is used:
1489 wxString
*str
= static_cast<wxString
*>(this);
1491 wxString
*str
= this;
1494 int iLen
= str
->PrintfV(format
, argptr
);
1501 #if wxUSE_UNICODE_UTF8
1502 template<typename BufferType
>
1504 // we only need one version in non-UTF8 builds and at least two Windows
1505 // compilers have problems with this function template, so use just one
1506 // normal function here
1508 static int DoStringPrintfV(wxString
& str
,
1509 const wxString
& format
, va_list argptr
)
1515 #if wxUSE_UNICODE_UTF8
1516 BufferType
tmp(str
, size
+ 1);
1517 typename
BufferType::CharType
*buf
= tmp
;
1519 wxStringBuffer
tmp(str
, size
+ 1);
1529 // wxVsnprintf() may modify the original arg pointer, so pass it
1532 wxVaCopy(argptrcopy
, argptr
);
1533 int len
= wxVsnprintf(buf
, size
, format
, argptrcopy
);
1536 // some implementations of vsnprintf() don't NUL terminate
1537 // the string if there is not enough space for it so
1538 // always do it manually
1539 buf
[size
] = _T('\0');
1541 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1542 // total number of characters which would have been written if the
1543 // buffer were large enough (newer standards such as Unix98)
1546 #if wxUSE_WXVSNPRINTF
1547 // we know that our own implementation of wxVsnprintf() returns -1
1548 // only for a format error - thus there's something wrong with
1549 // the user's format string
1551 #else // assume that system version only returns error if not enough space
1552 // still not enough, as we don't know how much we need, double the
1553 // current size of the buffer
1555 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1557 else if ( len
>= size
)
1559 #if wxUSE_WXVSNPRINTF
1560 // we know that our own implementation of wxVsnprintf() returns
1561 // size+1 when there's not enough space but that's not the size
1562 // of the required buffer!
1563 size
*= 2; // so we just double the current size of the buffer
1565 // some vsnprintf() implementations NUL-terminate the buffer and
1566 // some don't in len == size case, to be safe always add 1
1570 else // ok, there was enough space
1576 // we could have overshot
1579 return str
.length();
1582 int wxString::PrintfV(const wxString
& format
, va_list argptr
)
1585 wxVaCopy(argcopy
, argptr
);
1587 #if wxUSE_UNICODE_UTF8
1588 #if wxUSE_STL_BASED_WXSTRING
1589 typedef wxStringTypeBuffer
<char> Utf8Buffer
;
1591 typedef wxImplStringBuffer Utf8Buffer
;
1595 #if wxUSE_UTF8_LOCALE_ONLY
1596 return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argcopy
);
1598 #if wxUSE_UNICODE_UTF8
1599 if ( wxLocaleIsUtf8
)
1600 return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argcopy
);
1603 return DoStringPrintfV
<wxStringBuffer
>(*this, format
, argcopy
);
1605 return DoStringPrintfV(*this, format
, argcopy
);
1606 #endif // UTF8/WCHAR
1610 // ----------------------------------------------------------------------------
1611 // misc other operations
1612 // ----------------------------------------------------------------------------
1614 // returns true if the string matches the pattern which may contain '*' and
1615 // '?' metacharacters (as usual, '?' matches any character and '*' any number of them)
1617 bool wxString::Matches(const wxString
& mask
) const
1619 // I disable this code as it doesn't seem to be faster (in fact, it seems
1620 // to be much slower) than the old, hand-written code below and using it
1621 // here requires always linking with libregex even if the user code doesn't use it
1623 #if 0 // wxUSE_REGEX
1624 // first translate the shell-like mask into a regex
1626 pattern
.reserve(wxStrlen(pszMask
));
1638 pattern
+= _T(".*");
1649 // these characters are special in a RE, quote them
1650 // (however note that we don't quote '[' and ']' to allow
1651 // using them for Unix shell like matching)
1652 pattern
+= _T('\\');
1656 pattern
+= *pszMask
;
1664 return wxRegEx(pattern
, wxRE_NOSUB
| wxRE_EXTENDED
).Matches(c_str());
1665 #else // !wxUSE_REGEX
1666 // TODO: this is, of course, awfully inefficient...
1668 // FIXME-UTF8: implement using iterators, remove #if
1669 #if wxUSE_UNICODE_UTF8
1670 wxWCharBuffer maskBuf
= mask
.wc_str();
1671 wxWCharBuffer txtBuf
= wc_str();
1672 const wxChar
*pszMask
= maskBuf
.data();
1673 const wxChar
*pszTxt
= txtBuf
.data();
1675 const wxChar
*pszMask
= mask
.wx_str();
1676 // the char currently being checked
1677 const wxChar
*pszTxt
= wx_str();
1680 // the last location where '*' matched
1681 const wxChar
*pszLastStarInText
= NULL
;
1682 const wxChar
*pszLastStarInMask
= NULL
;
1685 for ( ; *pszMask
!= wxT('\0'); pszMask
++, pszTxt
++ ) {
1686 switch ( *pszMask
) {
1688 if ( *pszTxt
== wxT('\0') )
1691 // pszTxt and pszMask will be incremented in the loop statement
1697 // remember where we started to be able to backtrack later
1698 pszLastStarInText
= pszTxt
;
1699 pszLastStarInMask
= pszMask
;
1701 // ignore special chars immediately following this one
1702 // (should this be an error?)
1703 while ( *pszMask
== wxT('*') || *pszMask
== wxT('?') )
1706 // if there is nothing more, match
1707 if ( *pszMask
== wxT('\0') )
1710 // are there any other metacharacters in the mask?
1712 const wxChar
*pEndMask
= wxStrpbrk(pszMask
, wxT("*?"));
1714 if ( pEndMask
!= NULL
) {
1715 // we have to match the string between two metachars
1716 uiLenMask
= pEndMask
- pszMask
;
1719 // we have to match the remainder of the string
1720 uiLenMask
= wxStrlen(pszMask
);
1723 wxString
strToMatch(pszMask
, uiLenMask
);
1724 const wxChar
* pMatch
= wxStrstr(pszTxt
, strToMatch
);
1725 if ( pMatch
== NULL
)
1728 // -1 to compensate "++" in the loop
1729 pszTxt
= pMatch
+ uiLenMask
- 1;
1730 pszMask
+= uiLenMask
- 1;
1735 if ( *pszMask
!= *pszTxt
)
1741 // match only if nothing left
1742 if ( *pszTxt
== wxT('\0') )
1745 // if we failed to match, backtrack if we can
1746 if ( pszLastStarInText
) {
1747 pszTxt
= pszLastStarInText
+ 1;
1748 pszMask
= pszLastStarInMask
;
1750 pszLastStarInText
= NULL
;
1752 // don't bother resetting pszLastStarInMask, it's unnecessary
1758 #endif // wxUSE_REGEX/!wxUSE_REGEX
1761 // Count the number of chars
1762 int wxString::Freq(wxUniChar ch
) const
1765 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1773 // convert to upper case, return the copy of the string
1774 wxString
wxString::Upper() const
1775 { wxString
s(*this); return s
.MakeUpper(); }
1777 // convert to lower case, return the copy of the string
1778 wxString
wxString::Lower() const { wxString
s(*this); return s
.MakeLower(); }