1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
15 * 1) all empty strings use g_strEmpty, nRefs = -1 (set in Init())
16 * 2) AllocBuffer() sets nRefs to 1, Lock() increments it by one
17 * 3) Unlock() decrements nRefs and frees memory if it goes to 0
20 // ===========================================================================
21 // headers, declarations, constants
22 // ===========================================================================
24 // For compilers that support precompilation, includes "wx.h".
25 #include "wx/wxprec.h"
32 #include "wx/string.h"
48 #include "wx/hashmap.h"
50 // string handling functions used by wxString:
51 #if wxUSE_UNICODE_UTF8
52 #define wxStringMemcpy memcpy
53 #define wxStringMemcmp memcmp
54 #define wxStringMemchr memchr
55 #define wxStringStrlen strlen
57 #define wxStringMemcpy wxTmemcpy
58 #define wxStringMemcmp wxTmemcmp
59 #define wxStringMemchr wxTmemchr
60 #define wxStringStrlen wxStrlen
64 // ---------------------------------------------------------------------------
65 // static class variables definition
66 // ---------------------------------------------------------------------------
68 //According to STL _must_ be a -1 size_t
69 const size_t wxString::npos
= (size_t) -1;
71 // ----------------------------------------------------------------------------
73 // ----------------------------------------------------------------------------
75 #if wxUSE_STD_IOSTREAM
79 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxCStrData
& str
)
81 // FIXME-UTF8: always, not only if wxUSE_UNICODE
82 #if wxUSE_UNICODE && !defined(__BORLANDC__)
83 return os
<< str
.AsWChar();
85 return os
<< str
.AsChar();
89 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxString
& str
)
91 return os
<< str
.c_str();
94 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxCharBuffer
& str
)
96 return os
<< str
.data();
100 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxWCharBuffer
& str
)
102 return os
<< str
.data();
106 #endif // wxUSE_STD_IOSTREAM
108 // ----------------------------------------------------------------------------
109 // wxCStrData converted strings caching
110 // ----------------------------------------------------------------------------
112 // FIXME-UTF8: temporarily disabled because it doesn't work with global
113 // string objects; re-enable after fixing this bug and benchmarking
114 // performance to see if using a hash is a good idea at all
117 // For backward compatibility reasons, it must be possible to assign the value
118 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
119 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
120 // because the memory would be freed immediately, but it has to be valid as long
121 // as the string is not modified, so that code like this still works:
123 // const wxChar *s = str.c_str();
124 // while ( s ) { ... }
126 // FIXME-UTF8: not thread safe!
127 // FIXME-UTF8: we currently clear the cached conversion only when the string is
128 // destroyed, but we should do it when the string is modified, to
129 // keep memory usage down
130 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
131 // invalidated the cache on every change, we could keep the previous
133 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
134 // to use mb_str() or wc_str() instead of (const [w]char*)c_str()
137 static inline void DeleteStringFromConversionCache(T
& hash
, const wxString
*s
)
139 typename
T::iterator i
= hash
.find(wxConstCast(s
, wxString
));
140 if ( i
!= hash
.end() )
148 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
149 // so we have to use wxString* here and const-cast when used
150 WX_DECLARE_HASH_MAP(wxString
*, char*, wxPointerHash
, wxPointerEqual
,
151 wxStringCharConversionCache
);
152 static wxStringCharConversionCache gs_stringsCharCache
;
154 const char* wxCStrData::AsChar() const
// remove previously cached value, if any (see FIXMEs above):
157 DeleteStringFromConversionCache(gs_stringsCharCache
, m_str
);
159 // convert the string and keep it:
160 const char *s
= gs_stringsCharCache
[wxConstCast(m_str
, wxString
)] =
161 m_str
->mb_str().release();
165 #endif // wxUSE_UNICODE
167 #if !wxUSE_UNICODE_WCHAR
168 WX_DECLARE_HASH_MAP(wxString
*, wchar_t*, wxPointerHash
, wxPointerEqual
,
169 wxStringWCharConversionCache
);
170 static wxStringWCharConversionCache gs_stringsWCharCache
;
172 const wchar_t* wxCStrData::AsWChar() const
// remove previously cached value, if any (see FIXMEs above):
175 DeleteStringFromConversionCache(gs_stringsWCharCache
, m_str
);
177 // convert the string and keep it:
178 const wchar_t *s
= gs_stringsWCharCache
[wxConstCast(m_str
, wxString
)] =
179 m_str
->wc_str().release();
183 #endif // !wxUSE_UNICODE_WCHAR
185 wxString::~wxString()
188 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
189 DeleteStringFromConversionCache(gs_stringsCharCache
, this);
191 #if !wxUSE_UNICODE_WCHAR
192 DeleteStringFromConversionCache(gs_stringsWCharCache
, this);
198 const char* wxCStrData::AsChar() const
200 wxString
*str
= wxConstCast(m_str
, wxString
);
202 // convert the string:
203 wxCharBuffer
buf(str
->mb_str());
205 // FIXME-UTF8: do the conversion in-place in the existing buffer
206 if ( str
->m_convertedToChar
&&
207 strlen(buf
) == strlen(str
->m_convertedToChar
) )
209 // keep the same buffer for as long as possible, so that several calls
210 // to c_str() in a row still work:
211 strcpy(str
->m_convertedToChar
, buf
);
215 str
->m_convertedToChar
= buf
.release();
219 return str
->m_convertedToChar
+ m_offset
;
221 #endif // wxUSE_UNICODE
223 #if !wxUSE_UNICODE_WCHAR
224 const wchar_t* wxCStrData::AsWChar() const
226 wxString
*str
= wxConstCast(m_str
, wxString
);
228 // convert the string:
229 wxWCharBuffer
buf(str
->wc_str());
231 // FIXME-UTF8: do the conversion in-place in the existing buffer
232 if ( str
->m_convertedToWChar
&&
233 wxWcslen(buf
) == wxWcslen(str
->m_convertedToWChar
) )
235 // keep the same buffer for as long as possible, so that several calls
236 // to c_str() in a row still work:
237 memcpy(str
->m_convertedToWChar
, buf
, sizeof(wchar_t) * wxWcslen(buf
));
241 str
->m_convertedToWChar
= buf
.release();
245 return str
->m_convertedToWChar
+ m_offset
;
247 #endif // !wxUSE_UNICODE_WCHAR
249 // ===========================================================================
250 // wxString class core
251 // ===========================================================================
253 // ---------------------------------------------------------------------------
254 // construction and conversion
255 // ---------------------------------------------------------------------------
259 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
260 const wxMBConv
& conv
)
263 if ( !psz
|| nLength
== 0 )
264 return SubstrBufFromMB();
266 if ( nLength
== npos
)
270 wxWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
272 return SubstrBufFromMB();
274 return SubstrBufFromMB(wcBuf
, wcLen
);
278 wxString::SubstrBufFromWC
wxString::ConvertStr(const wchar_t *pwz
, size_t nLength
,
279 const wxMBConv
& conv
)
282 if ( !pwz
|| nLength
== 0 )
283 return SubstrBufFromWC();
285 if ( nLength
== npos
)
289 wxCharBuffer
mbBuf(conv
.cWC2MB(pwz
, nLength
, &mbLen
));
291 return SubstrBufFromWC();
293 return SubstrBufFromWC(mbBuf
, mbLen
);
300 //Convert wxString in Unicode mode to a multi-byte string
301 const wxCharBuffer
wxString::mb_str(const wxMBConv
& conv
) const
303 return conv
.cWC2MB(c_str(), length() + 1 /* size, not length */, NULL
);
310 //Converts this string to a wide character string if unicode
311 //mode is not enabled and wxUSE_WCHAR_T is enabled
312 const wxWCharBuffer
wxString::wc_str(const wxMBConv
& conv
) const
314 return conv
.cMB2WC(c_str(), length() + 1 /* size, not length */, NULL
);
317 #endif // wxUSE_WCHAR_T
319 #endif // Unicode/ANSI
321 // shrink to minimal size (releasing extra memory)
322 bool wxString::Shrink()
324 wxString
tmp(begin(), end());
326 return tmp
.length() == length();
329 // deprecated compatibility code:
330 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
331 wxChar
*wxString::GetWriteBuf(size_t nLen
)
333 return DoGetWriteBuf(nLen
);
336 void wxString::UngetWriteBuf()
341 void wxString::UngetWriteBuf(size_t nLen
)
343 DoUngetWriteBuf(nLen
);
345 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
348 // ---------------------------------------------------------------------------
350 // ---------------------------------------------------------------------------
352 // all functions are inline in string.h
354 // ---------------------------------------------------------------------------
355 // concatenation operators
356 // ---------------------------------------------------------------------------
359 * concatenation functions come in 5 flavours:
361 * char + string and string + char
362 * C str + string and string + C str
365 wxString
operator+(const wxString
& str1
, const wxString
& str2
)
367 #if !wxUSE_STL_BASED_WXSTRING
368 wxASSERT( str1
.IsValid() );
369 wxASSERT( str2
.IsValid() );
378 wxString
operator+(const wxString
& str
, wxUniChar ch
)
380 #if !wxUSE_STL_BASED_WXSTRING
381 wxASSERT( str
.IsValid() );
390 wxString
operator+(wxUniChar ch
, const wxString
& str
)
392 #if !wxUSE_STL_BASED_WXSTRING
393 wxASSERT( str
.IsValid() );
402 wxString
operator+(const wxString
& str
, const char *psz
)
404 #if !wxUSE_STL_BASED_WXSTRING
405 wxASSERT( str
.IsValid() );
409 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
410 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
418 wxString
operator+(const wxString
& str
, const wchar_t *pwz
)
420 #if !wxUSE_STL_BASED_WXSTRING
421 wxASSERT( str
.IsValid() );
425 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
426 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
434 wxString
operator+(const char *psz
, const wxString
& str
)
436 #if !wxUSE_STL_BASED_WXSTRING
437 wxASSERT( str
.IsValid() );
441 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
442 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
450 wxString
operator+(const wchar_t *pwz
, const wxString
& str
)
452 #if !wxUSE_STL_BASED_WXSTRING
453 wxASSERT( str
.IsValid() );
457 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
458 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
466 // ---------------------------------------------------------------------------
468 // ---------------------------------------------------------------------------
470 #ifdef HAVE_STD_STRING_COMPARE
472 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
473 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
474 // sort strings in characters code point order by sorting the byte sequence
475 // in byte values order (i.e. what strcmp() and memcmp() do).
477 int wxString::compare(const wxString
& str
) const
479 return m_impl
.compare(str
.m_impl
);
482 int wxString::compare(size_t nStart
, size_t nLen
,
483 const wxString
& str
) const
486 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
487 return m_impl
.compare(pos
, len
, str
.m_impl
);
490 int wxString::compare(size_t nStart
, size_t nLen
,
492 size_t nStart2
, size_t nLen2
) const
495 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
498 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
500 return m_impl
.compare(pos
, len
, str
.m_impl
, pos2
, len2
);
503 int wxString::compare(const char* sz
) const
505 return m_impl
.compare(ImplStr(sz
));
508 int wxString::compare(const wchar_t* sz
) const
510 return m_impl
.compare(ImplStr(sz
));
513 int wxString::compare(size_t nStart
, size_t nLen
,
514 const char* sz
, size_t nCount
) const
517 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
519 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
521 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
524 int wxString::compare(size_t nStart
, size_t nLen
,
525 const wchar_t* sz
, size_t nCount
) const
528 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
530 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
532 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
535 #else // !HAVE_STD_STRING_COMPARE
537 static inline int wxDoCmp(const wxStringCharType
* s1
, size_t l1
,
538 const wxStringCharType
* s2
, size_t l2
)
541 return wxStringMemcmp(s1
, s2
, l1
);
544 int ret
= wxStringMemcmp(s1
, s2
, l1
);
545 return ret
== 0 ? -1 : ret
;
549 int ret
= wxStringMemcmp(s1
, s2
, l2
);
550 return ret
== 0 ? +1 : ret
;
554 int wxString::compare(const wxString
& str
) const
556 return ::wxDoCmp(m_impl
.data(), m_impl
.length(),
557 str
.m_impl
.data(), str
.m_impl
.length());
560 int wxString::compare(size_t nStart
, size_t nLen
,
561 const wxString
& str
) const
563 wxASSERT(nStart
<= length());
564 size_type strLen
= length() - nStart
;
565 nLen
= strLen
< nLen
? strLen
: nLen
;
568 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
570 return ::wxDoCmp(m_impl
.data() + pos
, len
,
571 str
.m_impl
.data(), str
.m_impl
.length());
574 int wxString::compare(size_t nStart
, size_t nLen
,
576 size_t nStart2
, size_t nLen2
) const
578 wxASSERT(nStart
<= length());
579 wxASSERT(nStart2
<= str
.length());
580 size_type strLen
= length() - nStart
,
581 strLen2
= str
.length() - nStart2
;
582 nLen
= strLen
< nLen
? strLen
: nLen
;
583 nLen2
= strLen2
< nLen2
? strLen2
: nLen2
;
586 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
588 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
590 return ::wxDoCmp(m_impl
.data() + pos
, len
,
591 str
.m_impl
.data() + pos2
, len2
);
594 int wxString::compare(const char* sz
) const
596 SubstrBufFromMB
str(ImplStr(sz
, npos
));
597 if ( str
.len
== npos
)
598 str
.len
= wxStringStrlen(str
.data
);
599 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
602 int wxString::compare(const wchar_t* sz
) const
604 SubstrBufFromWC
str(ImplStr(sz
, npos
));
605 if ( str
.len
== npos
)
606 str
.len
= wxStringStrlen(str
.data
);
607 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
610 int wxString::compare(size_t nStart
, size_t nLen
,
611 const char* sz
, size_t nCount
) const
613 wxASSERT(nStart
<= length());
614 size_type strLen
= length() - nStart
;
615 nLen
= strLen
< nLen
? strLen
: nLen
;
618 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
620 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
621 if ( str
.len
== npos
)
622 str
.len
= wxStringStrlen(str
.data
);
624 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
627 int wxString::compare(size_t nStart
, size_t nLen
,
628 const wchar_t* sz
, size_t nCount
) const
630 wxASSERT(nStart
<= length());
631 size_type strLen
= length() - nStart
;
632 nLen
= strLen
< nLen
? strLen
: nLen
;
635 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
637 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
638 if ( str
.len
== npos
)
639 str
.len
= wxStringStrlen(str
.data
);
641 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
644 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
647 // ---------------------------------------------------------------------------
648 // find_{first,last}_[not]_of functions
649 // ---------------------------------------------------------------------------
651 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
653 // NB: All these functions are implemented with the argument being wxChar*,
654 // i.e. widechar string in any Unicode build, even though native string
655 // representation is char* in the UTF-8 build. This is because we couldn't
656 // use memchr() to determine if a character is in a set encoded as UTF-8.
658 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
) const
660 return find_first_of(sz
, nStart
, wxStrlen(sz
));
663 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
) const
665 return find_first_not_of(sz
, nStart
, wxStrlen(sz
));
668 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
670 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
673 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
675 if ( wxTmemchr(sz
, *i
, n
) )
682 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
684 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
687 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
689 if ( !wxTmemchr(sz
, *i
, n
) )
697 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
) const
699 return find_last_of(sz
, nStart
, wxStrlen(sz
));
702 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
) const
704 return find_last_not_of(sz
, nStart
, wxStrlen(sz
));
707 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
709 size_t len
= length();
711 if ( nStart
== npos
)
717 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
721 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
722 i
!= rend(); --idx
, ++i
)
724 if ( wxTmemchr(sz
, *i
, n
) )
731 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
733 size_t len
= length();
735 if ( nStart
== npos
)
741 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
745 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
746 i
!= rend(); --idx
, ++i
)
748 if ( !wxTmemchr(sz
, *i
, n
) )
755 size_t wxString::find_first_not_of(wxUniChar ch
, size_t nStart
) const
757 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
760 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
769 size_t wxString::find_last_not_of(wxUniChar ch
, size_t nStart
) const
771 size_t len
= length();
773 if ( nStart
== npos
)
779 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
783 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
784 i
!= rend(); --idx
, ++i
)
793 // the functions above were implemented for wchar_t* arguments in Unicode
794 // build and char* in ANSI build; below are implementations for the other
797 #define wxOtherCharType char
798 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
800 #define wxOtherCharType wchar_t
801 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
804 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
) const
805 { return find_first_of(STRCONV(sz
), nStart
); }
807 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
,
809 { return find_first_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
810 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
) const
811 { return find_last_of(STRCONV(sz
), nStart
); }
812 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
,
814 { return find_last_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
815 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
816 { return find_first_not_of(STRCONV(sz
), nStart
); }
817 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
,
819 { return find_first_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
820 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
821 { return find_last_not_of(STRCONV(sz
), nStart
); }
822 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
,
824 { return find_last_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
826 #undef wxOtherCharType
829 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
831 // ===========================================================================
832 // other common string functions
833 // ===========================================================================
835 int wxString::CmpNoCase(const wxString
& s
) const
837 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
840 const_iterator i1
= begin();
841 const_iterator end1
= end();
842 const_iterator i2
= s
.begin();
843 const_iterator end2
= s
.end();
845 for ( ; i1
!= end1
&& i2
!= end2
; ++idx
, ++i1
, ++i2
)
847 wxUniChar lower1
= (wxChar
)wxTolower(*i1
);
848 wxUniChar lower2
= (wxChar
)wxTolower(*i2
);
849 if ( lower1
!= lower2
)
850 return lower1
< lower2
? -1 : 1;
853 size_t len1
= length();
854 size_t len2
= s
.length();
858 else if ( len1
> len2
)
867 #ifndef __SCHAR_MAX__
868 #define __SCHAR_MAX__ 127
872 wxString
wxString::FromAscii(const char *ascii
)
875 return wxEmptyString
;
877 size_t len
= strlen( ascii
);
882 wxStringBuffer
buf(res
, len
);
888 if ( (*dest
++ = (wchar_t)(unsigned char)*ascii
++) == L
'\0' )
896 wxString
wxString::FromAscii(const char ascii
)
898 // What do we do with '\0' ?
901 res
+= (wchar_t)(unsigned char) ascii
;
906 const wxCharBuffer
wxString::ToAscii() const
908 // this will allocate enough space for the terminating NUL too
909 wxCharBuffer
buffer(length());
912 char *dest
= buffer
.data();
914 const wchar_t *pwc
= c_str();
917 *dest
++ = (char)(*pwc
> SCHAR_MAX
? wxT('_') : *pwc
);
919 // the output string can't have embedded NULs anyhow, so we can safely
920 // stop at first of them even if we do have any
930 // extract string of length nCount starting at nFirst
931 wxString
wxString::Mid(size_t nFirst
, size_t nCount
) const
933 size_t nLen
= length();
935 // default value of nCount is npos and means "till the end"
936 if ( nCount
== npos
)
938 nCount
= nLen
- nFirst
;
941 // out-of-bounds requests return sensible things
942 if ( nFirst
+ nCount
> nLen
)
944 nCount
= nLen
- nFirst
;
949 // AllocCopy() will return empty string
950 return wxEmptyString
;
953 wxString
dest(*this, nFirst
, nCount
);
954 if ( dest
.length() != nCount
)
956 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
962 // check that the string starts with prefix and return the rest of the string
963 // in the provided pointer if it is not NULL, otherwise return false
964 bool wxString::StartsWith(const wxChar
*prefix
, wxString
*rest
) const
966 wxASSERT_MSG( prefix
, _T("invalid parameter in wxString::StartsWith") );
968 // first check if the beginning of the string matches the prefix: note
969 // that we don't have to check that we don't run out of this string as
970 // when we reach the terminating NUL, either prefix string ends too (and
971 // then it's ok) or we break out of the loop because there is no match
972 const wxChar
*p
= c_str();
975 if ( *prefix
++ != *p
++ )
984 // put the rest of the string into provided pointer
992 // check that the string ends with suffix and return the rest of it in the
993 // provided pointer if it is not NULL, otherwise return false
994 bool wxString::EndsWith(const wxChar
*suffix
, wxString
*rest
) const
996 wxASSERT_MSG( suffix
, _T("invalid parameter in wxString::EndssWith") );
998 int start
= length() - wxStrlen(suffix
);
999 if ( start
< 0 || wxStrcmp(wx_str() + start
, suffix
) != 0 )
1004 // put the rest of the string into provided pointer
1005 rest
->assign(*this, 0, start
);
1012 // extract nCount last (rightmost) characters
1013 wxString
wxString::Right(size_t nCount
) const
1015 if ( nCount
> length() )
1018 wxString
dest(*this, length() - nCount
, nCount
);
1019 if ( dest
.length() != nCount
) {
1020 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1025 // get all characters after the last occurence of ch
1026 // (returns the whole string if ch not found)
1027 wxString
wxString::AfterLast(wxUniChar ch
) const
1030 int iPos
= Find(ch
, true);
1031 if ( iPos
== wxNOT_FOUND
)
1034 str
= wx_str() + iPos
+ 1;
1039 // extract nCount first (leftmost) characters
1040 wxString
wxString::Left(size_t nCount
) const
1042 if ( nCount
> length() )
1045 wxString
dest(*this, 0, nCount
);
1046 if ( dest
.length() != nCount
) {
1047 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1052 // get all characters before the first occurence of ch
1053 // (returns the whole string if ch not found)
1054 wxString
wxString::BeforeFirst(wxUniChar ch
) const
1056 int iPos
= Find(ch
);
1057 if ( iPos
== wxNOT_FOUND
) iPos
= length();
1058 return wxString(*this, 0, iPos
);
1061 /// get all characters before the last occurence of ch
1062 /// (returns empty string if ch not found)
1063 wxString
wxString::BeforeLast(wxUniChar ch
) const
1066 int iPos
= Find(ch
, true);
1067 if ( iPos
!= wxNOT_FOUND
&& iPos
!= 0 )
1068 str
= wxString(c_str(), iPos
);
1073 /// get all characters after the first occurence of ch
1074 /// (returns empty string if ch not found)
1075 wxString
wxString::AfterFirst(wxUniChar ch
) const
1078 int iPos
= Find(ch
);
1079 if ( iPos
!= wxNOT_FOUND
)
1080 str
= wx_str() + iPos
+ 1;
1085 // replace first (or all) occurences of some substring with another one
1086 size_t wxString::Replace(const wxString
& strOld
,
1087 const wxString
& strNew
, bool bReplaceAll
)
1089 // if we tried to replace an empty string we'd enter an infinite loop below
1090 wxCHECK_MSG( !strOld
.empty(), 0,
1091 _T("wxString::Replace(): invalid parameter") );
1093 size_t uiCount
= 0; // count of replacements made
1095 size_t uiOldLen
= strOld
.length();
1096 size_t uiNewLen
= strNew
.length();
1100 while ( (*this)[dwPos
] != wxT('\0') )
1102 //DO NOT USE STRSTR HERE
1103 //this string can contain embedded null characters,
1104 //so strstr will function incorrectly
1105 dwPos
= find(strOld
, dwPos
);
1106 if ( dwPos
== npos
)
1107 break; // exit the loop
1110 //replace this occurance of the old string with the new one
1111 replace(dwPos
, uiOldLen
, strNew
, uiNewLen
);
1113 //move up pos past the string that was replaced
1116 //increase replace count
1121 break; // exit the loop
1128 bool wxString::IsAscii() const
1130 const wxChar
*s
= (const wxChar
*) *this;
1132 if(!isascii(*s
)) return(false);
1138 bool wxString::IsWord() const
1140 const wxChar
*s
= (const wxChar
*) *this;
1142 if(!wxIsalpha(*s
)) return(false);
1148 bool wxString::IsNumber() const
1150 const wxChar
*s
= (const wxChar
*) *this;
1152 if ((s
[0] == wxT('-')) || (s
[0] == wxT('+'))) s
++;
1154 if(!wxIsdigit(*s
)) return(false);
1160 wxString
wxString::Strip(stripType w
) const
1163 if ( w
& leading
) s
.Trim(false);
1164 if ( w
& trailing
) s
.Trim(true);
1168 // ---------------------------------------------------------------------------
1170 // ---------------------------------------------------------------------------
1172 wxString
& wxString::MakeUpper()
1174 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1175 *it
= (wxChar
)wxToupper(*it
);
1180 wxString
& wxString::MakeLower()
1182 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1183 *it
= (wxChar
)wxTolower(*it
);
1188 // ---------------------------------------------------------------------------
1189 // trimming and padding
1190 // ---------------------------------------------------------------------------
1192 // some compilers (VC++ 6.0 not to name them) return true for a call to
1193 // isspace('ê') in the C locale which seems to be broken to me, but we have to
1194 // live with this by checking that the character is a 7 bit one - even if this
1195 // may fail to detect some spaces (I don't know if Unicode doesn't have
1196 // space-like symbols somewhere except in the first 128 chars), it is arguably
1197 // still better than trimming away accented letters
1198 inline int wxSafeIsspace(wxChar ch
) { return (ch
< 127) && wxIsspace(ch
); }
1200 // trims spaces (in the sense of isspace) from left or right side
1201 wxString
& wxString::Trim(bool bFromRight
)
1203 // first check if we're going to modify the string at all
1206 (bFromRight
&& wxSafeIsspace(GetChar(length() - 1))) ||
1207 (!bFromRight
&& wxSafeIsspace(GetChar(0u)))
1213 // find last non-space character
1214 reverse_iterator psz
= rbegin();
1215 while ( (psz
!= rend()) && wxSafeIsspace(*psz
) )
1218 // truncate at trailing space start
1219 erase(psz
.base(), end());
1223 // find first non-space character
1224 iterator psz
= begin();
1225 while ( (psz
!= end()) && wxSafeIsspace(*psz
) )
1228 // fix up data and length
1229 erase(begin(), psz
);
1236 // adds nCount characters chPad to the string from either side
1237 wxString
& wxString::Pad(size_t nCount
, wxUniChar chPad
, bool bFromRight
)
1239 wxString
s(chPad
, nCount
);
1252 // truncate the string
1253 wxString
& wxString::Truncate(size_t uiLen
)
1255 if ( uiLen
< length() )
1257 erase(begin() + uiLen
, end());
1259 //else: nothing to do, string is already short enough
1264 // ---------------------------------------------------------------------------
1265 // finding (return wxNOT_FOUND if not found and index otherwise)
1266 // ---------------------------------------------------------------------------
1269 int wxString::Find(wxUniChar ch
, bool bFromEnd
) const
1271 size_type idx
= bFromEnd
? find_last_of(ch
) : find_first_of(ch
);
1273 return (idx
== npos
) ? wxNOT_FOUND
: (int)idx
;
1276 // ----------------------------------------------------------------------------
1277 // conversion to numbers
1278 // ----------------------------------------------------------------------------
1280 // the implementation of all the functions below is exactly the same so factor
1283 template <typename T
, typename F
>
1284 bool wxStringToIntType(const wxChar
*start
,
1289 wxCHECK_MSG( val
, false, _T("NULL output pointer") );
1290 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), _T("invalid base") );
1297 *val
= (*func
)(start
, &end
, base
);
1299 // return true only if scan was stopped by the terminating NUL and if the
1300 // string was not empty to start with and no under/overflow occurred
1301 return !*end
&& (end
!= start
)
1303 && (errno
!= ERANGE
)
1308 bool wxString::ToLong(long *val
, int base
) const
1310 return wxStringToIntType((const wxChar
*)c_str(), val
, base
, wxStrtol
);
1313 bool wxString::ToULong(unsigned long *val
, int base
) const
1315 return wxStringToIntType((const wxChar
*)c_str(), val
, base
, wxStrtoul
);
1318 bool wxString::ToLongLong(wxLongLong_t
*val
, int base
) const
1320 #ifdef wxHAS_STRTOLL
1321 return wxStringToIntType((const wxChar
*)c_str(), val
, base
, wxStrtoll
);
1323 // TODO: implement this ourselves
1327 #endif // wxHAS_STRTOLL
1330 bool wxString::ToULongLong(wxULongLong_t
*val
, int base
) const
1332 #ifdef wxHAS_STRTOLL
1333 return wxStringToIntType((const wxChar
*)c_str(), val
, base
, wxStrtoull
);
1335 // TODO: implement this ourselves
1342 bool wxString::ToDouble(double *val
) const
1344 wxCHECK_MSG( val
, false, _T("NULL pointer in wxString::ToDouble") );
1350 const wxChar
*start
= c_str();
1352 *val
= wxStrtod(start
, &end
);
1354 // return true only if scan was stopped by the terminating NUL and if the
1355 // string was not empty to start with and no under/overflow occurred
1356 return !*end
&& (end
!= start
)
1358 && (errno
!= ERANGE
)
1363 // ---------------------------------------------------------------------------
1365 // ---------------------------------------------------------------------------
1368 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1369 wxString
wxStringPrintfMixinBase::DoFormat(const wxChar
*format
, ...)
1371 wxString
wxString::DoFormat(const wxChar
*format
, ...)
1375 va_start(argptr
, format
);
1378 s
.PrintfV(format
, argptr
);
1386 wxString
wxString::FormatV(const wxString
& format
, va_list argptr
)
1389 s
.PrintfV(format
, argptr
);
1393 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1394 int wxStringPrintfMixinBase::DoPrintf(const wxChar
*format
, ...)
1396 int wxString::DoPrintf(const wxChar
*format
, ...)
1400 va_start(argptr
, format
);
1402 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
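// get a pointer to the wxString instance by downcasting from the mixin
// base class.
// NOTE(review): this comment used to say that dynamic_cast<> has to be used
// because it is the only cast that is safe for downcasting with multiple
// inheritance, but the code below uses static_cast<> -- confirm which one
// is intended.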
1406 wxString
*str
= static_cast<wxString
*>(this);
1408 wxString
*str
= this;
1411 int iLen
= str
->PrintfV(format
, argptr
);
// Format into this string using an already started va_list.
//
// From the visible code: a wxStringBuffer over *this is requested with room
// for size + 1 characters, wxVsnprintf() is run on a copy of argptr, the
// buffer is explicitly NUL-terminated, and `size` is enlarged when the
// output did not fit.
// NOTE(review): the retry loop, the error branches and the return
// statements are not fully visible in this excerpt -- confirm against the
// complete function before relying on this summary.
1418 int wxString::PrintfV(const wxString
& format
, va_list argptr
)
// one character more than `size` is requested; presumably `buf` refers to
// this buffer, which keeps the buf[size] write below in bounds -- confirm
1424 wxStringBuffer
tmp(*this, size
+ 1);
1433 // wxVsnprintf() may modify the original arg pointer, so pass it
1436 wxVaCopy(argptrcopy
, argptr
);
1437 int len
= wxVsnprintf(buf
, size
, format
, argptrcopy
);
1440 // some implementations of vsnprintf() don't NUL terminate
1441 // the string if there is not enough space for it so
1442 // always do it manually
1443 buf
[size
] = _T('\0');
1445 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1446 // total number of characters which would have been written if the
1447 // buffer were large enough (newer standards such as Unix98)
1450 #if wxUSE_WXVSNPRINTF
1451 // we know that our own implementation of wxVsnprintf() returns -1
1452 // only for a format error - thus there's something wrong with
1453 // the user's format string
1455 #else // assume that system version only returns error if not enough space
1456 // still not enough, as we don't know how much we need, double the
1457 // current size of the buffer
1459 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
// the reported length does not fit into the current buffer
1461 else if ( len
>= size
)
1463 #if wxUSE_WXVSNPRINTF
1464 // we know that our own implementation of wxVsnprintf() returns
1465 // size+1 when there's not enough space but that's not the size
1466 // of the required buffer!
1467 size
*= 2; // so we just double the current size of the buffer
1469 // some vsnprintf() implementations NUL-terminate the buffer and
1470 // some don't in len == size case, to be safe always add 1
1474 else // ok, there was enough space
1480 // we could have overshot
1486 // ----------------------------------------------------------------------------
1487 // misc other operations
1488 // ----------------------------------------------------------------------------
1490 // returns true if the string matches the pattern which may contain '*' and
1491 // '?' metacharacters (as usual, '?' matches any character and '*' any number
// of them)
//
// The regex-based variant below is compiled out with "#if 0"; the live
// implementation is the hand-written matcher in the "#else" branch.
// When wxUSE_UNICODE_UTF8 is set, both the mask and the text are first
// converted with wc_str() and matched through the resulting buffers;
// otherwise the wx_str() pointers are used directly.
1493 bool wxString::Matches(const wxString
& mask
) const
1495 // I disable this code as it doesn't seem to be faster (in fact, it seems
1496 // to be much slower) than the old, hand-written code below and using it
1497 // here requires always linking with libregex even if the user code doesn't
1499 #if 0 // wxUSE_REGEX
1500 // first translate the shell-like mask into a regex
1502 pattern
.reserve(wxStrlen(pszMask
));
1514 pattern
+= _T(".*");
1525 // these characters are special in a RE, quote them
1526 // (however note that we don't quote '[' and ']' to allow
1527 // using them for Unix shell like matching)
1528 pattern
+= _T('\\');
1532 pattern
+= *pszMask
;
1540 return wxRegEx(pattern
, wxRE_NOSUB
| wxRE_EXTENDED
).Matches(c_str());
1541 #else // !wxUSE_REGEX
1542 // TODO: this is, of course, awfully inefficient...
1544 // FIXME-UTF8: implement using iterators, remove #if
1545 #if wxUSE_UNICODE_UTF8
// keep the converted buffers in named variables for as long as the raw
// pointers taken from them below are in use
1546 wxWCharBuffer maskBuf
= mask
.wc_str();
1547 wxWCharBuffer txtBuf
= wc_str();
1548 const wxChar
*pszMask
= maskBuf
.data();
1549 const wxChar
*pszTxt
= txtBuf
.data();
1551 const wxChar
*pszMask
= mask
.wx_str();
1552 // the char currently being checked
1553 const wxChar
*pszTxt
= wx_str();
1556 // the last location where '*' matched
1557 const wxChar
*pszLastStarInText
= NULL
;
1558 const wxChar
*pszLastStarInMask
= NULL
;
// walk the mask one character at a time until its terminating NUL; the
// text pointer is advanced in step by the loop statement
1561 for ( ; *pszMask
!= wxT('\0'); pszMask
++, pszTxt
++ ) {
1562 switch ( *pszMask
) {
1564 if ( *pszTxt
== wxT('\0') )
1567 // pszTxt and pszMask will be incremented in the loop statement
1573 // remember where we started to be able to backtrack later
1574 pszLastStarInText
= pszTxt
;
1575 pszLastStarInMask
= pszMask
;
1577 // ignore special chars immediately following this one
1578 // (should this be an error?)
1579 while ( *pszMask
== wxT('*') || *pszMask
== wxT('?') )
1582 // if there is nothing more, match
1583 if ( *pszMask
== wxT('\0') )
1586 // are there any other metacharacters in the mask?
1588 const wxChar
*pEndMask
= wxStrpbrk(pszMask
, wxT("*?"));
1590 if ( pEndMask
!= NULL
) {
1591 // we have to match the string between two metachars
1592 uiLenMask
= pEndMask
- pszMask
;
1595 // we have to match the remainder of the string
1596 uiLenMask
= wxStrlen(pszMask
);
// search the remaining text for the literal run of uiLenMask mask
// characters that follows the metacharacter(s)
1599 wxString
strToMatch(pszMask
, uiLenMask
);
1600 const wxChar
* pMatch
= wxStrstr(pszTxt
, strToMatch
);
1601 if ( pMatch
== NULL
)
1604 // -1 to compensate "++" in the loop
1605 pszTxt
= pMatch
+ uiLenMask
- 1;
1606 pszMask
+= uiLenMask
- 1;
// ordinary mask character: compare it with the current text character
1611 if ( *pszMask
!= *pszTxt
)
1617 // match only if nothing left
1618 if ( *pszTxt
== wxT('\0') )
1621 // if we failed to match, backtrack if we can
1622 if ( pszLastStarInText
) {
// resume one text character after the position remembered for the last
// '*', with the mask rewound to that '*'
1623 pszTxt
= pszLastStarInText
+ 1;
1624 pszMask
= pszLastStarInMask
;
1626 pszLastStarInText
= NULL
;
1628 // don't bother resetting pszLastStarInMask, it's unnecessary
1634 #endif // wxUSE_REGEX/!wxUSE_REGEX
1637 // Count the number of chars
// The string is traversed from begin() to end() with a const_iterator.
// NOTE(review): presumably the loop counts the characters equal to `ch` and
// that count is the int returned -- the loop body and the return statement
// are not visible here, confirm.
1638 int wxString::Freq(wxUniChar ch
) const
1641 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1649 // convert to upper case, return the copy of the string
1650 wxString
wxString::Upper() const
1651 { wxString
s(*this); return s
.MakeUpper(); }
1653 // convert to lower case, return the copy of the string
1654 wxString
wxString::Lower() const { wxString
s(*this); return s
.MakeLower(); }