1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
15 * 1) all empty strings use g_strEmpty, nRefs = -1 (set in Init())
16 * 2) AllocBuffer() sets nRefs to 1, Lock() increments it by one
17 * 3) Unlock() decrements nRefs and frees memory if it goes to 0
20 // ===========================================================================
21 // headers, declarations, constants
22 // ===========================================================================
24 // For compilers that support precompilation, includes "wx.h".
25 #include "wx/wxprec.h"
32 #include "wx/string.h"
48 #include "wx/hashmap.h"
50 // string handling functions used by wxString:
51 #if wxUSE_UNICODE_UTF8
52 #define wxStringMemcpy memcpy
53 #define wxStringMemcmp memcmp
54 #define wxStringMemchr memchr
55 #define wxStringStrlen strlen
57 #define wxStringMemcpy wxTmemcpy
58 #define wxStringMemcmp wxTmemcmp
59 #define wxStringMemchr wxTmemchr
60 #define wxStringStrlen wxStrlen
64 // ---------------------------------------------------------------------------
65 // static class variables definition
66 // ---------------------------------------------------------------------------
68 //According to STL _must_ be a -1 size_t
69 const size_t wxString::npos
= (size_t) -1;
71 // ----------------------------------------------------------------------------
73 // ----------------------------------------------------------------------------
75 #if wxUSE_STD_IOSTREAM
79 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxCStrData
& str
)
81 // FIXME-UTF8: always, not only if wxUSE_UNICODE
82 #if wxUSE_UNICODE && !defined(__BORLANDC__)
83 return os
<< (const wchar_t*)str
.AsWCharBuf();
85 return os
<< (const char*)str
.AsCharBuf();
89 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxString
& str
)
91 return os
<< str
.c_str();
94 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxCharBuffer
& str
)
96 return os
<< str
.data();
100 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxWCharBuffer
& str
)
102 return os
<< str
.data();
106 #endif // wxUSE_STD_IOSTREAM
108 // ===========================================================================
109 // wxString class core
110 // ===========================================================================
112 #if wxUSE_UNICODE_UTF8
114 // ---------------------------------------------------------------------------
116 // ---------------------------------------------------------------------------
119 // Table 3.1B from Unicode spec: Legal UTF-8 Byte Sequences
121 // Code Points | 1st Byte | 2nd Byte | 3rd Byte | 4th Byte |
122 // -------------------+----------+----------+----------+----------+
123 // U+0000..U+007F | 00..7F | | | |
124 // U+0080..U+07FF | C2..DF | 80..BF | | |
125 // U+0800..U+0FFF | E0 | A0..BF | 80..BF | |
126 // U+1000..U+FFFF | E1..EF | 80..BF | 80..BF | |
127 // U+10000..U+3FFFF | F0 | 90..BF | 80..BF | 80..BF |
128 // U+40000..U+FFFFF | F1..F3 | 80..BF | 80..BF | 80..BF |
129 // U+100000..U+10FFFF | F4 | 80..8F | 80..BF | 80..BF |
130 // -------------------+----------+----------+----------+----------+
132 bool wxString::IsValidUtf8String(const char *str
)
135 return true; // empty string is UTF8 string
137 const unsigned char *c
= (const unsigned char*)str
;
141 unsigned char b
= *c
;
143 if ( b
<= 0x7F ) // 00..7F
146 else if ( b
< 0xC2 ) // invalid lead bytes: 80..C1
149 // two-byte sequences:
150 else if ( b
<= 0xDF ) // C2..DF
153 if ( !(b
>= 0x80 && b
<= 0xBF ) )
157 // three-byte sequences:
158 else if ( b
== 0xE0 )
161 if ( !(b
>= 0xA0 && b
<= 0xBF ) )
164 if ( !(b
>= 0x80 && b
<= 0xBF ) )
167 else if ( b
<= 0xEF ) // E1..EF
169 for ( int i
= 0; i
< 2; ++i
)
172 if ( !(b
>= 0x80 && b
<= 0xBF ) )
177 // four-byte sequences:
178 else if ( b
== 0xF0 )
181 if ( !(b
>= 0x90 && b
<= 0xBF ) )
183 for ( int i
= 0; i
< 2; ++i
)
186 if ( !(b
>= 0x80 && b
<= 0xBF ) )
190 else if ( b
<= 0xF3 ) // F1..F3
192 for ( int i
= 0; i
< 3; ++i
)
195 if ( !(b
>= 0x80 && b
<= 0xBF ) )
199 else if ( b
== 0xF4 )
202 if ( !(b
>= 0x80 && b
<= 0x8F ) )
204 for ( int i
= 0; i
< 2; ++i
)
207 if ( !(b
>= 0x80 && b
<= 0xBF ) )
211 else // otherwise, it's invalid lead byte
220 bool wxString::IsValidUtf8LeadByte(unsigned char c
)
222 return (c
<= 0x7F) || (c
>= 0xC2 && c
<= 0xF4);
226 unsigned char wxString::ms_utf8IterTable
[256] = {
227 // single-byte sequences (ASCII):
228 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 00..0F
229 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 10..1F
230 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 20..2F
231 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 30..3F
232 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 40..4F
233 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 50..5F
234 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 60..6F
235 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 70..7F
237 // these are invalid, we use step 1 to skip
238 // over them (should never happen):
239 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 80..8F
240 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 90..9F
241 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A0..AF
242 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B0..BF
245 // two-byte sequences:
246 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // C2..CF
247 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // D0..DF
249 // three-byte sequences:
250 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // E0..EF
252 // four-byte sequences:
253 4, 4, 4, 4, 4, // F0..F4
255 // these are invalid again (5- or 6-byte
256 // sequences and sequences for code points
257 // above U+10FFFF, as restricted by RFC 3629):
258 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F5..FF
262 void wxString::DecIter(wxStringImpl::const_iterator
& i
)
264 wxASSERT( IsValidUtf8LeadByte(*i
) );
266 // Non-lead bytes are all in the 0x80..0xBF range (i.e. 10xxxxxx in
267 // binary), so we just have to go back until we hit a byte that is either
268 // < 0x80 (i.e. 0xxxxxxx in binary) or 0xC0..0xFF (11xxxxxx in binary; this
269 // includes some invalid values, but we can ignore it here, because we
270 // assume valid UTF-8 input for the purpose of efficient implementation).
272 while ( ((*i
) & 0xC0) == 0x80 /* 2 highest bits are '10' */ )
277 void wxString::DecIter(wxStringImpl::iterator
& i
)
279 // FIXME-UTF8: use template instead
280 wxASSERT( IsValidUtf8LeadByte(*i
) );
282 while ( ((*i
) & 0xC0) == 0x80 /* 2 highest bits are '10' */ )
287 wxStringImpl::const_iterator
288 wxString::AddToIter(wxStringImpl::const_iterator i
, int n
)
290 wxStringImpl::const_iterator
out(i
);
294 for ( int j
= 0; j
< n
; ++j
)
299 for ( int j
= 0; j
> n
; --j
)
306 wxStringImpl::iterator
307 wxString::AddToIter(wxStringImpl::iterator i
, int n
)
309 // FIXME-UTF8: use template instead
310 wxStringImpl::iterator
out(i
);
314 for ( int j
= 0; j
< n
; ++j
)
319 for ( int j
= 0; j
> n
; --j
)
328 int wxString::DiffIters(wxStringImpl::const_iterator i1
,
329 wxStringImpl::const_iterator i2
)
353 int wxString::DiffIters(wxStringImpl::iterator i1
, wxStringImpl::iterator i2
)
355 // FIXME-UTF8: use template instead
379 wxString::Utf8CharBuffer
wxString::EncodeChar(wxUniChar ch
)
382 char *out
= buf
.data
;
384 wxUniChar::value_type code
= ch
.GetValue();
386 // Char. number range | UTF-8 octet sequence
387 // (hexadecimal) | (binary)
388 // ----------------------+---------------------------------------------
389 // 0000 0000 - 0000 007F | 0xxxxxxx
390 // 0000 0080 - 0000 07FF | 110xxxxx 10xxxxxx
391 // 0000 0800 - 0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
392 // 0001 0000 - 0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
394 // Code point value is stored in bits marked with 'x', lowest-order bit
395 // of the value on the right side in the diagram above.
403 else if ( code
<= 0x07FF )
406 // NB: this line takes 6 least significant bits, encodes them as
407 // 10xxxxxx and discards them so that the next byte can be encoded:
408 out
[1] = 0x80 | (code
& 0x3F); code
>>= 6;
409 out
[0] = 0xC0 | code
;
411 else if ( code
< 0xFFFF )
414 out
[2] = 0x80 | (code
& 0x3F); code
>>= 6;
415 out
[1] = 0x80 | (code
& 0x3F); code
>>= 6;
416 out
[0] = 0xE0 | code
;
418 else if ( code
<= 0x10FFFF )
421 out
[3] = 0x80 | (code
& 0x3F); code
>>= 6;
422 out
[2] = 0x80 | (code
& 0x3F); code
>>= 6;
423 out
[1] = 0x80 | (code
& 0x3F); code
>>= 6;
424 out
[0] = 0xF0 | code
;
428 wxFAIL_MSG( _T("trying to encode undefined Unicode character") );
436 wxUniChar
wxUniCharRef::DecodeChar(wxStringImpl::const_iterator i
)
438 wxASSERT( wxString::IsValidUtf8LeadByte(*i
) ); // FIXME-UTF8: no "wxString::"
440 wxUniChar::value_type code
= 0;
441 size_t len
= wxString::GetUtf8CharLength(*i
);
442 wxASSERT_MSG( len
<= 4, _T("invalid UTF-8 sequence length") );
444 // Char. number range | UTF-8 octet sequence
445 // (hexadecimal) | (binary)
446 // ----------------------+---------------------------------------------
447 // 0000 0000 - 0000 007F | 0xxxxxxx
448 // 0000 0080 - 0000 07FF | 110xxxxx 10xxxxxx
449 // 0000 0800 - 0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
450 // 0001 0000 - 0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
452 // Code point value is stored in bits marked with 'x', lowest-order bit
453 // of the value on the right side in the diagram above.
456 // mask to extract lead byte's value ('x' bits above), by sequence's length:
457 static const unsigned char s_leadValueMask
[4] = { 0x7F, 0x1F, 0x0F, 0x07 };
459 // mask and value of lead byte's most significant bits, by length:
460 static const unsigned char s_leadMarkerMask
[4] = { 0x80, 0xE0, 0xF0, 0xF8 };
461 static const unsigned char s_leadMarkerVal
[4] = { 0x00, 0xC0, 0xE0, 0xF0 };
464 // extract the lead byte's value bits:
465 wxASSERT_MSG( ((unsigned char)*i
& s_leadMarkerMask
[len
-1]) ==
466 s_leadMarkerVal
[len
-1],
467 _T("invalid UTF-8 lead byte") );
468 code
= (unsigned char)*i
& s_leadValueMask
[len
-1];
470 // all remaining bytes, if any, are handled in the same way regardless of
471 // sequence's length:
472 for ( ++i
; len
> 1; --len
, ++i
)
474 wxASSERT_MSG( ((unsigned char)*i
& 0xC0) == 0x80,
475 _T("invalid UTF-8 byte") );
478 code
|= (unsigned char)*i
& 0x3F;
481 return wxUniChar(code
);
485 wxCharBuffer
wxString::EncodeNChars(size_t n
, wxUniChar ch
)
487 Utf8CharBuffer
once(EncodeChar(ch
));
488 // the IncIter() table can be used to determine the length of ch's encoding:
489 size_t len
= ms_utf8IterTable
[(unsigned char)once
.data
[0]];
491 wxCharBuffer
buf(n
* len
);
492 char *ptr
= buf
.data();
493 for ( size_t i
= 0; i
< n
; i
++, ptr
+= len
)
495 memcpy(ptr
, once
.data
, len
);
502 void wxString::PosLenToImpl(size_t pos
, size_t len
,
503 size_t *implPos
, size_t *implLen
) const
509 const_iterator i
= begin() + pos
;
510 *implPos
= wxStringImpl::const_iterator(i
.impl()) - m_impl
.begin();
515 // too large length is interpreted as "to the end of the string"
516 // FIXME-UTF8: verify this is the case in std::string, assert
518 if ( pos
+ len
> length() )
519 len
= length() - pos
;
521 *implLen
= (i
+ len
).impl() - i
.impl();
526 #endif // wxUSE_UNICODE_UTF8
528 // ----------------------------------------------------------------------------
529 // wxCStrData converted strings caching
530 // ----------------------------------------------------------------------------
532 // FIXME-UTF8: temporarily disabled because it doesn't work with global
533 // string objects; re-enable after fixing this bug and benchmarking
534 // performance to see if using a hash is a good idea at all
537 // For backward compatibility reasons, it must be possible to assign the value
538 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
539 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
540 // because the memory would be freed immediately, but it has to be valid as long
541 // as the string is not modified, so that code like this still works:
543 // const wxChar *s = str.c_str();
544 // while ( s ) { ... }
546 // FIXME-UTF8: not thread safe!
547 // FIXME-UTF8: we currently clear the cached conversion only when the string is
548 // destroyed, but we should do it when the string is modified, to
549 // keep memory usage down
// FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
//             invalidated the cache on every change, we could keep and reuse
//             the previous conversion
553 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
554 // to use mb_str() or wc_str() instead of (const [w]char*)c_str()
557 static inline void DeleteStringFromConversionCache(T
& hash
, const wxString
*s
)
559 typename
T::iterator i
= hash
.find(wxConstCast(s
, wxString
));
560 if ( i
!= hash
.end() )
568 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
569 // so we have to use wxString* here and const-cast when used
570 WX_DECLARE_HASH_MAP(wxString
*, char*, wxPointerHash
, wxPointerEqual
,
571 wxStringCharConversionCache
);
572 static wxStringCharConversionCache gs_stringsCharCache
;
574 const char* wxCStrData::AsChar() const
576 // remove previously cache value, if any (see FIXMEs above):
577 DeleteStringFromConversionCache(gs_stringsCharCache
, m_str
);
579 // convert the string and keep it:
580 const char *s
= gs_stringsCharCache
[wxConstCast(m_str
, wxString
)] =
581 m_str
->mb_str().release();
585 #endif // wxUSE_UNICODE
587 #if !wxUSE_UNICODE_WCHAR
588 WX_DECLARE_HASH_MAP(wxString
*, wchar_t*, wxPointerHash
, wxPointerEqual
,
589 wxStringWCharConversionCache
);
590 static wxStringWCharConversionCache gs_stringsWCharCache
;
592 const wchar_t* wxCStrData::AsWChar() const
594 // remove previously cache value, if any (see FIXMEs above):
595 DeleteStringFromConversionCache(gs_stringsWCharCache
, m_str
);
597 // convert the string and keep it:
598 const wchar_t *s
= gs_stringsWCharCache
[wxConstCast(m_str
, wxString
)] =
599 m_str
->wc_str().release();
603 #endif // !wxUSE_UNICODE_WCHAR
605 wxString::~wxString()
608 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
609 DeleteStringFromConversionCache(gs_stringsCharCache
, this);
611 #if !wxUSE_UNICODE_WCHAR
612 DeleteStringFromConversionCache(gs_stringsWCharCache
, this);
618 const char* wxCStrData::AsChar() const
620 wxString
*str
= wxConstCast(m_str
, wxString
);
622 // convert the string:
623 wxCharBuffer
buf(str
->mb_str());
625 // FIXME-UTF8: do the conversion in-place in the existing buffer
626 if ( str
->m_convertedToChar
&&
627 strlen(buf
) == strlen(str
->m_convertedToChar
) )
629 // keep the same buffer for as long as possible, so that several calls
630 // to c_str() in a row still work:
631 strcpy(str
->m_convertedToChar
, buf
);
635 str
->m_convertedToChar
= buf
.release();
639 return str
->m_convertedToChar
+ m_offset
;
641 #endif // wxUSE_UNICODE
643 #if !wxUSE_UNICODE_WCHAR
644 const wchar_t* wxCStrData::AsWChar() const
646 wxString
*str
= wxConstCast(m_str
, wxString
);
648 // convert the string:
649 wxWCharBuffer
buf(str
->wc_str());
651 // FIXME-UTF8: do the conversion in-place in the existing buffer
652 if ( str
->m_convertedToWChar
&&
653 wxWcslen(buf
) == wxWcslen(str
->m_convertedToWChar
) )
655 // keep the same buffer for as long as possible, so that several calls
656 // to c_str() in a row still work:
657 memcpy(str
->m_convertedToWChar
, buf
, sizeof(wchar_t) * wxWcslen(buf
));
661 str
->m_convertedToWChar
= buf
.release();
665 return str
->m_convertedToWChar
+ m_offset
;
667 #endif // !wxUSE_UNICODE_WCHAR
669 // ===========================================================================
670 // wxString class core
671 // ===========================================================================
673 // ---------------------------------------------------------------------------
674 // construction and conversion
675 // ---------------------------------------------------------------------------
677 #if wxUSE_UNICODE_WCHAR
679 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
680 const wxMBConv
& conv
)
683 if ( !psz
|| nLength
== 0 )
684 return SubstrBufFromMB(L
"", 0);
686 if ( nLength
== npos
)
690 wxWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
692 return SubstrBufFromMB(_T(""), 0);
694 return SubstrBufFromMB(wcBuf
, wcLen
);
696 #endif // wxUSE_UNICODE_WCHAR
698 #if wxUSE_UNICODE_UTF8
700 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
701 const wxMBConv
& conv
)
703 // FIXME-UTF8: return as-is without copying under UTF8 locale, return
704 // converted string under other locales - needs wxCharBuffer
708 if ( !psz
|| nLength
== 0 )
709 return SubstrBufFromMB("", 0);
711 if ( nLength
== npos
)
714 // first convert to wide string:
716 wxWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
718 return SubstrBufFromMB("", 0);
720 // and then to UTF-8:
721 SubstrBufFromMB
buf(ConvertStr(wcBuf
, wcLen
, wxConvUTF8
));
722 // widechar -> UTF-8 conversion isn't supposed to ever fail:
723 wxASSERT_MSG( buf
.data
, _T("conversion to UTF-8 failed") );
727 #endif // wxUSE_UNICODE_UTF8
729 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
731 wxString::SubstrBufFromWC
wxString::ConvertStr(const wchar_t *pwz
, size_t nLength
,
732 const wxMBConv
& conv
)
735 if ( !pwz
|| nLength
== 0 )
736 return SubstrBufFromWC("", 0);
738 if ( nLength
== npos
)
742 wxCharBuffer
mbBuf(conv
.cWC2MB(pwz
, nLength
, &mbLen
));
744 return SubstrBufFromWC("", 0);
746 return SubstrBufFromWC(mbBuf
, mbLen
);
748 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
751 #if wxUSE_UNICODE_WCHAR
753 //Convert wxString in Unicode mode to a multi-byte string
754 const wxCharBuffer
wxString::mb_str(const wxMBConv
& conv
) const
756 return conv
.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL
);
759 #elif wxUSE_UNICODE_UTF8
761 const wxWCharBuffer
wxString::wc_str() const
763 return wxConvUTF8
.cMB2WC(m_impl
.c_str(),
764 m_impl
.length() + 1 /* size, not length */,
768 const wxCharBuffer
wxString::mb_str(const wxMBConv
& conv
) const
770 // FIXME-UTF8: optimize the case when conv==wxConvUTF8 or wxConvLibc
772 // FIXME-UTF8: use wc_str() here once we have buffers with length
776 wxConvUTF8
.cMB2WC(m_impl
.c_str(),
777 m_impl
.length() + 1 /* size, not length */,
780 return wxCharBuffer("");
782 return conv
.cWC2MB(wcBuf
, wcLen
, NULL
);
787 //Converts this string to a wide character string if unicode
788 //mode is not enabled and wxUSE_WCHAR_T is enabled
789 const wxWCharBuffer
wxString::wc_str(const wxMBConv
& conv
) const
791 return conv
.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL
);
794 #endif // Unicode/ANSI
796 // shrink to minimal size (releasing extra memory)
797 bool wxString::Shrink()
799 wxString
tmp(begin(), end());
801 return tmp
.length() == length();
804 // deprecated compatibility code:
805 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
806 wxChar
*wxString::GetWriteBuf(size_t nLen
)
808 return DoGetWriteBuf(nLen
);
811 void wxString::UngetWriteBuf()
816 void wxString::UngetWriteBuf(size_t nLen
)
818 DoUngetWriteBuf(nLen
);
820 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
823 // ---------------------------------------------------------------------------
825 // ---------------------------------------------------------------------------
827 // all functions are inline in string.h
829 // ---------------------------------------------------------------------------
830 // concatenation operators
831 // ---------------------------------------------------------------------------
834 * concatenation functions come in 5 flavours:
836 * char + string and string + char
837 * C str + string and string + C str
840 wxString
operator+(const wxString
& str1
, const wxString
& str2
)
842 #if !wxUSE_STL_BASED_WXSTRING
843 wxASSERT( str1
.IsValid() );
844 wxASSERT( str2
.IsValid() );
853 wxString
operator+(const wxString
& str
, wxUniChar ch
)
855 #if !wxUSE_STL_BASED_WXSTRING
856 wxASSERT( str
.IsValid() );
865 wxString
operator+(wxUniChar ch
, const wxString
& str
)
867 #if !wxUSE_STL_BASED_WXSTRING
868 wxASSERT( str
.IsValid() );
877 wxString
operator+(const wxString
& str
, const char *psz
)
879 #if !wxUSE_STL_BASED_WXSTRING
880 wxASSERT( str
.IsValid() );
884 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
885 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
893 wxString
operator+(const wxString
& str
, const wchar_t *pwz
)
895 #if !wxUSE_STL_BASED_WXSTRING
896 wxASSERT( str
.IsValid() );
900 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
901 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
909 wxString
operator+(const char *psz
, const wxString
& str
)
911 #if !wxUSE_STL_BASED_WXSTRING
912 wxASSERT( str
.IsValid() );
916 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
917 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
925 wxString
operator+(const wchar_t *pwz
, const wxString
& str
)
927 #if !wxUSE_STL_BASED_WXSTRING
928 wxASSERT( str
.IsValid() );
932 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
933 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
941 // ---------------------------------------------------------------------------
943 // ---------------------------------------------------------------------------
945 #ifdef HAVE_STD_STRING_COMPARE
947 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
948 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
949 // sort strings in characters code point order by sorting the byte sequence
950 // in byte values order (i.e. what strcmp() and memcmp() do).
952 int wxString::compare(const wxString
& str
) const
954 return m_impl
.compare(str
.m_impl
);
957 int wxString::compare(size_t nStart
, size_t nLen
,
958 const wxString
& str
) const
961 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
962 return m_impl
.compare(pos
, len
, str
.m_impl
);
965 int wxString::compare(size_t nStart
, size_t nLen
,
967 size_t nStart2
, size_t nLen2
) const
970 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
973 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
975 return m_impl
.compare(pos
, len
, str
.m_impl
, pos2
, len2
);
978 int wxString::compare(const char* sz
) const
980 return m_impl
.compare(ImplStr(sz
));
983 int wxString::compare(const wchar_t* sz
) const
985 return m_impl
.compare(ImplStr(sz
));
988 int wxString::compare(size_t nStart
, size_t nLen
,
989 const char* sz
, size_t nCount
) const
992 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
994 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
996 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
999 int wxString::compare(size_t nStart
, size_t nLen
,
1000 const wchar_t* sz
, size_t nCount
) const
1003 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
1005 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
1007 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
1010 #else // !HAVE_STD_STRING_COMPARE
1012 static inline int wxDoCmp(const wxStringCharType
* s1
, size_t l1
,
1013 const wxStringCharType
* s2
, size_t l2
)
1016 return wxStringMemcmp(s1
, s2
, l1
);
1019 int ret
= wxStringMemcmp(s1
, s2
, l1
);
1020 return ret
== 0 ? -1 : ret
;
1024 int ret
= wxStringMemcmp(s1
, s2
, l2
);
1025 return ret
== 0 ? +1 : ret
;
1029 int wxString::compare(const wxString
& str
) const
1031 return ::wxDoCmp(m_impl
.data(), m_impl
.length(),
1032 str
.m_impl
.data(), str
.m_impl
.length());
1035 int wxString::compare(size_t nStart
, size_t nLen
,
1036 const wxString
& str
) const
1038 wxASSERT(nStart
<= length());
1039 size_type strLen
= length() - nStart
;
1040 nLen
= strLen
< nLen
? strLen
: nLen
;
1043 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
1045 return ::wxDoCmp(m_impl
.data() + pos
, len
,
1046 str
.m_impl
.data(), str
.m_impl
.length());
1049 int wxString::compare(size_t nStart
, size_t nLen
,
1050 const wxString
& str
,
1051 size_t nStart2
, size_t nLen2
) const
1053 wxASSERT(nStart
<= length());
1054 wxASSERT(nStart2
<= str
.length());
1055 size_type strLen
= length() - nStart
,
1056 strLen2
= str
.length() - nStart2
;
1057 nLen
= strLen
< nLen
? strLen
: nLen
;
1058 nLen2
= strLen2
< nLen2
? strLen2
: nLen2
;
1061 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
1063 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
1065 return ::wxDoCmp(m_impl
.data() + pos
, len
,
1066 str
.m_impl
.data() + pos2
, len2
);
1069 int wxString::compare(const char* sz
) const
1071 SubstrBufFromMB
str(ImplStr(sz
, npos
));
1072 if ( str
.len
== npos
)
1073 str
.len
= wxStringStrlen(str
.data
);
1074 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
1077 int wxString::compare(const wchar_t* sz
) const
1079 SubstrBufFromWC
str(ImplStr(sz
, npos
));
1080 if ( str
.len
== npos
)
1081 str
.len
= wxStringStrlen(str
.data
);
1082 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
1085 int wxString::compare(size_t nStart
, size_t nLen
,
1086 const char* sz
, size_t nCount
) const
1088 wxASSERT(nStart
<= length());
1089 size_type strLen
= length() - nStart
;
1090 nLen
= strLen
< nLen
? strLen
: nLen
;
1093 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
1095 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
1096 if ( str
.len
== npos
)
1097 str
.len
= wxStringStrlen(str
.data
);
1099 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
1102 int wxString::compare(size_t nStart
, size_t nLen
,
1103 const wchar_t* sz
, size_t nCount
) const
1105 wxASSERT(nStart
<= length());
1106 size_type strLen
= length() - nStart
;
1107 nLen
= strLen
< nLen
? strLen
: nLen
;
1110 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
1112 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
1113 if ( str
.len
== npos
)
1114 str
.len
= wxStringStrlen(str
.data
);
1116 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
1119 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
1122 // ---------------------------------------------------------------------------
1123 // find_{first,last}_[not]_of functions
1124 // ---------------------------------------------------------------------------
1126 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1128 // NB: All these functions are implemented with the argument being wxChar*,
1129 // i.e. widechar string in any Unicode build, even though native string
1130 // representation is char* in the UTF-8 build. This is because we couldn't
1131 // use memchr() to determine if a character is in a set encoded as UTF-8.
1133 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
) const
1135 return find_first_of(sz
, nStart
, wxStrlen(sz
));
1138 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
) const
1140 return find_first_not_of(sz
, nStart
, wxStrlen(sz
));
1143 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
1145 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
1147 size_t idx
= nStart
;
1148 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
1150 if ( wxTmemchr(sz
, *i
, n
) )
1157 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
1159 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
1161 size_t idx
= nStart
;
1162 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
1164 if ( !wxTmemchr(sz
, *i
, n
) )
1172 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
) const
1174 return find_last_of(sz
, nStart
, wxStrlen(sz
));
1177 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
) const
1179 return find_last_not_of(sz
, nStart
, wxStrlen(sz
));
1182 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
1184 size_t len
= length();
1186 if ( nStart
== npos
)
1192 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
1195 size_t idx
= nStart
;
1196 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
1197 i
!= rend(); --idx
, ++i
)
1199 if ( wxTmemchr(sz
, *i
, n
) )
1206 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
1208 size_t len
= length();
1210 if ( nStart
== npos
)
1216 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
1219 size_t idx
= nStart
;
1220 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
1221 i
!= rend(); --idx
, ++i
)
1223 if ( !wxTmemchr(sz
, *i
, n
) )
1230 size_t wxString::find_first_not_of(wxUniChar ch
, size_t nStart
) const
1232 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
1234 size_t idx
= nStart
;
1235 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
1244 size_t wxString::find_last_not_of(wxUniChar ch
, size_t nStart
) const
1246 size_t len
= length();
1248 if ( nStart
== npos
)
1254 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
1257 size_t idx
= nStart
;
1258 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
1259 i
!= rend(); --idx
, ++i
)
// the functions above were implemented for wchar_t* arguments in Unicode
// build and char* in ANSI build; below are implementations for the other
// character type:
1272 #define wxOtherCharType char
1273 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
1275 #define wxOtherCharType wchar_t
1276 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
1279 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
) const
1280 { return find_first_of(STRCONV(sz
), nStart
); }
1282 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
,
1284 { return find_first_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1285 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
) const
1286 { return find_last_of(STRCONV(sz
), nStart
); }
1287 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
,
1289 { return find_last_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1290 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
1291 { return find_first_not_of(STRCONV(sz
), nStart
); }
1292 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
,
1294 { return find_first_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1295 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
1296 { return find_last_not_of(STRCONV(sz
), nStart
); }
1297 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
,
1299 { return find_last_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1301 #undef wxOtherCharType
1304 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1306 // ===========================================================================
1307 // other common string functions
1308 // ===========================================================================
1310 int wxString::CmpNoCase(const wxString
& s
) const
1312 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
1315 const_iterator i1
= begin();
1316 const_iterator end1
= end();
1317 const_iterator i2
= s
.begin();
1318 const_iterator end2
= s
.end();
1320 for ( ; i1
!= end1
&& i2
!= end2
; ++idx
, ++i1
, ++i2
)
1322 wxUniChar lower1
= (wxChar
)wxTolower(*i1
);
1323 wxUniChar lower2
= (wxChar
)wxTolower(*i2
);
1324 if ( lower1
!= lower2
)
1325 return lower1
< lower2
? -1 : 1;
1328 size_t len1
= length();
1329 size_t len2
= s
.length();
1333 else if ( len1
> len2
)
1342 #ifndef __SCHAR_MAX__
1343 #define __SCHAR_MAX__ 127
1347 wxString
wxString::FromAscii(const char *ascii
)
1350 return wxEmptyString
;
1352 size_t len
= strlen( ascii
);
1357 wxStringBuffer
buf(res
, len
);
1359 wchar_t *dest
= buf
;
1363 if ( (*dest
++ = (wchar_t)(unsigned char)*ascii
++) == L
'\0' )
1371 wxString
wxString::FromAscii(const char ascii
)
1373 // What do we do with '\0' ?
1376 res
+= (wchar_t)(unsigned char) ascii
;
// Converts the string to a narrow character buffer.
//
// A wxCharBuffer sized from length() is allocated and filled one character at
// a time from c_str(). Any wide character whose value is greater than
// SCHAR_MAX is replaced by '_'; all others are narrowed to char.
// NOTE(review): the preprocessor lines just before FromAscii() define
// __SCHAR_MAX__, while the expression below uses SCHAR_MAX -- confirm that
// SCHAR_MAX is always available here.
1381 const wxCharBuffer
wxString::ToAscii() const
1383 // this will allocate enough space for the terminating NUL too
1384 wxCharBuffer
buffer(length());
1387 char *dest
= buffer
.data();
1389 const wchar_t *pwc
= c_str();
// substitute '_' for anything that does not fit into a signed char
1392 *dest
++ = (char)(*pwc
> SCHAR_MAX
? wxT('_') : *pwc
);
1394 // the output string can't have embedded NULs anyhow, so we can safely
1395 // stop at first of them even if we do have any
1405 // extract string of length nCount starting at nFirst
1406 wxString
wxString::Mid(size_t nFirst
, size_t nCount
) const
1408 size_t nLen
= length();
1410 // default value of nCount is npos and means "till the end"
1411 if ( nCount
== npos
)
1413 nCount
= nLen
- nFirst
;
1416 // out-of-bounds requests return sensible things
1417 if ( nFirst
+ nCount
> nLen
)
1419 nCount
= nLen
- nFirst
;
1422 if ( nFirst
> nLen
)
1424 // AllocCopy() will return empty string
1425 return wxEmptyString
;
1428 wxString
dest(*this, nFirst
, nCount
);
1429 if ( dest
.length() != nCount
)
1431 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
// Tests whether this string begins with the given prefix.
//
// prefix: NUL-terminated string to look for; asserted to be non-NULL.
// rest:   optional output pointer. According to the comments below it
//         receives the part of the string following the prefix; the
//         assignment itself is not visible in this excerpt.
// The comparison walks prefix and the string data character by character and
// stops at the first pair that differs.
1437 // check that the string starts with prefix and return the rest of the string
1438 // in the provided pointer if it is not NULL, otherwise return false
1439 bool wxString::StartsWith(const wxChar
*prefix
, wxString
*rest
) const
1441 wxASSERT_MSG( prefix
, _T("invalid parameter in wxString::StartsWith") );
1443 // first check if the beginning of the string matches the prefix: note
1444 // that we don't have to check that we don't run out of this string as
1445 // when we reach the terminating NUL, either prefix string ends too (and
1446 // then it's ok) or we break out of the loop because there is no match
1447 const wxChar
*p
= c_str();
// compare the current pair of characters and advance both pointers
1450 if ( *prefix
++ != *p
++ )
1459 // put the rest of the string into provided pointer
1467 // check that the string ends with suffix and return the rest of it in the
1468 // provided pointer if it is not NULL, otherwise return false
1469 bool wxString::EndsWith(const wxChar
*suffix
, wxString
*rest
) const
1471 wxASSERT_MSG( suffix
, _T("invalid parameter in wxString::EndssWith") );
1473 int start
= length() - wxStrlen(suffix
);
1475 if ( start
< 0 || compare(start
, npos
, suffix
) != 0 )
1480 // put the rest of the string into provided pointer
1481 rest
->assign(*this, 0, start
);
1488 // extract nCount last (rightmost) characters
1489 wxString
wxString::Right(size_t nCount
) const
1491 if ( nCount
> length() )
1494 wxString
dest(*this, length() - nCount
, nCount
);
1495 if ( dest
.length() != nCount
) {
1496 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1501 // get all characters after the last occurence of ch
1502 // (returns the whole string if ch not found)
1503 wxString
wxString::AfterLast(wxUniChar ch
) const
1506 int iPos
= Find(ch
, true);
1507 if ( iPos
== wxNOT_FOUND
)
1510 str
= wx_str() + iPos
+ 1;
1515 // extract nCount first (leftmost) characters
1516 wxString
wxString::Left(size_t nCount
) const
1518 if ( nCount
> length() )
1521 wxString
dest(*this, 0, nCount
);
1522 if ( dest
.length() != nCount
) {
1523 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1528 // get all characters before the first occurence of ch
1529 // (returns the whole string if ch not found)
1530 wxString
wxString::BeforeFirst(wxUniChar ch
) const
1532 int iPos
= Find(ch
);
1533 if ( iPos
== wxNOT_FOUND
) iPos
= length();
1534 return wxString(*this, 0, iPos
);
1537 /// get all characters before the last occurence of ch
1538 /// (returns empty string if ch not found)
1539 wxString
wxString::BeforeLast(wxUniChar ch
) const
1542 int iPos
= Find(ch
, true);
1543 if ( iPos
!= wxNOT_FOUND
&& iPos
!= 0 )
1544 str
= wxString(c_str(), iPos
);
1549 /// get all characters after the first occurence of ch
1550 /// (returns empty string if ch not found)
1551 wxString
wxString::AfterFirst(wxUniChar ch
) const
1554 int iPos
= Find(ch
);
1555 if ( iPos
!= wxNOT_FOUND
)
1556 str
= wx_str() + iPos
+ 1;
1561 // replace first (or all) occurences of some substring with another one
1562 size_t wxString::Replace(const wxString
& strOld
,
1563 const wxString
& strNew
, bool bReplaceAll
)
1565 // if we tried to replace an empty string we'd enter an infinite loop below
1566 wxCHECK_MSG( !strOld
.empty(), 0,
1567 _T("wxString::Replace(): invalid parameter") );
1569 size_t uiCount
= 0; // count of replacements made
1571 size_t uiOldLen
= strOld
.length();
1572 size_t uiNewLen
= strNew
.length();
1576 while ( (*this)[dwPos
] != wxT('\0') )
1578 //DO NOT USE STRSTR HERE
1579 //this string can contain embedded null characters,
1580 //so strstr will function incorrectly
1581 dwPos
= find(strOld
, dwPos
);
1582 if ( dwPos
== npos
)
1583 break; // exit the loop
1586 //replace this occurance of the old string with the new one
1587 replace(dwPos
, uiOldLen
, strNew
, uiNewLen
);
1589 //move up pos past the string that was replaced
1592 //increase replace count
1597 break; // exit the loop
1604 bool wxString::IsAscii() const
1606 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1608 if ( !(*i
).IsAscii() )
1615 bool wxString::IsWord() const
1617 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1619 if ( !wxIsalpha(*i
) )
1626 bool wxString::IsNumber() const
1631 const_iterator i
= begin();
1633 if ( *i
== _T('-') || *i
== _T('+') )
1636 for ( ; i
!= end(); ++i
)
1638 if ( !wxIsdigit(*i
) )
1645 wxString
wxString::Strip(stripType w
) const
1648 if ( w
& leading
) s
.Trim(false);
1649 if ( w
& trailing
) s
.Trim(true);
1653 // ---------------------------------------------------------------------------
1655 // ---------------------------------------------------------------------------
1657 wxString
& wxString::MakeUpper()
1659 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1660 *it
= (wxChar
)wxToupper(*it
);
1665 wxString
& wxString::MakeLower()
1667 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1668 *it
= (wxChar
)wxTolower(*it
);
1673 // ---------------------------------------------------------------------------
1674 // trimming and padding
1675 // ---------------------------------------------------------------------------
1677 // some compilers (VC++ 6.0 not to name them) return true for a call to
1678 // isspace('ê') in the C locale which seems to be broken to me, but we have to
1679 // live with this by checking that the character is a 7 bit one - even if this
1680 // may fail to detect some spaces (I don't know if Unicode doesn't have
1681 // space-like symbols somewhere except in the first 128 chars), it is arguably
1682 // still better than trimming away accented letters
1683 inline int wxSafeIsspace(wxChar ch
) { return (ch
< 127) && wxIsspace(ch
); }
// Removes whitespace, as defined by wxSafeIsspace(), from one end of the
// string in place.
//
// bFromRight: selects the end to trim; the visible checks test the last
//             character when it is true and the first character when it is
//             false.
// Returns a reference to a wxString (the return statement is not visible in
// this excerpt).
1685 // trims spaces (in the sense of isspace) from left or right side
1686 wxString
& wxString::Trim(bool bFromRight
)
1688 // first check if we're going to modify the string at all
1691 (bFromRight
&& wxSafeIsspace(GetChar(length() - 1))) ||
1692 (!bFromRight
&& wxSafeIsspace(GetChar(0u)))
1698 // find last non-space character
// scan backwards from the end while the characters are spaces
1699 reverse_iterator psz
= rbegin();
1700 while ( (psz
!= rend()) && wxSafeIsspace(*psz
) )
1703 // truncate at trailing space start
// base() converts the reverse iterator into the forward position to erase from
1704 erase(psz
.base(), end());
1708 // find first non-space character
1709 iterator psz
= begin();
1710 while ( (psz
!= end()) && wxSafeIsspace(*psz
) )
1713 // fix up data and length
1714 erase(begin(), psz
);
// Pads the string with nCount copies of chPad on the side selected by
// bFromRight and returns a wxString reference.
// Only the construction of the padding string is visible in this excerpt; how
// it is joined with the current contents is not shown here.
1721 // adds nCount characters chPad to the string from either side
1722 wxString
& wxString::Pad(size_t nCount
, wxUniChar chPad
, bool bFromRight
)
// temporary string built from the pad character and the requested count
1724 wxString
s(chPad
, nCount
);
1737 // truncate the string
1738 wxString
& wxString::Truncate(size_t uiLen
)
1740 if ( uiLen
< length() )
1742 erase(begin() + uiLen
, end());
1744 //else: nothing to do, string is already short enough
1749 // ---------------------------------------------------------------------------
1750 // finding (return wxNOT_FOUND if not found and index otherwise)
1751 // ---------------------------------------------------------------------------
1754 int wxString::Find(wxUniChar ch
, bool bFromEnd
) const
1756 size_type idx
= bFromEnd
? find_last_of(ch
) : find_first_of(ch
);
1758 return (idx
== npos
) ? wxNOT_FOUND
: (int)idx
;
1761 // ----------------------------------------------------------------------------
1762 // conversion to numbers
1763 // ----------------------------------------------------------------------------
// Shared implementation of the string-to-integer conversions below.
//
// T is the integer type written through val and F the type of the conversion
// function func (the ToXXX() wrappers pass wxStrtol, wxStrtoul, wxStrtoll and
// wxStrtoull). Some parameter lines are not visible in this excerpt.
//
// A NULL val is rejected with a false return. base is asserted to be either 0
// or in the range 2..36. func is called as func(start, &end, base) and its
// result is stored in *val.
1765 // the implementation of all the functions below is exactly the same so factor
1768 template <typename T
, typename F
>
1769 bool wxStringToIntType(const wxChar
*start
,
1774 wxCHECK_MSG( val
, false, _T("NULL output pointer") );
1775 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), _T("invalid base") );
// end receives the position at which the conversion function stopped
1782 *val
= (*func
)(start
, &end
, base
);
1784 // return true only if scan was stopped by the terminating NUL and if the
1785 // string was not empty to start with and no under/overflow occurred
1786 return !*end
&& (end
!= start
)
1788 && (errno
!= ERANGE
)
1793 bool wxString::ToLong(long *val
, int base
) const
1795 return wxStringToIntType((const wxChar
*)c_str(), val
, base
, wxStrtol
);
1798 bool wxString::ToULong(unsigned long *val
, int base
) const
1800 return wxStringToIntType((const wxChar
*)c_str(), val
, base
, wxStrtoul
);
1803 bool wxString::ToLongLong(wxLongLong_t
*val
, int base
) const
1805 return wxStringToIntType((const wxChar
*)c_str(), val
, base
, wxStrtoll
);
1808 bool wxString::ToULongLong(wxULongLong_t
*val
, int base
) const
1810 return wxStringToIntType((const wxChar
*)c_str(), val
, base
, wxStrtoull
);
// Converts the string to a double using wxStrtod() and stores it in *val.
//
// A NULL val is rejected with a false return. The conversion is reported as
// successful only under the conditions spelled out in the comment before the
// return statement below.
1813 bool wxString::ToDouble(double *val
) const
1815 wxCHECK_MSG( val
, false, _T("NULL pointer in wxString::ToDouble") );
1821 const wxChar
*start
= c_str();
// end receives the position at which wxStrtod() stopped scanning
1823 *val
= wxStrtod(start
, &end
);
1825 // return true only if scan was stopped by the terminating NUL and if the
1826 // string was not empty to start with and no under/overflow occurred
1827 return !*end
&& (end
!= start
)
1829 && (errno
!= ERANGE
)
1834 // ---------------------------------------------------------------------------
1836 // ---------------------------------------------------------------------------
// printf-style formatting into a wxString.
//
// The function is defined as a member of wxStringPrintfMixinBase when
// wxNEEDS_WXSTRING_PRINTF_MIXIN is defined; the other visible signature
// defines it as a member of wxString. The variable arguments are collected
// with va_start() and forwarded, together with format, to PrintfV() on a
// string named s. The declaration of s and the return statement are not
// visible in this excerpt.
1839 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1840 wxString
wxStringPrintfMixinBase::DoFormat(const wxChar
*format
, ...)
1842 wxString
wxString::DoFormat(const wxChar
*format
, ...)
1846 va_start(argptr
, format
);
1849 s
.PrintfV(format
, argptr
);
1857 wxString
wxString::FormatV(const wxString
& format
, va_list argptr
)
1860 s
.PrintfV(format
, argptr
);
// printf-style formatting that stores its output in a wxString.
//
// The function is defined as a member of wxStringPrintfMixinBase when
// wxNEEDS_WXSTRING_PRINTF_MIXIN is defined; the other visible signature
// defines it as a member of wxString. Under the mixin macro the target string
// is obtained by casting this to wxString*; the other visible declaration
// uses this directly. The variable arguments are then forwarded to PrintfV()
// on that string and its result is kept in iLen.
//
// NOTE(review): the original comment below says dynamic_cast<> has to be
// used, but the code performs a static_cast<> -- confirm which is intended
// and update either the comment or the cast.
1864 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1865 int wxStringPrintfMixinBase::DoPrintf(const wxChar
*format
, ...)
1867 int wxString::DoPrintf(const wxChar
*format
, ...)
1871 va_start(argptr
, format
);
1873 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1874 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1875 // because it's the only cast that works safely for downcasting when
1876 // multiple inheritance is used:
1877 wxString
*str
= static_cast<wxString
*>(this);
1879 wxString
*str
= this;
1882 int iLen
= str
->PrintfV(format
, argptr
);
// Formats the arguments in argptr according to format into this string.
//
// The output is produced by wxVsnprintf() writing into a wxStringBuffer
// opened on this string with room for size + 1 characters. As the comments
// below explain, a result that does not fit makes the code enlarge size; the
// retry loop itself is not visible in this excerpt. The int return value is
// not visible here either.
1889 int wxString::PrintfV(const wxString
& format
, va_list argptr
)
// one extra character is reserved for the terminating NUL written below
1895 wxStringBuffer
tmp(*this, size
+ 1);
1904 // wxVsnprintf() may modify the original arg pointer, so pass it
// format from a copy so that argptr itself is left untouched
1907 wxVaCopy(argptrcopy
, argptr
);
1908 int len
= wxVsnprintf(buf
, size
, (const wxChar
*)/*FIXME-UTF8*/format
, argptrcopy
);
1911 // some implementations of vsnprintf() don't NUL terminate
1912 // the string if there is not enough space for it so
1913 // always do it manually
1914 buf
[size
] = _T('\0');
1916 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1917 // total number of characters which would have been written if the
1918 // buffer were large enough (newer standards such as Unix98)
1921 #if wxUSE_WXVSNPRINTF
1922 // we know that our own implementation of wxVsnprintf() returns -1
1923 // only for a format error - thus there's something wrong with
1924 // the user's format string
1926 #else // assume that system version only returns error if not enough space
1927 // still not enough, as we don't know how much we need, double the
1928 // current size of the buffer
1930 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1932 else if ( len
>= size
)
1934 #if wxUSE_WXVSNPRINTF
1935 // we know that our own implementation of wxVsnprintf() returns
1936 // size+1 when there's not enough space but that's not the size
1937 // of the required buffer!
1938 size
*= 2; // so we just double the current size of the buffer
1940 // some vsnprintf() implementations NUL-terminate the buffer and
1941 // some don't in len == size case, to be safe always add 1
1945 else // ok, there was enough space
1951 // we could have overshot
1957 // ----------------------------------------------------------------------------
1958 // misc other operations
1959 // ----------------------------------------------------------------------------
// Shell-style wildcard matching of this string against mask.
//
// The "#if 0" branch is disabled code that translated the mask into a regular
// expression; the comment at its top explains why it is not used. The active
// branch walks the mask and the text with two pointers. In wxUSE_UNICODE_UTF8
// builds both strings are first converted to wide character buffers.
//
// For a '*' the code remembers the current text and mask positions so that it
// can backtrack later, skips any metacharacters that immediately follow, and
// then searches the text with wxStrstr() for the literal run of mask
// characters up to the next metacharacter (or the end of the mask).
// NOTE(review): wxStrpbrk(), wxStrstr() and wxStrlen() operate on
// NUL-terminated data, so text or masks with embedded NULs are presumably
// only matched up to the first NUL -- confirm.
1961 // returns true if the string matches the pattern which may contain '*' and
1962 // '?' metacharacters (as usual, '?' matches any character and '*' any number
1964 bool wxString::Matches(const wxString
& mask
) const
1966 // I disable this code as it doesn't seem to be faster (in fact, it seems
1967 // to be much slower) than the old, hand-written code below and using it
1968 // here requires always linking with libregex even if the user code doesn't
1970 #if 0 // wxUSE_REGEX
1971 // first translate the shell-like mask into a regex
1973 pattern
.reserve(wxStrlen(pszMask
));
1985 pattern
+= _T(".*");
1996 // these characters are special in a RE, quote them
1997 // (however note that we don't quote '[' and ']' to allow
1998 // using them for Unix shell like matching)
1999 pattern
+= _T('\\');
2003 pattern
+= *pszMask
;
2011 return wxRegEx(pattern
, wxRE_NOSUB
| wxRE_EXTENDED
).Matches(c_str());
2012 #else // !wxUSE_REGEX
2013 // TODO: this is, of course, awfully inefficient...
2015 // FIXME-UTF8: implement using iterators, remove #if
2016 #if wxUSE_UNICODE_UTF8
// the buffers own the converted data that pszMask and pszTxt point into
2017 wxWCharBuffer maskBuf
= mask
.wc_str();
2018 wxWCharBuffer txtBuf
= wc_str();
2019 const wxChar
*pszMask
= maskBuf
.data();
2020 const wxChar
*pszTxt
= txtBuf
.data();
2022 const wxChar
*pszMask
= mask
.wx_str();
2023 // the char currently being checked
2024 const wxChar
*pszTxt
= wx_str();
2027 // the last location where '*' matched
2028 const wxChar
*pszLastStarInText
= NULL
;
2029 const wxChar
*pszLastStarInMask
= NULL
;
// main matching loop: mask and text advance together, one character per step
2032 for ( ; *pszMask
!= wxT('\0'); pszMask
++, pszTxt
++ ) {
2033 switch ( *pszMask
) {
2035 if ( *pszTxt
== wxT('\0') )
2038 // pszTxt and pszMask will be incremented in the loop statement
2044 // remember where we started to be able to backtrack later
2045 pszLastStarInText
= pszTxt
;
2046 pszLastStarInMask
= pszMask
;
2048 // ignore special chars immediately following this one
2049 // (should this be an error?)
2050 while ( *pszMask
== wxT('*') || *pszMask
== wxT('?') )
2053 // if there is nothing more, match
2054 if ( *pszMask
== wxT('\0') )
2057 // are there any other metacharacters in the mask?
2059 const wxChar
*pEndMask
= wxStrpbrk(pszMask
, wxT("*?"));
2061 if ( pEndMask
!= NULL
) {
2062 // we have to match the string between two metachars
2063 uiLenMask
= pEndMask
- pszMask
;
2066 // we have to match the remainder of the string
2067 uiLenMask
= wxStrlen(pszMask
);
// look for the literal run of mask characters anywhere in the remaining text
2070 wxString
strToMatch(pszMask
, uiLenMask
);
2071 const wxChar
* pMatch
= wxStrstr(pszTxt
, strToMatch
);
2072 if ( pMatch
== NULL
)
2075 // -1 to compensate "++" in the loop
2076 pszTxt
= pMatch
+ uiLenMask
- 1;
2077 pszMask
+= uiLenMask
- 1;
2082 if ( *pszMask
!= *pszTxt
)
2088 // match only if nothing left
2089 if ( *pszTxt
== wxT('\0') )
2092 // if we failed to match, backtrack if we can
// retry from one character past the text position recorded at the last '*'
2093 if ( pszLastStarInText
) {
2094 pszTxt
= pszLastStarInText
+ 1;
2095 pszMask
= pszLastStarInMask
;
2097 pszLastStarInText
= NULL
;
2099 // don't bother resetting pszLastStarInMask, it's unnecessary
2105 #endif // wxUSE_REGEX/!wxUSE_REGEX
// Iterates over every character of the string from begin() to end() and
// returns an int.
// NOTE(review): presumably the result is the number of occurrences of ch; the
// counting statements are not visible in this excerpt -- confirm.
2108 // Count the number of chars
2109 int wxString::Freq(wxUniChar ch
) const
2112 for ( const_iterator i
= begin(); i
!= end(); ++i
)
2120 // convert to upper case, return the copy of the string
2121 wxString
wxString::Upper() const
2122 { wxString
s(*this); return s
.MakeUpper(); }
2124 // convert to lower case, return the copy of the string
2125 wxString
wxString::Lower() const { wxString
s(*this); return s
.MakeLower(); }