1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
13 // ===========================================================================
14 // headers, declarations, constants
15 // ===========================================================================
17 // For compilers that support precompilation, includes "wx.h".
18 #include "wx/wxprec.h"
25 #include "wx/string.h"
26 #include "wx/wxcrtvararg.h"
38 #include "wx/hashmap.h"
39 #include "wx/vector.h"
40 #include "wx/xlocale.h"
43 #include "wx/msw/wrapwin.h"
46 // string handling functions used by wxString:
47 #if wxUSE_UNICODE_UTF8
48 #define wxStringMemcpy memcpy
49 #define wxStringMemcmp memcmp
50 #define wxStringMemchr memchr
51 #define wxStringStrlen strlen
53 #define wxStringMemcpy wxTmemcpy
54 #define wxStringMemcmp wxTmemcmp
55 #define wxStringMemchr wxTmemchr
56 #define wxStringStrlen wxStrlen
59 // ----------------------------------------------------------------------------
61 // ----------------------------------------------------------------------------
66 static UntypedBufferData
s_untypedNullData(NULL
, 0);
68 UntypedBufferData
* const untypedNullDataPtr
= &s_untypedNullData
;
70 } // namespace wxPrivate
72 // ---------------------------------------------------------------------------
73 // static class variables definition
74 // ---------------------------------------------------------------------------
76 //According to STL _must_ be a -1 size_t
77 const size_t wxString::npos
= (size_t) -1;
79 #if wxUSE_STRING_POS_CACHE
81 #ifdef wxHAS_COMPILER_TLS
83 wxTLS_TYPE(wxString::Cache
) wxString::ms_cache
;
85 #else // !wxHAS_COMPILER_TLS
87 struct wxStrCacheInitializer
89 wxStrCacheInitializer()
91 // calling this function triggers s_cache initialization in it, and
92 // from now on it becomes safe to call from multiple threads
98 wxString::Cache& wxString::GetCache()
100 static wxTLS_TYPE(Cache) s_cache;
102 return wxTLS_VALUE(s_cache);
106 static wxStrCacheInitializer gs_stringCacheInit
;
108 #endif // wxHAS_COMPILER_TLS/!wxHAS_COMPILER_TLS
110 // gdb seems to be unable to display thread-local variables correctly, at least
111 // not my 6.4.98 version under amd64, so provide this debugging helper to do it
112 #if wxDEBUG_LEVEL >= 2
114 struct wxStrCacheDumper
116 static void ShowAll()
118 puts("*** wxString cache dump:");
119 for ( unsigned n
= 0; n
< wxString::Cache::SIZE
; n
++ )
121 const wxString::Cache::Element
&
122 c
= wxString::GetCacheBegin()[n
];
124 printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
126 n
== wxString::LastUsedCacheElement() ? " [*]" : "",
128 (unsigned long)c
.pos
,
129 (unsigned long)c
.impl
,
135 void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
137 #endif // wxDEBUG_LEVEL >= 2
139 #ifdef wxPROFILE_STRING_CACHE
141 wxString::CacheStats
wxString::ms_cacheStats
;
143 struct wxStrCacheStatsDumper
145 ~wxStrCacheStatsDumper()
147 const wxString::CacheStats
& stats
= wxString::ms_cacheStats
;
151 puts("*** wxString cache statistics:");
152 printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
154 printf("\tHits %u (of which %u not used) or %.2f%%\n",
157 100.*float(stats
.poshits
- stats
.mishits
)/stats
.postot
);
158 printf("\tAverage position requested: %.2f\n",
159 float(stats
.sumpos
) / stats
.postot
);
160 printf("\tAverage offset after cached hint: %.2f\n",
161 float(stats
.sumofs
) / stats
.postot
);
166 printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
167 stats
.lentot
, 100.*float(stats
.lenhits
)/stats
.lentot
);
172 static wxStrCacheStatsDumper s_showCacheStats
;
174 #endif // wxPROFILE_STRING_CACHE
176 #endif // wxUSE_STRING_POS_CACHE
178 // ----------------------------------------------------------------------------
180 // ----------------------------------------------------------------------------
182 #if wxUSE_STD_IOSTREAM
186 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxCStrData
& str
)
188 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
189 const wxScopedCharBuffer
buf(str
.AsCharBuf());
191 os
.clear(wxSTD
ios_base::failbit
);
197 return os
<< str
.AsInternal();
201 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxString
& str
)
203 return os
<< str
.c_str();
206 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxScopedCharBuffer
& str
)
208 return os
<< str
.data();
212 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxScopedWCharBuffer
& str
)
214 return os
<< str
.data();
218 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
220 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxString
& str
)
222 return wos
<< str
.wc_str();
225 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxCStrData
& str
)
227 return wos
<< str
.AsWChar();
230 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxScopedWCharBuffer
& str
)
232 return wos
<< str
.data();
235 #endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
237 #endif // wxUSE_STD_IOSTREAM
239 // ===========================================================================
240 // wxString class core
241 // ===========================================================================
243 #if wxUSE_UNICODE_UTF8
245 void wxString::PosLenToImpl(size_t pos
, size_t len
,
246 size_t *implPos
, size_t *implLen
) const
252 else // have valid start position
254 const const_iterator b
= GetIterForNthChar(pos
);
255 *implPos
= wxStringImpl::const_iterator(b
.impl()) - m_impl
.begin();
260 else // have valid length too
262 // we need to handle the case of length specifying a substring
263 // going beyond the end of the string, just as std::string does
264 const const_iterator
e(end());
266 while ( len
&& i
<= e
)
272 *implLen
= i
.impl() - b
.impl();
277 #endif // wxUSE_UNICODE_UTF8
279 // ----------------------------------------------------------------------------
280 // wxCStrData converted strings caching
281 // ----------------------------------------------------------------------------
283 // FIXME-UTF8: temporarily disabled because it doesn't work with global
284 // string objects; re-enable after fixing this bug and benchmarking
285 // performance to see if using a hash is a good idea at all
288 // For backward compatibility reasons, it must be possible to assign the value
289 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
290 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
291 // because the memory would be freed immediately, but it has to be valid as long
292 // as the string is not modified, so that code like this still works:
294 // const wxChar *s = str.c_str();
295 // while ( s ) { ... }
297 // FIXME-UTF8: not thread safe!
298 // FIXME-UTF8: we currently clear the cached conversion only when the string is
299 // destroyed, but we should do it when the string is modified, to
300 // keep memory usage down
301 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
302 // invalidated the cache on every change, we could keep the previous conversion result
304 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
305 // to use mb_str() or wc_str() instead of (const [w]char*)c_str()
308 static inline void DeleteStringFromConversionCache(T
& hash
, const wxString
*s
)
310 typename
T::iterator i
= hash
.find(wxConstCast(s
, wxString
));
311 if ( i
!= hash
.end() )
319 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
320 // so we have to use wxString* here and const-cast when used
321 WX_DECLARE_HASH_MAP(wxString
*, char*, wxPointerHash
, wxPointerEqual
,
322 wxStringCharConversionCache
);
323 static wxStringCharConversionCache gs_stringsCharCache
;
325 const char* wxCStrData::AsChar() const
327 // remove previously cached value, if any (see FIXMEs above):
328 DeleteStringFromConversionCache(gs_stringsCharCache
, m_str
);
330 // convert the string and keep it:
331 const char *s
= gs_stringsCharCache
[wxConstCast(m_str
, wxString
)] =
332 m_str
->mb_str().release();
336 #endif // wxUSE_UNICODE
338 #if !wxUSE_UNICODE_WCHAR
339 WX_DECLARE_HASH_MAP(wxString
*, wchar_t*, wxPointerHash
, wxPointerEqual
,
340 wxStringWCharConversionCache
);
341 static wxStringWCharConversionCache gs_stringsWCharCache
;
343 const wchar_t* wxCStrData::AsWChar() const
345 // remove previously cached value, if any (see FIXMEs above):
346 DeleteStringFromConversionCache(gs_stringsWCharCache
, m_str
);
348 // convert the string and keep it:
349 const wchar_t *s
= gs_stringsWCharCache
[wxConstCast(m_str
, wxString
)] =
350 m_str
->wc_str().release();
354 #endif // !wxUSE_UNICODE_WCHAR
356 wxString::~wxString()
359 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
360 DeleteStringFromConversionCache(gs_stringsCharCache
, this);
362 #if !wxUSE_UNICODE_WCHAR
363 DeleteStringFromConversionCache(gs_stringsWCharCache
, this);
368 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
369 const char* wxCStrData::AsChar() const
371 #if wxUSE_UNICODE_UTF8
372 if ( wxLocaleIsUtf8
)
375 // under non-UTF8 locales, we have to convert the internal UTF-8
376 // representation using wxConvLibc and cache the result
378 wxString
*str
= wxConstCast(m_str
, wxString
);
380 // convert the string:
382 // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
383 // have it) but it's unfortunately not obvious to implement
384 // because we don't know how big a buffer we need for the
385 // given string length (in case of multibyte encodings, e.g.
386 // ISO-2022-JP or UTF-8 when internal representation is wchar_t)
388 // One idea would be to store more than just m_convertedToChar
389 // in wxString: then we could record the length of the string
390 // which was converted the last time and try to reuse the same
391 // buffer if the current length is not greater than it (this
392 // could still fail because string could have been modified in
393 // place but it would work most of the time, so we'd do it and
394 // only allocate the new buffer if in-place conversion returned
395 // an error). We could also store a bit saying if the string
396 // was modified since the last conversion (and update it in all
397 // operation modifying the string, of course) to avoid unneeded
398 // consequential conversions. But both of these ideas require
399 // adding more fields to wxString and require profiling results
400 // to be sure that we really gain enough from them to justify doing it.
402 wxScopedCharBuffer
buf(str
->mb_str());
404 // if it failed, return empty string and not NULL to avoid crashes in code
405 // written with either wxWidgets 2 wxString or std::string behaviour in
406 // mind: neither of them ever returns NULL and so we shouldn't either
410 if ( str
->m_convertedToChar
&&
411 strlen(buf
) == strlen(str
->m_convertedToChar
) )
413 // keep the same buffer for as long as possible, so that several calls
414 // to c_str() in a row still work:
415 strcpy(str
->m_convertedToChar
, buf
);
419 str
->m_convertedToChar
= buf
.release();
423 return str
->m_convertedToChar
+ m_offset
;
425 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
427 #if !wxUSE_UNICODE_WCHAR
428 const wchar_t* wxCStrData::AsWChar() const
430 wxString
*str
= wxConstCast(m_str
, wxString
);
432 // convert the string:
433 wxScopedWCharBuffer
buf(str
->wc_str());
435 // notice that here, unlike above in AsChar(), conversion can't fail as our
436 // internal UTF-8 is always well-formed -- or the string was corrupted and
437 // all bets are off anyhow
439 // FIXME-UTF8: do the conversion in-place in the existing buffer
440 if ( str
->m_convertedToWChar
&&
441 wxWcslen(buf
) == wxWcslen(str
->m_convertedToWChar
) )
443 // keep the same buffer for as long as possible, so that several calls
444 // to c_str() in a row still work:
445 memcpy(str
->m_convertedToWChar
, buf
, sizeof(wchar_t) * wxWcslen(buf
));
449 str
->m_convertedToWChar
= buf
.release();
453 return str
->m_convertedToWChar
+ m_offset
;
455 #endif // !wxUSE_UNICODE_WCHAR
457 // ===========================================================================
458 // wxString class core
459 // ===========================================================================
461 // ---------------------------------------------------------------------------
462 // construction and conversion
463 // ---------------------------------------------------------------------------
465 #if wxUSE_UNICODE_WCHAR
467 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
468 const wxMBConv
& conv
)
471 if ( !psz
|| nLength
== 0 )
472 return SubstrBufFromMB(wxWCharBuffer(L
""), 0);
474 if ( nLength
== npos
)
478 wxScopedWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
480 return SubstrBufFromMB(wxWCharBuffer(L
""), 0);
482 return SubstrBufFromMB(wcBuf
, wcLen
);
484 #endif // wxUSE_UNICODE_WCHAR
486 #if wxUSE_UNICODE_UTF8
488 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
489 const wxMBConv
& conv
)
492 if ( !psz
|| nLength
== 0 )
493 return SubstrBufFromMB(wxCharBuffer(""), 0);
495 // if psz is already in UTF-8, we don't have to do the roundtrip to
496 // wchar_t* and back:
499 // we need to validate the input because UTF8 iterators assume valid
500 // UTF-8 sequence and psz may be invalid:
501 if ( wxStringOperations::IsValidUtf8String(psz
, nLength
) )
503 // we must pass the real string length to SubstrBufFromMB ctor
504 if ( nLength
== npos
)
505 nLength
= psz
? strlen(psz
) : 0;
506 return SubstrBufFromMB(wxScopedCharBuffer::CreateNonOwned(psz
, nLength
),
509 // else: do the roundtrip through wchar_t*
512 if ( nLength
== npos
)
515 // first convert to wide string:
517 wxScopedWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
519 return SubstrBufFromMB(wxCharBuffer(""), 0);
521 // and then to UTF-8:
522 SubstrBufFromMB
buf(ConvertStr(wcBuf
, wcLen
, wxMBConvStrictUTF8()));
523 // widechar -> UTF-8 conversion isn't supposed to ever fail:
524 wxASSERT_MSG( buf
.data
, _T("conversion to UTF-8 failed") );
528 #endif // wxUSE_UNICODE_UTF8
530 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
532 wxString::SubstrBufFromWC
wxString::ConvertStr(const wchar_t *pwz
, size_t nLength
,
533 const wxMBConv
& conv
)
536 if ( !pwz
|| nLength
== 0 )
537 return SubstrBufFromWC(wxCharBuffer(""), 0);
539 if ( nLength
== npos
)
543 wxScopedCharBuffer
mbBuf(conv
.cWC2MB(pwz
, nLength
, &mbLen
));
545 return SubstrBufFromWC(wxCharBuffer(""), 0);
547 return SubstrBufFromWC(mbBuf
, mbLen
);
549 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
552 #if wxUSE_UNICODE_WCHAR
554 //Convert wxString in Unicode mode to a multi-byte string
555 const wxScopedCharBuffer
wxString::mb_str(const wxMBConv
& conv
) const
557 // NB: Length passed to cWC2MB() doesn't include terminating NUL, it's
558 // added by it automatically. If we passed length()+1 here, it would
559 // create a buffer with 2 trailing NULs of length one greater than
561 return conv
.cWC2MB(wx_str(), length(), NULL
);
564 #elif wxUSE_UNICODE_UTF8
566 const wxScopedWCharBuffer
wxString::wc_str() const
568 // NB: Length passed to cMB2WC() doesn't include terminating NUL, it's
569 // added by it automatically. If we passed length()+1 here, it would
570 // create a buffer with 2 trailing NULs of length one greater than expected.
572 return wxMBConvStrictUTF8().cMB2WC
580 const wxScopedCharBuffer
wxString::mb_str(const wxMBConv
& conv
) const
583 return wxScopedCharBuffer::CreateNonOwned(m_impl
.c_str(), m_impl
.length());
585 wxScopedWCharBuffer
wcBuf(wc_str());
586 if ( !wcBuf
.length() )
587 return wxCharBuffer("");
589 return conv
.cWC2MB(wcBuf
.data(), wcBuf
.length(), NULL
);
594 //Converts this string to a wide character string if unicode
595 //mode is not enabled and wxUSE_WCHAR_T is enabled
596 const wxScopedWCharBuffer
wxString::wc_str(const wxMBConv
& conv
) const
598 // NB: Length passed to cMB2WC() doesn't include terminating NUL, it's
599 // added by it automatically. If we passed length()+1 here, it would
600 // create a buffer with 2 trailing NULs of length one greater than
602 return conv
.cMB2WC(wx_str(), length(), NULL
);
605 #endif // Unicode/ANSI
607 // shrink to minimal size (releasing extra memory)
608 bool wxString::Shrink()
610 wxString
tmp(begin(), end());
612 return tmp
.length() == length();
615 // deprecated compatibility code:
616 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
617 wxStringCharType
*wxString::GetWriteBuf(size_t nLen
)
619 return DoGetWriteBuf(nLen
);
622 void wxString::UngetWriteBuf()
627 void wxString::UngetWriteBuf(size_t nLen
)
629 DoUngetWriteBuf(nLen
);
631 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
634 // ---------------------------------------------------------------------------
636 // ---------------------------------------------------------------------------
638 // all functions are inline in string.h
640 // ---------------------------------------------------------------------------
641 // concatenation operators
642 // ---------------------------------------------------------------------------
645 * concatenation functions come in 5 flavours:
647 * char + string and string + char
648 * C str + string and string + C str
651 wxString
operator+(const wxString
& str1
, const wxString
& str2
)
653 #if !wxUSE_STL_BASED_WXSTRING
654 wxASSERT( str1
.IsValid() );
655 wxASSERT( str2
.IsValid() );
664 wxString
operator+(const wxString
& str
, wxUniChar ch
)
666 #if !wxUSE_STL_BASED_WXSTRING
667 wxASSERT( str
.IsValid() );
676 wxString
operator+(wxUniChar ch
, const wxString
& str
)
678 #if !wxUSE_STL_BASED_WXSTRING
679 wxASSERT( str
.IsValid() );
688 wxString
operator+(const wxString
& str
, const char *psz
)
690 #if !wxUSE_STL_BASED_WXSTRING
691 wxASSERT( str
.IsValid() );
695 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
696 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
704 wxString
operator+(const wxString
& str
, const wchar_t *pwz
)
706 #if !wxUSE_STL_BASED_WXSTRING
707 wxASSERT( str
.IsValid() );
711 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
712 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
720 wxString
operator+(const char *psz
, const wxString
& str
)
722 #if !wxUSE_STL_BASED_WXSTRING
723 wxASSERT( str
.IsValid() );
727 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
728 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
736 wxString
operator+(const wchar_t *pwz
, const wxString
& str
)
738 #if !wxUSE_STL_BASED_WXSTRING
739 wxASSERT( str
.IsValid() );
743 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
744 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
752 // ---------------------------------------------------------------------------
754 // ---------------------------------------------------------------------------
756 bool wxString::IsSameAs(wxUniChar c
, bool compareWithCase
) const
758 return (length() == 1) && (compareWithCase
? GetChar(0u) == c
759 : wxToupper(GetChar(0u)) == wxToupper(c
));
762 #ifdef HAVE_STD_STRING_COMPARE
764 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
765 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
766 // sort strings in characters code point order by sorting the byte sequence
767 // in byte values order (i.e. what strcmp() and memcmp() do).
769 int wxString::compare(const wxString
& str
) const
771 return m_impl
.compare(str
.m_impl
);
774 int wxString::compare(size_t nStart
, size_t nLen
,
775 const wxString
& str
) const
778 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
779 return m_impl
.compare(pos
, len
, str
.m_impl
);
782 int wxString::compare(size_t nStart
, size_t nLen
,
784 size_t nStart2
, size_t nLen2
) const
787 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
790 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
792 return m_impl
.compare(pos
, len
, str
.m_impl
, pos2
, len2
);
795 int wxString::compare(const char* sz
) const
797 return m_impl
.compare(ImplStr(sz
));
800 int wxString::compare(const wchar_t* sz
) const
802 return m_impl
.compare(ImplStr(sz
));
805 int wxString::compare(size_t nStart
, size_t nLen
,
806 const char* sz
, size_t nCount
) const
809 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
811 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
813 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
816 int wxString::compare(size_t nStart
, size_t nLen
,
817 const wchar_t* sz
, size_t nCount
) const
820 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
822 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
824 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
827 #else // !HAVE_STD_STRING_COMPARE
829 static inline int wxDoCmp(const wxStringCharType
* s1
, size_t l1
,
830 const wxStringCharType
* s2
, size_t l2
)
833 return wxStringMemcmp(s1
, s2
, l1
);
836 int ret
= wxStringMemcmp(s1
, s2
, l1
);
837 return ret
== 0 ? -1 : ret
;
841 int ret
= wxStringMemcmp(s1
, s2
, l2
);
842 return ret
== 0 ? +1 : ret
;
846 int wxString::compare(const wxString
& str
) const
848 return ::wxDoCmp(m_impl
.data(), m_impl
.length(),
849 str
.m_impl
.data(), str
.m_impl
.length());
852 int wxString::compare(size_t nStart
, size_t nLen
,
853 const wxString
& str
) const
855 wxASSERT(nStart
<= length());
856 size_type strLen
= length() - nStart
;
857 nLen
= strLen
< nLen
? strLen
: nLen
;
860 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
862 return ::wxDoCmp(m_impl
.data() + pos
, len
,
863 str
.m_impl
.data(), str
.m_impl
.length());
866 int wxString::compare(size_t nStart
, size_t nLen
,
868 size_t nStart2
, size_t nLen2
) const
870 wxASSERT(nStart
<= length());
871 wxASSERT(nStart2
<= str
.length());
872 size_type strLen
= length() - nStart
,
873 strLen2
= str
.length() - nStart2
;
874 nLen
= strLen
< nLen
? strLen
: nLen
;
875 nLen2
= strLen2
< nLen2
? strLen2
: nLen2
;
878 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
880 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
882 return ::wxDoCmp(m_impl
.data() + pos
, len
,
883 str
.m_impl
.data() + pos2
, len2
);
886 int wxString::compare(const char* sz
) const
888 SubstrBufFromMB
str(ImplStr(sz
, npos
));
889 if ( str
.len
== npos
)
890 str
.len
= wxStringStrlen(str
.data
);
891 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
894 int wxString::compare(const wchar_t* sz
) const
896 SubstrBufFromWC
str(ImplStr(sz
, npos
));
897 if ( str
.len
== npos
)
898 str
.len
= wxStringStrlen(str
.data
);
899 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
902 int wxString::compare(size_t nStart
, size_t nLen
,
903 const char* sz
, size_t nCount
) const
905 wxASSERT(nStart
<= length());
906 size_type strLen
= length() - nStart
;
907 nLen
= strLen
< nLen
? strLen
: nLen
;
910 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
912 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
913 if ( str
.len
== npos
)
914 str
.len
= wxStringStrlen(str
.data
);
916 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
919 int wxString::compare(size_t nStart
, size_t nLen
,
920 const wchar_t* sz
, size_t nCount
) const
922 wxASSERT(nStart
<= length());
923 size_type strLen
= length() - nStart
;
924 nLen
= strLen
< nLen
? strLen
: nLen
;
927 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
929 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
930 if ( str
.len
== npos
)
931 str
.len
= wxStringStrlen(str
.data
);
933 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
936 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
939 // ---------------------------------------------------------------------------
940 // find_{first,last}_[not]_of functions
941 // ---------------------------------------------------------------------------
943 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
945 // NB: All these functions are implemented with the argument being wxChar*,
946 // i.e. widechar string in any Unicode build, even though native string
947 // representation is char* in the UTF-8 build. This is because we couldn't
948 // use memchr() to determine if a character is in a set encoded as UTF-8.
950 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
) const
952 return find_first_of(sz
, nStart
, wxStrlen(sz
));
955 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
) const
957 return find_first_not_of(sz
, nStart
, wxStrlen(sz
));
960 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
962 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
965 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
967 if ( wxTmemchr(sz
, *i
, n
) )
974 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
976 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
979 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
981 if ( !wxTmemchr(sz
, *i
, n
) )
989 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
) const
991 return find_last_of(sz
, nStart
, wxStrlen(sz
));
994 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
) const
996 return find_last_not_of(sz
, nStart
, wxStrlen(sz
));
999 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
1001 size_t len
= length();
1003 if ( nStart
== npos
)
1009 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
1012 size_t idx
= nStart
;
1013 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
1014 i
!= rend(); --idx
, ++i
)
1016 if ( wxTmemchr(sz
, *i
, n
) )
1023 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
1025 size_t len
= length();
1027 if ( nStart
== npos
)
1033 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
1036 size_t idx
= nStart
;
1037 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
1038 i
!= rend(); --idx
, ++i
)
1040 if ( !wxTmemchr(sz
, *i
, n
) )
1047 size_t wxString::find_first_not_of(wxUniChar ch
, size_t nStart
) const
1049 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
1051 size_t idx
= nStart
;
1052 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
1061 size_t wxString::find_last_not_of(wxUniChar ch
, size_t nStart
) const
1063 size_t len
= length();
1065 if ( nStart
== npos
)
1071 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
1074 size_t idx
= nStart
;
1075 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
1076 i
!= rend(); --idx
, ++i
)
1085 // the functions above were implemented for wchar_t* arguments in Unicode
1086 // build and char* in ANSI build; below are implementations for the other character type:
1089 #define wxOtherCharType char
1090 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
1092 #define wxOtherCharType wchar_t
1093 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
1096 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
) const
1097 { return find_first_of(STRCONV(sz
), nStart
); }
1099 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
,
1101 { return find_first_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1102 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
) const
1103 { return find_last_of(STRCONV(sz
), nStart
); }
1104 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
,
1106 { return find_last_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1107 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
1108 { return find_first_not_of(STRCONV(sz
), nStart
); }
1109 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
,
1111 { return find_first_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1112 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
1113 { return find_last_not_of(STRCONV(sz
), nStart
); }
1114 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
,
1116 { return find_last_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1118 #undef wxOtherCharType
1121 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1123 // ===========================================================================
1124 // other common string functions
1125 // ===========================================================================
1127 int wxString::CmpNoCase(const wxString
& s
) const
1129 #if defined(__WXMSW__) && !wxUSE_UNICODE_UTF8
1130 // prefer to use CompareString() if available as it's more efficient than
1131 // doing it manually or even using wxStricmp() (see #10375)
1132 switch ( ::CompareString(LOCALE_USER_DEFAULT
, NORM_IGNORECASE
,
1133 m_impl
.c_str(), m_impl
.length(),
1134 s
.m_impl
.c_str(), s
.m_impl
.length()) )
1136 case CSTR_LESS_THAN
:
1142 case CSTR_GREATER_THAN
:
1146 wxFAIL_MSG( "unexpected CompareString() return value" );
1150 wxLogLastError("CompareString");
1151 // use generic code below
1153 #endif // __WXMSW__ && !wxUSE_UNICODE_UTF8
1155 // do the comparison manually: notice that we can't use wxStricmp() as it
1156 // doesn't handle embedded NULs
1158 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
1159 const_iterator i1
= begin();
1160 const_iterator end1
= end();
1161 const_iterator i2
= s
.begin();
1162 const_iterator end2
= s
.end();
1164 for ( ; i1
!= end1
&& i2
!= end2
; ++i1
, ++i2
)
1166 wxUniChar lower1
= (wxChar
)wxTolower(*i1
);
1167 wxUniChar lower2
= (wxChar
)wxTolower(*i2
);
1168 if ( lower1
!= lower2
)
1169 return lower1
< lower2
? -1 : 1;
1172 size_t len1
= length();
1173 size_t len2
= s
.length();
1177 else if ( len1
> len2
)
1186 #ifndef __SCHAR_MAX__
1187 #define __SCHAR_MAX__ 127
1191 wxString
wxString::FromAscii(const char *ascii
, size_t len
)
1193 if (!ascii
|| len
== 0)
1194 return wxEmptyString
;
1199 wxStringInternalBuffer
buf(res
, len
);
1200 wxStringCharType
*dest
= buf
;
1202 for ( ; len
> 0; --len
)
1204 unsigned char c
= (unsigned char)*ascii
++;
1205 wxASSERT_MSG( c
< 0x80,
1206 _T("Non-ASCII value passed to FromAscii().") );
1208 *dest
++ = (wchar_t)c
;
1215 wxString
wxString::FromAscii(const char *ascii
)
1217 return FromAscii(ascii
, wxStrlen(ascii
));
1220 wxString
wxString::FromAscii(char ascii
)
1222 // What do we do with '\0' ?
1224 unsigned char c
= (unsigned char)ascii
;
1226 wxASSERT_MSG( c
< 0x80, _T("Non-ASCII value passed to FromAscii().") );
1228 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1229 return wxString(wxUniChar((wchar_t)c
));
1232 const wxScopedCharBuffer
wxString::ToAscii() const
1234 // this will allocate enough space for the terminating NUL too
1235 wxCharBuffer
buffer(length());
1236 char *dest
= buffer
.data();
1238 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1241 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1242 *dest
++ = c
.IsAscii() ? (char)c
: '_';
1244 // the output string can't have embedded NULs anyhow, so we can safely
1245 // stop at first of them even if we do have any
1253 #endif // wxUSE_UNICODE
1255 // extract string of length nCount starting at nFirst
1256 wxString
wxString::Mid(size_t nFirst
, size_t nCount
) const
1258 size_t nLen
= length();
1260 // default value of nCount is npos and means "till the end"
1261 if ( nCount
== npos
)
1263 nCount
= nLen
- nFirst
;
1266 // out-of-bounds requests return sensible things
1267 if ( nFirst
+ nCount
> nLen
)
1269 nCount
= nLen
- nFirst
;
1272 if ( nFirst
> nLen
)
1274 // AllocCopy() will return empty string
1275 return wxEmptyString
;
1278 wxString
dest(*this, nFirst
, nCount
);
1279 if ( dest
.length() != nCount
)
1281 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1287 // check that the string starts with prefix and return the rest of the string
1288 // in the provided pointer if it is not NULL, otherwise return false
1289 bool wxString::StartsWith(const wxString
& prefix
, wxString
*rest
) const
1291 if ( compare(0, prefix
.length(), prefix
) != 0 )
1296 // put the rest of the string into provided pointer
1297 rest
->assign(*this, prefix
.length(), npos
);
1304 // check that the string ends with suffix and return the rest of it in the
1305 // provided pointer if it is not NULL, otherwise return false
1306 bool wxString::EndsWith(const wxString
& suffix
, wxString
*rest
) const
1308 int start
= length() - suffix
.length();
1310 if ( start
< 0 || compare(start
, npos
, suffix
) != 0 )
1315 // put the rest of the string into provided pointer
1316 rest
->assign(*this, 0, start
);
1323 // extract nCount last (rightmost) characters
1324 wxString
wxString::Right(size_t nCount
) const
1326 if ( nCount
> length() )
1329 wxString
dest(*this, length() - nCount
, nCount
);
1330 if ( dest
.length() != nCount
) {
1331 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1336 // get all characters after the last occurrence of ch
1337 // (returns the whole string if ch not found)
1338 wxString
wxString::AfterLast(wxUniChar ch
) const
1341 int iPos
= Find(ch
, true);
1342 if ( iPos
== wxNOT_FOUND
)
1345 str
.assign(*this, iPos
+ 1, npos
);
1350 // extract nCount first (leftmost) characters
1351 wxString
wxString::Left(size_t nCount
) const
1353 if ( nCount
> length() )
1356 wxString
dest(*this, 0, nCount
);
1357 if ( dest
.length() != nCount
) {
1358 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1363 // get all characters before the first occurrence of ch
1364 // (returns the whole string if ch not found)
1365 wxString
wxString::BeforeFirst(wxUniChar ch
) const
1367 int iPos
= Find(ch
);
1368 if ( iPos
== wxNOT_FOUND
)
1370 return wxString(*this, 0, iPos
);
1373 /// get all characters before the last occurrence of ch
1374 /// (returns empty string if ch not found)
1375 wxString
wxString::BeforeLast(wxUniChar ch
) const
1378 int iPos
= Find(ch
, true);
1379 if ( iPos
!= wxNOT_FOUND
&& iPos
!= 0 )
1380 str
= wxString(c_str(), iPos
);
1385 /// get all characters after the first occurrence of ch
1386 /// (returns empty string if ch not found)
1387 wxString
wxString::AfterFirst(wxUniChar ch
) const
1390 int iPos
= Find(ch
);
1391 if ( iPos
!= wxNOT_FOUND
)
1392 str
.assign(*this, iPos
+ 1, npos
);
1397 // replace first (or all) occurrences of some substring with another one
1398 size_t wxString::Replace(const wxString
& strOld
,
1399 const wxString
& strNew
, bool bReplaceAll
)
1401 // if we tried to replace an empty string we'd enter an infinite loop below
1402 wxCHECK_MSG( !strOld
.empty(), 0,
1403 _T("wxString::Replace(): invalid parameter") );
1405 wxSTRING_INVALIDATE_CACHE();
1407 size_t uiCount
= 0; // count of replacements made
1409 // optimize the special common case: replacement of one character by
1410 // another one (in UTF-8 case we can only do this for ASCII characters)
1412 // benchmarks show that this special version is around 3 times faster
1413 // (depending on the proportion of matching characters and UTF-8/wchar_t
1415 if ( strOld
.m_impl
.length() == 1 && strNew
.m_impl
.length() == 1 )
1417 const wxStringCharType chOld
= strOld
.m_impl
[0],
1418 chNew
= strNew
.m_impl
[0];
1420 // this loop is the simplified version of the one below
1421 for ( size_t pos
= 0; ; )
1423 pos
= m_impl
.find(chOld
, pos
);
1427 m_impl
[pos
++] = chNew
;
1435 else if ( !bReplaceAll
)
1437 size_t pos
= m_impl
.find(strOld
, 0);
1440 m_impl
.replace(pos
, strOld
.m_impl
.length(), strNew
.m_impl
);
1444 else // replace all occurrences
1446 const size_t uiOldLen
= strOld
.m_impl
.length();
1447 const size_t uiNewLen
= strNew
.m_impl
.length();
1449 // first scan the string to find all positions at which the replacement
1451 wxVector
<size_t> replacePositions
;
1454 for ( pos
= m_impl
.find(strOld
.m_impl
, 0);
1456 pos
= m_impl
.find(strOld
.m_impl
, pos
+ uiOldLen
))
1458 replacePositions
.push_back(pos
);
1465 // allocate enough memory for the whole new string
1467 tmp
.m_impl
.reserve(m_impl
.length() + uiCount
*(uiNewLen
- uiOldLen
));
1469 // copy this string to tmp doing replacements on the fly
1471 for ( pos
= 0; replNum
< uiCount
; replNum
++ )
1473 const size_t nextReplPos
= replacePositions
[replNum
];
1475 if ( pos
!= nextReplPos
)
1477 tmp
.m_impl
.append(m_impl
, pos
, nextReplPos
- pos
);
1480 tmp
.m_impl
.append(strNew
.m_impl
);
1481 pos
= nextReplPos
+ uiOldLen
;
1484 if ( pos
!= m_impl
.length() )
1486 // append the rest of the string unchanged
1487 tmp
.m_impl
.append(m_impl
, pos
, m_impl
.length() - pos
);
1496 bool wxString::IsAscii() const
1498 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1500 if ( !(*i
).IsAscii() )
1507 bool wxString::IsWord() const
1509 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1511 if ( !wxIsalpha(*i
) )
1518 bool wxString::IsNumber() const
1523 const_iterator i
= begin();
1525 if ( *i
== _T('-') || *i
== _T('+') )
1528 for ( ; i
!= end(); ++i
)
1530 if ( !wxIsdigit(*i
) )
1537 wxString
wxString::Strip(stripType w
) const
1540 if ( w
& leading
) s
.Trim(false);
1541 if ( w
& trailing
) s
.Trim(true);
1545 // ---------------------------------------------------------------------------
1547 // ---------------------------------------------------------------------------
1549 wxString
& wxString::MakeUpper()
1551 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1552 *it
= (wxChar
)wxToupper(*it
);
1557 wxString
& wxString::MakeLower()
1559 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1560 *it
= (wxChar
)wxTolower(*it
);
1565 wxString
& wxString::MakeCapitalized()
1567 const iterator en
= end();
1568 iterator it
= begin();
1571 *it
= (wxChar
)wxToupper(*it
);
1572 for ( ++it
; it
!= en
; ++it
)
1573 *it
= (wxChar
)wxTolower(*it
);
1579 // ---------------------------------------------------------------------------
1580 // trimming and padding
1581 // ---------------------------------------------------------------------------
1583 // some compilers (VC++ 6.0 not to name them) return true for a call to
1584 // isspace('\xEA') in the C locale which seems to be broken to me, but we have
1585 // to live with this by checking that the character is a 7 bit one - even if
1586 // this may fail to detect some spaces (I don't know if Unicode doesn't have
1587 // space-like symbols somewhere except in the first 128 chars), it is arguably
1588 // still better than trimming away accented letters
1589 inline int wxSafeIsspace(wxChar ch
) { return (ch
< 127) && wxIsspace(ch
); }
1591 // trims spaces (in the sense of isspace) from left or right side
1592 wxString
& wxString::Trim(bool bFromRight
)
1594 // first check if we're going to modify the string at all
1597 (bFromRight
&& wxSafeIsspace(GetChar(length() - 1))) ||
1598 (!bFromRight
&& wxSafeIsspace(GetChar(0u)))
1604 // find last non-space character
1605 reverse_iterator psz
= rbegin();
1606 while ( (psz
!= rend()) && wxSafeIsspace(*psz
) )
1609 // truncate at trailing space start
1610 erase(psz
.base(), end());
1614 // find first non-space character
1615 iterator psz
= begin();
1616 while ( (psz
!= end()) && wxSafeIsspace(*psz
) )
1619 // fix up data and length
1620 erase(begin(), psz
);
1627 // adds nCount characters chPad to the string from either side
1628 wxString
& wxString::Pad(size_t nCount
, wxUniChar chPad
, bool bFromRight
)
1630 wxString
s(chPad
, nCount
);
1643 // truncate the string
1644 wxString
& wxString::Truncate(size_t uiLen
)
1646 if ( uiLen
< length() )
1648 erase(begin() + uiLen
, end());
1650 //else: nothing to do, string is already short enough
1655 // ---------------------------------------------------------------------------
1656 // finding (return wxNOT_FOUND if not found and index otherwise)
1657 // ---------------------------------------------------------------------------
1660 int wxString::Find(wxUniChar ch
, bool bFromEnd
) const
1662 size_type idx
= bFromEnd
? find_last_of(ch
) : find_first_of(ch
);
1664 return (idx
== npos
) ? wxNOT_FOUND
: (int)idx
;
1667 // ----------------------------------------------------------------------------
1668 // conversion to numbers
1669 // ----------------------------------------------------------------------------
// All the numeric conversion functions below share the same prologue and
// epilogue, so both are factored out into macros. Numbers can be extracted
// directly from UTF-8 strings as well, hence wxStringCharType and wx_str()
// are used for maximum efficiency.

// errno is not used under Windows CE: wrap anything touching it in this
#ifndef __WXWINCE__
    #define DO_IF_NOT_WINCE(x) x
#else
    #define DO_IF_NOT_WINCE(x)
#endif

// Prologue: checks the output pointer "pVal", resets errno and declares
// "start" (beginning of the text to parse) and "end" (to be filled in by the
// strtoXXX() function called between the two macros).
#define WX_STRING_TO_X_TYPE_START                                           \
    wxCHECK_MSG( pVal, false, _T("NULL output pointer") );                  \
    DO_IF_NOT_WINCE( errno = 0; )                                           \
    const wxStringCharType *start = wx_str();                               \
    wxStringCharType *end;

// Epilogue: expects the parsed number in "val", stores it in *pVal and
// returns true on success, returns false (leaving *pVal alone) otherwise.
#define WX_STRING_TO_X_TYPE_END                                             \
    /* return true only if scan was stopped by the terminating NUL and */   \
    /* if the string was not empty to start with and no under/overflow */   \
    /* occurred */                                                          \
    if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )         \
        return false;                                                       \
    *pVal = val;                                                            \
    return true;
1696 bool wxString::ToLong(long *pVal
, int base
) const
1698 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), _T("invalid base") );
1700 WX_STRING_TO_X_TYPE_START
1701 long val
= wxStrtol(start
, &end
, base
);
1702 WX_STRING_TO_X_TYPE_END
1705 bool wxString::ToULong(unsigned long *pVal
, int base
) const
1707 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), _T("invalid base") );
1709 WX_STRING_TO_X_TYPE_START
1710 unsigned long val
= wxStrtoul(start
, &end
, base
);
1711 WX_STRING_TO_X_TYPE_END
1714 bool wxString::ToLongLong(wxLongLong_t
*pVal
, int base
) const
1716 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), _T("invalid base") );
1718 WX_STRING_TO_X_TYPE_START
1719 wxLongLong_t val
= wxStrtoll(start
, &end
, base
);
1720 WX_STRING_TO_X_TYPE_END
1723 bool wxString::ToULongLong(wxULongLong_t
*pVal
, int base
) const
1725 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), _T("invalid base") );
1727 WX_STRING_TO_X_TYPE_START
1728 wxULongLong_t val
= wxStrtoull(start
, &end
, base
);
1729 WX_STRING_TO_X_TYPE_END
1732 bool wxString::ToDouble(double *pVal
) const
1734 WX_STRING_TO_X_TYPE_START
1735 double val
= wxStrtod(start
, &end
);
1736 WX_STRING_TO_X_TYPE_END
1741 bool wxString::ToCLong(long *pVal
, int base
) const
1743 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), _T("invalid base") );
1745 WX_STRING_TO_X_TYPE_START
1746 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
1747 long val
= wxStrtol_lA(start
, &end
, base
, wxCLocale
);
1749 long val
= wxStrtol_l(start
, &end
, base
, wxCLocale
);
1751 WX_STRING_TO_X_TYPE_END
1754 bool wxString::ToCULong(unsigned long *pVal
, int base
) const
1756 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), _T("invalid base") );
1758 WX_STRING_TO_X_TYPE_START
1759 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
1760 unsigned long val
= wxStrtoul_lA(start
, &end
, base
, wxCLocale
);
1762 unsigned long val
= wxStrtoul_l(start
, &end
, base
, wxCLocale
);
1764 WX_STRING_TO_X_TYPE_END
1767 bool wxString::ToCDouble(double *pVal
) const
1769 WX_STRING_TO_X_TYPE_START
1770 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
1771 double val
= wxStrtod_lA(start
, &end
, wxCLocale
);
1773 double val
= wxStrtod_l(start
, &end
, wxCLocale
);
1775 WX_STRING_TO_X_TYPE_END
1778 #endif // wxUSE_XLOCALE
1780 // ---------------------------------------------------------------------------
1782 // ---------------------------------------------------------------------------
1784 #if !wxUSE_UTF8_LOCALE_ONLY
1786 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1787 wxString
wxStringPrintfMixinBase::DoFormatWchar(const wxChar
*format
, ...)
1789 wxString
wxString::DoFormatWchar(const wxChar
*format
, ...)
1793 va_start(argptr
, format
);
1796 s
.PrintfV(format
, argptr
);
1802 #endif // !wxUSE_UTF8_LOCALE_ONLY
#if wxUSE_UNICODE_UTF8

// Returns a new string built from the printf()-like narrow format and the
// variable arguments following it.
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list argptr;
    va_start(argptr, format);
    result.PrintfV(format, argptr);
    va_end(argptr);

    return result;
}

#endif // wxUSE_UNICODE_UTF8
1821 wxString
wxString::FormatV(const wxString
& format
, va_list argptr
)
1824 s
.PrintfV(format
, argptr
);
1828 #if !wxUSE_UTF8_LOCALE_ONLY
1829 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1830 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar
*format
, ...)
1832 int wxString::DoPrintfWchar(const wxChar
*format
, ...)
1836 va_start(argptr
, format
);
1838 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1839 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1840 // because it's the only cast that works safely for downcasting when
1841 // multiple inheritance is used:
1842 wxString
*str
= static_cast<wxString
*>(this);
1844 wxString
*str
= this;
1847 int iLen
= str
->PrintfV(format
, argptr
);
1853 #endif // !wxUSE_UTF8_LOCALE_ONLY
#if wxUSE_UNICODE_UTF8

// Replaces the contents of the string with the result of formatting the
// printf()-like narrow format with the variable arguments following it;
// returns whatever PrintfV() returns.
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list argptr;
    va_start(argptr, format);

    const int iLen = PrintfV(format, argptr);

    va_end(argptr);

    return iLen;
}

#endif // wxUSE_UNICODE_UTF8
    Uses wxVsnprintf and places the result into this string.
1872 In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1873 it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in
1874 the ISO C99 (and thus SUSv3) standard the return value for the case of
1875 an undersized buffer is inconsistent. For conforming vsnprintf
1876 implementations the function must return the number of characters that
1877 would have been printed had the buffer been large enough. For conforming
1878 vswprintf implementations the function must return a negative number
1881 What vswprintf sets errno to is undefined but Darwin seems to set it to
1882 EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of
1883 those are defined in the standard and backed up by several conformance
1884 statements. Note that ENOMEM mentioned in the manual page does not
1885 apply to swprintf, only wprintf and fwprintf.
1887 Official manual page:
1888 http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1890 Some conformance statements (AIX, Solaris):
1891 http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1892 http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1894 Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1895 EILSEQ and EINVAL are specifically defined to mean the error is other than
1896 an undersized buffer and no other errno are defined we treat those two
1897 as meaning hard errors and everything else gets the old behavior which
1898 is to keep looping and increasing buffer size until the function succeeds.
1900 In practice it's impossible to determine before compilation which behavior
1901 may be used. The vswprintf function may have vsnprintf-like behavior or
1902 vice-versa. Behavior detected on one release can theoretically change
1903 with an updated release. Not to mention that configure testing for it
1904 would require the test to be run on the host system, not the build system
1905 which makes cross compilation difficult. Therefore, we make no assumptions
1906 about behavior and try our best to handle every known case, including the
1907 case where wxVsnprintf returns a negative number and fails to set errno.
1909 There is yet one more non-standard implementation and that is our own.
1910 Fortunately, that can be detected at compile-time.
1912 On top of all that, ISO C99 explicitly defines snprintf to write a null
    character to the last position of the specified buffer.  That would be
    at the given buffer size minus 1.  It is supposed to do this even if it
1915 turns out that the buffer is sized too small.
1917 Darwin (tested on 10.5) follows the C99 behavior exactly.
1919 Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1920 errno even when it fails. However, it only seems to ever fail due
1921 to an undersized buffer.
1923 #if wxUSE_UNICODE_UTF8
1924 template<typename BufferType
>
1926 // we only need one version in non-UTF8 builds and at least two Windows
1927 // compilers have problems with this function template, so use just one
1928 // normal function here
1930 static int DoStringPrintfV(wxString
& str
,
1931 const wxString
& format
, va_list argptr
)
1937 #if wxUSE_UNICODE_UTF8
1938 BufferType
tmp(str
, size
+ 1);
1939 typename
BufferType::CharType
*buf
= tmp
;
1941 wxStringBuffer
tmp(str
, size
+ 1);
1949 // in UTF-8 build, leaving uninitialized junk in the buffer
1950 // could result in invalid non-empty UTF-8 string, so just
1951 // reset the string to empty on failure:
1956 // wxVsnprintf() may modify the original arg pointer, so pass it
1959 wxVaCopy(argptrcopy
, argptr
);
1962 // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1965 int len
= wxVsnprintf(buf
, size
, format
, argptrcopy
);
1968 // some implementations of vsnprintf() don't NUL terminate
1969 // the string if there is not enough space for it so
1970 // always do it manually
1971 // FIXME: This really seems to be the wrong and would be an off-by-one
1972 // bug except the code above allocates an extra character.
1973 buf
[size
] = _T('\0');
1975 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1976 // total number of characters which would have been written if the
1977 // buffer were large enough (newer standards such as Unix98)
1980 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1981 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1982 // is true if *both* of them use our own implementation,
1983 // otherwise we can't be sure
1984 #if wxUSE_WXVSNPRINTF
1985 // we know that our own implementation of wxVsnprintf() returns -1
1986 // only for a format error - thus there's something wrong with
1987 // the user's format string
1990 #else // possibly using system version
1991 // assume it only returns error if there is not enough space, but
1992 // as we don't know how much we need, double the current size of
1995 if( (errno
== EILSEQ
) || (errno
== EINVAL
) )
1996 // If errno was set to one of the two well-known hard errors
1997 // then fail immediately to avoid an infinite loop.
2000 #endif // __WXWINCE__
2001 // still not enough, as we don't know how much we need, double the
2002 // current size of the buffer
2004 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
2006 else if ( len
>= size
)
2008 #if wxUSE_WXVSNPRINTF
2009 // we know that our own implementation of wxVsnprintf() returns
2010 // size+1 when there's not enough space but that's not the size
2011 // of the required buffer!
2012 size
*= 2; // so we just double the current size of the buffer
2014 // some vsnprintf() implementations NUL-terminate the buffer and
2015 // some don't in len == size case, to be safe always add 1
2016 // FIXME: I don't quite understand this comment. The vsnprintf
2017 // function is specifically defined to return the number of
2018 // characters printed not including the null terminator.
2019 // So OF COURSE you need to add 1 to get the right buffer size.
2020 // The following line is definitely correct, no question.
2024 else // ok, there was enough space
2030 // we could have overshot
2033 return str
.length();
2036 int wxString::PrintfV(const wxString
& format
, va_list argptr
)
2038 #if wxUSE_UNICODE_UTF8
2039 #if wxUSE_STL_BASED_WXSTRING
2040 typedef wxStringTypeBuffer
<char> Utf8Buffer
;
2042 typedef wxStringInternalBuffer Utf8Buffer
;
2046 #if wxUSE_UTF8_LOCALE_ONLY
2047 return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argptr
);
2049 #if wxUSE_UNICODE_UTF8
2050 if ( wxLocaleIsUtf8
)
2051 return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argptr
);
2054 return DoStringPrintfV
<wxStringBuffer
>(*this, format
, argptr
);
2056 return DoStringPrintfV(*this, format
, argptr
);
2057 #endif // UTF8/WCHAR
2061 // ----------------------------------------------------------------------------
2062 // misc other operations
2063 // ----------------------------------------------------------------------------
2065 // returns true if the string matches the pattern which may contain '*' and
2066 // '?' metacharacters (as usual, '?' matches any character and '*' any number
2068 bool wxString::Matches(const wxString
& mask
) const
2070 // I disable this code as it doesn't seem to be faster (in fact, it seems
2071 // to be much slower) than the old, hand-written code below and using it
2072 // here requires always linking with libregex even if the user code doesn't
2074 #if 0 // wxUSE_REGEX
2075 // first translate the shell-like mask into a regex
2077 pattern
.reserve(wxStrlen(pszMask
));
2089 pattern
+= _T(".*");
2100 // these characters are special in a RE, quote them
2101 // (however note that we don't quote '[' and ']' to allow
2102 // using them for Unix shell like matching)
2103 pattern
+= _T('\\');
2107 pattern
+= *pszMask
;
2115 return wxRegEx(pattern
, wxRE_NOSUB
| wxRE_EXTENDED
).Matches(c_str());
2116 #else // !wxUSE_REGEX
2117 // TODO: this is, of course, awfully inefficient...
2119 // FIXME-UTF8: implement using iterators, remove #if
2120 #if wxUSE_UNICODE_UTF8
2121 const wxScopedWCharBuffer maskBuf
= mask
.wc_str();
2122 const wxScopedWCharBuffer txtBuf
= wc_str();
2123 const wxChar
*pszMask
= maskBuf
.data();
2124 const wxChar
*pszTxt
= txtBuf
.data();
2126 const wxChar
*pszMask
= mask
.wx_str();
2127 // the char currently being checked
2128 const wxChar
*pszTxt
= wx_str();
2131 // the last location where '*' matched
2132 const wxChar
*pszLastStarInText
= NULL
;
2133 const wxChar
*pszLastStarInMask
= NULL
;
2136 for ( ; *pszMask
!= wxT('\0'); pszMask
++, pszTxt
++ ) {
2137 switch ( *pszMask
) {
2139 if ( *pszTxt
== wxT('\0') )
2142 // pszTxt and pszMask will be incremented in the loop statement
2148 // remember where we started to be able to backtrack later
2149 pszLastStarInText
= pszTxt
;
2150 pszLastStarInMask
= pszMask
;
2152 // ignore special chars immediately following this one
2153 // (should this be an error?)
2154 while ( *pszMask
== wxT('*') || *pszMask
== wxT('?') )
2157 // if there is nothing more, match
2158 if ( *pszMask
== wxT('\0') )
2161 // are there any other metacharacters in the mask?
2163 const wxChar
*pEndMask
= wxStrpbrk(pszMask
, wxT("*?"));
2165 if ( pEndMask
!= NULL
) {
2166 // we have to match the string between two metachars
2167 uiLenMask
= pEndMask
- pszMask
;
2170 // we have to match the remainder of the string
2171 uiLenMask
= wxStrlen(pszMask
);
2174 wxString
strToMatch(pszMask
, uiLenMask
);
2175 const wxChar
* pMatch
= wxStrstr(pszTxt
, strToMatch
);
2176 if ( pMatch
== NULL
)
2179 // -1 to compensate "++" in the loop
2180 pszTxt
= pMatch
+ uiLenMask
- 1;
2181 pszMask
+= uiLenMask
- 1;
2186 if ( *pszMask
!= *pszTxt
)
2192 // match only if nothing left
2193 if ( *pszTxt
== wxT('\0') )
2196 // if we failed to match, backtrack if we can
2197 if ( pszLastStarInText
) {
2198 pszTxt
= pszLastStarInText
+ 1;
2199 pszMask
= pszLastStarInMask
;
2201 pszLastStarInText
= NULL
;
2203 // don't bother resetting pszLastStarInMask, it's unnecessary
2209 #endif // wxUSE_REGEX/!wxUSE_REGEX
2212 // Count the number of chars
2213 int wxString::Freq(wxUniChar ch
) const
2216 for ( const_iterator i
= begin(); i
!= end(); ++i
)