1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
13 // ===========================================================================
14 // headers, declarations, constants
15 // ===========================================================================
17 // For compilers that support precompilation, includes "wx.h".
18 #include "wx/wxprec.h"
25 #include "wx/string.h"
26 #include "wx/wxcrtvararg.h"
38 #include "wx/hashmap.h"
39 #include "wx/vector.h"
40 #include "wx/xlocale.h"
42 // string handling functions used by wxString:
43 #if wxUSE_UNICODE_UTF8
44 #define wxStringMemcpy memcpy
45 #define wxStringMemcmp memcmp
46 #define wxStringMemchr memchr
47 #define wxStringStrlen strlen
49 #define wxStringMemcpy wxTmemcpy
50 #define wxStringMemcmp wxTmemcmp
51 #define wxStringMemchr wxTmemchr
52 #define wxStringStrlen wxStrlen
55 // ----------------------------------------------------------------------------
57 // ----------------------------------------------------------------------------
62 static UntypedBufferData
s_untypedNullData(NULL
, 0);
64 UntypedBufferData
* const untypedNullDataPtr
= &s_untypedNullData
;
66 } // namespace wxPrivate
68 // ---------------------------------------------------------------------------
69 // static class variables definition
70 // ---------------------------------------------------------------------------
72 //According to STL _must_ be a -1 size_t
73 const size_t wxString::npos
= (size_t) -1;
75 #if wxUSE_STRING_POS_CACHE
77 #ifdef wxHAS_COMPILER_TLS
79 wxTLS_TYPE(wxString::Cache
) wxString::ms_cache
;
81 #else // !wxHAS_COMPILER_TLS
83 struct wxStrCacheInitializer
85 wxStrCacheInitializer()
87 // calling this function triggers s_cache initialization in it, and
88 // from now on it becomes safe to call from multiple threads
94 wxString::Cache& wxString::GetCache()
96 static wxTLS_TYPE(Cache) s_cache;
98 return wxTLS_VALUE(s_cache);
102 static wxStrCacheInitializer gs_stringCacheInit
;
104 #endif // wxHAS_COMPILER_TLS/!wxHAS_COMPILER_TLS
106 // gdb seems to be unable to display thread-local variables correctly, at least
107 // not my 6.4.98 version under amd64, so provide this debugging helper to do it
108 #if wxDEBUG_LEVEL >= 2
110 struct wxStrCacheDumper
112 static void ShowAll()
114 puts("*** wxString cache dump:");
115 for ( unsigned n
= 0; n
< wxString::Cache::SIZE
; n
++ )
117 const wxString::Cache::Element
&
118 c
= wxString::GetCacheBegin()[n
];
120 printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
122 n
== wxString::LastUsedCacheElement() ? " [*]" : "",
124 (unsigned long)c
.pos
,
125 (unsigned long)c
.impl
,
131 void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
133 #endif // wxDEBUG_LEVEL >= 2
135 #ifdef wxPROFILE_STRING_CACHE
137 wxString::CacheStats
wxString::ms_cacheStats
;
139 struct wxStrCacheStatsDumper
141 ~wxStrCacheStatsDumper()
143 const wxString::CacheStats
& stats
= wxString::ms_cacheStats
;
147 puts("*** wxString cache statistics:");
148 printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
150 printf("\tHits %u (of which %u not used) or %.2f%%\n",
153 100.*float(stats
.poshits
- stats
.mishits
)/stats
.postot
);
154 printf("\tAverage position requested: %.2f\n",
155 float(stats
.sumpos
) / stats
.postot
);
156 printf("\tAverage offset after cached hint: %.2f\n",
157 float(stats
.sumofs
) / stats
.postot
);
162 printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
163 stats
.lentot
, 100.*float(stats
.lenhits
)/stats
.lentot
);
168 static wxStrCacheStatsDumper s_showCacheStats
;
170 #endif // wxPROFILE_STRING_CACHE
172 #endif // wxUSE_STRING_POS_CACHE
174 // ----------------------------------------------------------------------------
176 // ----------------------------------------------------------------------------
178 #if wxUSE_STD_IOSTREAM
182 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxCStrData
& str
)
184 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
185 const wxScopedCharBuffer
buf(str
.AsCharBuf());
187 os
.clear(wxSTD
ios_base::failbit
);
193 return os
<< str
.AsInternal();
197 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxString
& str
)
199 return os
<< str
.c_str();
202 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxScopedCharBuffer
& str
)
204 return os
<< str
.data();
208 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxScopedWCharBuffer
& str
)
210 return os
<< str
.data();
214 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
216 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxString
& str
)
218 return wos
<< str
.wc_str();
221 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxCStrData
& str
)
223 return wos
<< str
.AsWChar();
226 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxScopedWCharBuffer
& str
)
228 return wos
<< str
.data();
231 #endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
233 #endif // wxUSE_STD_IOSTREAM
235 // ===========================================================================
236 // wxString class core
237 // ===========================================================================
239 #if wxUSE_UNICODE_UTF8
241 void wxString::PosLenToImpl(size_t pos
, size_t len
,
242 size_t *implPos
, size_t *implLen
) const
248 else // have valid start position
250 const const_iterator b
= GetIterForNthChar(pos
);
251 *implPos
= wxStringImpl::const_iterator(b
.impl()) - m_impl
.begin();
256 else // have valid length too
258 // we need to handle the case of length specifying a substring
259 // going beyond the end of the string, just as std::string does
260 const const_iterator
e(end());
262 while ( len
&& i
<= e
)
268 *implLen
= i
.impl() - b
.impl();
273 #endif // wxUSE_UNICODE_UTF8
275 // ----------------------------------------------------------------------------
276 // wxCStrData converted strings caching
277 // ----------------------------------------------------------------------------
279 // FIXME-UTF8: temporarily disabled because it doesn't work with global
280 // string objects; re-enable after fixing this bug and benchmarking
281 // performance to see if using a hash is a good idea at all
284 // For backward compatibility reasons, it must be possible to assign the value
285 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
286 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
287 // because the memory would be freed immediately, but it has to be valid as long
288 // as the string is not modified, so that code like this still works:
290 // const wxChar *s = str.c_str();
291 // while ( s ) { ... }
293 // FIXME-UTF8: not thread safe!
294 // FIXME-UTF8: we currently clear the cached conversion only when the string is
295 // destroyed, but we should do it when the string is modified, to
296 // keep memory usage down
297 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
298 // invalidated the cache on every change, we could keep the previous
300 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
301 // to use mb_str() or wc_str() instead of (const [w]char*)c_str()
304 static inline void DeleteStringFromConversionCache(T
& hash
, const wxString
*s
)
306 typename
T::iterator i
= hash
.find(wxConstCast(s
, wxString
));
307 if ( i
!= hash
.end() )
315 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
316 // so we have to use wxString* here and const-cast when used
317 WX_DECLARE_HASH_MAP(wxString
*, char*, wxPointerHash
, wxPointerEqual
,
318 wxStringCharConversionCache
);
319 static wxStringCharConversionCache gs_stringsCharCache
;
321 const char* wxCStrData::AsChar() const
323 // remove previously cache value, if any (see FIXMEs above):
324 DeleteStringFromConversionCache(gs_stringsCharCache
, m_str
);
326 // convert the string and keep it:
327 const char *s
= gs_stringsCharCache
[wxConstCast(m_str
, wxString
)] =
328 m_str
->mb_str().release();
332 #endif // wxUSE_UNICODE
334 #if !wxUSE_UNICODE_WCHAR
335 WX_DECLARE_HASH_MAP(wxString
*, wchar_t*, wxPointerHash
, wxPointerEqual
,
336 wxStringWCharConversionCache
);
337 static wxStringWCharConversionCache gs_stringsWCharCache
;
339 const wchar_t* wxCStrData::AsWChar() const
341 // remove previously cache value, if any (see FIXMEs above):
342 DeleteStringFromConversionCache(gs_stringsWCharCache
, m_str
);
344 // convert the string and keep it:
345 const wchar_t *s
= gs_stringsWCharCache
[wxConstCast(m_str
, wxString
)] =
346 m_str
->wc_str().release();
350 #endif // !wxUSE_UNICODE_WCHAR
352 wxString::~wxString()
355 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
356 DeleteStringFromConversionCache(gs_stringsCharCache
, this);
358 #if !wxUSE_UNICODE_WCHAR
359 DeleteStringFromConversionCache(gs_stringsWCharCache
, this);
364 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
365 const char* wxCStrData::AsChar() const
367 #if wxUSE_UNICODE_UTF8
368 if ( wxLocaleIsUtf8
)
371 // under non-UTF8 locales, we have to convert the internal UTF-8
372 // representation using wxConvLibc and cache the result
374 wxString
*str
= wxConstCast(m_str
, wxString
);
376 // convert the string:
378 // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
379 // have it) but it's unfortunately not obvious to implement
380 // because we don't know how big buffer do we need for the
381 // given string length (in case of multibyte encodings, e.g.
382 // ISO-2022-JP or UTF-8 when internal representation is wchar_t)
384 // One idea would be to store more than just m_convertedToChar
385 // in wxString: then we could record the length of the string
386 // which was converted the last time and try to reuse the same
387 // buffer if the current length is not greater than it (this
388 // could still fail because string could have been modified in
389 // place but it would work most of the time, so we'd do it and
390 // only allocate the new buffer if in-place conversion returned
391 // an error). We could also store a bit saying if the string
392 // was modified since the last conversion (and update it in all
393 // operation modifying the string, of course) to avoid unneeded
394 // consequential conversions. But both of these ideas require
395 // adding more fields to wxString and require profiling results
396 // to be sure that we really gain enough from them to justify
398 wxScopedCharBuffer
buf(str
->mb_str());
    // if it failed, return an empty string and not NULL to avoid crashes in
    // code written with either wxWidgets 2 wxString or std::string behaviour
    // in mind: neither of them ever returns NULL and so we shouldn't either
406 if ( str
->m_convertedToChar
&&
407 strlen(buf
) == strlen(str
->m_convertedToChar
) )
409 // keep the same buffer for as long as possible, so that several calls
410 // to c_str() in a row still work:
411 strcpy(str
->m_convertedToChar
, buf
);
415 str
->m_convertedToChar
= buf
.release();
419 return str
->m_convertedToChar
+ m_offset
;
421 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
423 #if !wxUSE_UNICODE_WCHAR
424 const wchar_t* wxCStrData::AsWChar() const
426 wxString
*str
= wxConstCast(m_str
, wxString
);
428 // convert the string:
429 wxScopedWCharBuffer
buf(str
->wc_str());
431 // notice that here, unlike above in AsChar(), conversion can't fail as our
432 // internal UTF-8 is always well-formed -- or the string was corrupted and
433 // all bets are off anyhow
435 // FIXME-UTF8: do the conversion in-place in the existing buffer
436 if ( str
->m_convertedToWChar
&&
437 wxWcslen(buf
) == wxWcslen(str
->m_convertedToWChar
) )
439 // keep the same buffer for as long as possible, so that several calls
440 // to c_str() in a row still work:
441 memcpy(str
->m_convertedToWChar
, buf
, sizeof(wchar_t) * wxWcslen(buf
));
445 str
->m_convertedToWChar
= buf
.release();
449 return str
->m_convertedToWChar
+ m_offset
;
451 #endif // !wxUSE_UNICODE_WCHAR
453 // ===========================================================================
454 // wxString class core
455 // ===========================================================================
457 // ---------------------------------------------------------------------------
458 // construction and conversion
459 // ---------------------------------------------------------------------------
461 #if wxUSE_UNICODE_WCHAR
463 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
464 const wxMBConv
& conv
)
467 if ( !psz
|| nLength
== 0 )
468 return SubstrBufFromMB(wxWCharBuffer(L
""), 0);
470 if ( nLength
== npos
)
474 wxScopedWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
476 return SubstrBufFromMB(wxWCharBuffer(L
""), 0);
478 return SubstrBufFromMB(wcBuf
, wcLen
);
480 #endif // wxUSE_UNICODE_WCHAR
482 #if wxUSE_UNICODE_UTF8
484 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
485 const wxMBConv
& conv
)
488 if ( !psz
|| nLength
== 0 )
489 return SubstrBufFromMB(wxCharBuffer(""), 0);
491 // if psz is already in UTF-8, we don't have to do the roundtrip to
492 // wchar_t* and back:
495 // we need to validate the input because UTF8 iterators assume valid
496 // UTF-8 sequence and psz may be invalid:
497 if ( wxStringOperations::IsValidUtf8String(psz
, nLength
) )
499 // we must pass the real string length to SubstrBufFromMB ctor
500 if ( nLength
== npos
)
501 nLength
= psz
? strlen(psz
) : 0;
502 return SubstrBufFromMB(wxScopedCharBuffer::CreateNonOwned(psz
, nLength
),
505 // else: do the roundtrip through wchar_t*
508 if ( nLength
== npos
)
511 // first convert to wide string:
513 wxScopedWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
515 return SubstrBufFromMB(wxCharBuffer(""), 0);
517 // and then to UTF-8:
518 SubstrBufFromMB
buf(ConvertStr(wcBuf
, wcLen
, wxMBConvStrictUTF8()));
519 // widechar -> UTF-8 conversion isn't supposed to ever fail:
520 wxASSERT_MSG( buf
.data
, _T("conversion to UTF-8 failed") );
524 #endif // wxUSE_UNICODE_UTF8
526 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
528 wxString::SubstrBufFromWC
wxString::ConvertStr(const wchar_t *pwz
, size_t nLength
,
529 const wxMBConv
& conv
)
532 if ( !pwz
|| nLength
== 0 )
533 return SubstrBufFromWC(wxCharBuffer(""), 0);
535 if ( nLength
== npos
)
539 wxScopedCharBuffer
mbBuf(conv
.cWC2MB(pwz
, nLength
, &mbLen
));
541 return SubstrBufFromWC(wxCharBuffer(""), 0);
543 return SubstrBufFromWC(mbBuf
, mbLen
);
545 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
548 #if wxUSE_UNICODE_WCHAR
550 //Convert wxString in Unicode mode to a multi-byte string
551 const wxScopedCharBuffer
wxString::mb_str(const wxMBConv
& conv
) const
553 // NB: Length passed to cWC2MB() doesn't include terminating NUL, it's
554 // added by it automatically. If we passed length()+1 here, it would
555 // create a buffer with 2 trailing NULs of length one greater than
557 return conv
.cWC2MB(wx_str(), length(), NULL
);
560 #elif wxUSE_UNICODE_UTF8
562 const wxScopedWCharBuffer
wxString::wc_str() const
564 // NB: Length passed to cMB2WC() doesn't include terminating NUL, it's
565 // added by it automatically. If we passed length()+1 here, it would
566 // create a buffer with 2 trailing NULs of length one greater than
568 return wxMBConvStrictUTF8().cMB2WC
576 const wxScopedCharBuffer
wxString::mb_str(const wxMBConv
& conv
) const
579 return wxScopedCharBuffer::CreateNonOwned(m_impl
.c_str(), m_impl
.length());
581 wxScopedWCharBuffer
wcBuf(wc_str());
582 if ( !wcBuf
.length() )
583 return wxCharBuffer("");
585 return conv
.cWC2MB(wcBuf
.data(), wcBuf
.length(), NULL
);
590 //Converts this string to a wide character string if unicode
591 //mode is not enabled and wxUSE_WCHAR_T is enabled
592 const wxScopedWCharBuffer
wxString::wc_str(const wxMBConv
& conv
) const
594 // NB: Length passed to cMB2WC() doesn't include terminating NUL, it's
595 // added by it automatically. If we passed length()+1 here, it would
596 // create a buffer with 2 trailing NULs of length one greater than
598 return conv
.cMB2WC(wx_str(), length(), NULL
);
601 #endif // Unicode/ANSI
603 // shrink to minimal size (releasing extra memory)
604 bool wxString::Shrink()
606 wxString
tmp(begin(), end());
608 return tmp
.length() == length();
611 // deprecated compatibility code:
612 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
613 wxStringCharType
*wxString::GetWriteBuf(size_t nLen
)
615 return DoGetWriteBuf(nLen
);
618 void wxString::UngetWriteBuf()
623 void wxString::UngetWriteBuf(size_t nLen
)
625 DoUngetWriteBuf(nLen
);
627 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
630 // ---------------------------------------------------------------------------
632 // ---------------------------------------------------------------------------
634 // all functions are inline in string.h
636 // ---------------------------------------------------------------------------
637 // concatenation operators
638 // ---------------------------------------------------------------------------
641 * concatenation functions come in 5 flavours:
643 * char + string and string + char
644 * C str + string and string + C str
647 wxString
operator+(const wxString
& str1
, const wxString
& str2
)
649 #if !wxUSE_STL_BASED_WXSTRING
650 wxASSERT( str1
.IsValid() );
651 wxASSERT( str2
.IsValid() );
660 wxString
operator+(const wxString
& str
, wxUniChar ch
)
662 #if !wxUSE_STL_BASED_WXSTRING
663 wxASSERT( str
.IsValid() );
672 wxString
operator+(wxUniChar ch
, const wxString
& str
)
674 #if !wxUSE_STL_BASED_WXSTRING
675 wxASSERT( str
.IsValid() );
684 wxString
operator+(const wxString
& str
, const char *psz
)
686 #if !wxUSE_STL_BASED_WXSTRING
687 wxASSERT( str
.IsValid() );
691 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
692 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
700 wxString
operator+(const wxString
& str
, const wchar_t *pwz
)
702 #if !wxUSE_STL_BASED_WXSTRING
703 wxASSERT( str
.IsValid() );
707 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
708 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
716 wxString
operator+(const char *psz
, const wxString
& str
)
718 #if !wxUSE_STL_BASED_WXSTRING
719 wxASSERT( str
.IsValid() );
723 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
724 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
732 wxString
operator+(const wchar_t *pwz
, const wxString
& str
)
734 #if !wxUSE_STL_BASED_WXSTRING
735 wxASSERT( str
.IsValid() );
739 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
740 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
748 // ---------------------------------------------------------------------------
750 // ---------------------------------------------------------------------------
752 bool wxString::IsSameAs(wxUniChar c
, bool compareWithCase
) const
754 return (length() == 1) && (compareWithCase
? GetChar(0u) == c
755 : wxToupper(GetChar(0u)) == wxToupper(c
));
758 #ifdef HAVE_STD_STRING_COMPARE
760 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
761 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
762 // sort strings in characters code point order by sorting the byte sequence
763 // in byte values order (i.e. what strcmp() and memcmp() do).
765 int wxString::compare(const wxString
& str
) const
767 return m_impl
.compare(str
.m_impl
);
770 int wxString::compare(size_t nStart
, size_t nLen
,
771 const wxString
& str
) const
774 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
775 return m_impl
.compare(pos
, len
, str
.m_impl
);
778 int wxString::compare(size_t nStart
, size_t nLen
,
780 size_t nStart2
, size_t nLen2
) const
783 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
786 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
788 return m_impl
.compare(pos
, len
, str
.m_impl
, pos2
, len2
);
791 int wxString::compare(const char* sz
) const
793 return m_impl
.compare(ImplStr(sz
));
796 int wxString::compare(const wchar_t* sz
) const
798 return m_impl
.compare(ImplStr(sz
));
801 int wxString::compare(size_t nStart
, size_t nLen
,
802 const char* sz
, size_t nCount
) const
805 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
807 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
809 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
812 int wxString::compare(size_t nStart
, size_t nLen
,
813 const wchar_t* sz
, size_t nCount
) const
816 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
818 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
820 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
823 #else // !HAVE_STD_STRING_COMPARE
825 static inline int wxDoCmp(const wxStringCharType
* s1
, size_t l1
,
826 const wxStringCharType
* s2
, size_t l2
)
829 return wxStringMemcmp(s1
, s2
, l1
);
832 int ret
= wxStringMemcmp(s1
, s2
, l1
);
833 return ret
== 0 ? -1 : ret
;
837 int ret
= wxStringMemcmp(s1
, s2
, l2
);
838 return ret
== 0 ? +1 : ret
;
842 int wxString::compare(const wxString
& str
) const
844 return ::wxDoCmp(m_impl
.data(), m_impl
.length(),
845 str
.m_impl
.data(), str
.m_impl
.length());
848 int wxString::compare(size_t nStart
, size_t nLen
,
849 const wxString
& str
) const
851 wxASSERT(nStart
<= length());
852 size_type strLen
= length() - nStart
;
853 nLen
= strLen
< nLen
? strLen
: nLen
;
856 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
858 return ::wxDoCmp(m_impl
.data() + pos
, len
,
859 str
.m_impl
.data(), str
.m_impl
.length());
862 int wxString::compare(size_t nStart
, size_t nLen
,
864 size_t nStart2
, size_t nLen2
) const
866 wxASSERT(nStart
<= length());
867 wxASSERT(nStart2
<= str
.length());
868 size_type strLen
= length() - nStart
,
869 strLen2
= str
.length() - nStart2
;
870 nLen
= strLen
< nLen
? strLen
: nLen
;
871 nLen2
= strLen2
< nLen2
? strLen2
: nLen2
;
874 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
876 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
878 return ::wxDoCmp(m_impl
.data() + pos
, len
,
879 str
.m_impl
.data() + pos2
, len2
);
882 int wxString::compare(const char* sz
) const
884 SubstrBufFromMB
str(ImplStr(sz
, npos
));
885 if ( str
.len
== npos
)
886 str
.len
= wxStringStrlen(str
.data
);
887 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
890 int wxString::compare(const wchar_t* sz
) const
892 SubstrBufFromWC
str(ImplStr(sz
, npos
));
893 if ( str
.len
== npos
)
894 str
.len
= wxStringStrlen(str
.data
);
895 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
898 int wxString::compare(size_t nStart
, size_t nLen
,
899 const char* sz
, size_t nCount
) const
901 wxASSERT(nStart
<= length());
902 size_type strLen
= length() - nStart
;
903 nLen
= strLen
< nLen
? strLen
: nLen
;
906 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
908 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
909 if ( str
.len
== npos
)
910 str
.len
= wxStringStrlen(str
.data
);
912 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
915 int wxString::compare(size_t nStart
, size_t nLen
,
916 const wchar_t* sz
, size_t nCount
) const
918 wxASSERT(nStart
<= length());
919 size_type strLen
= length() - nStart
;
920 nLen
= strLen
< nLen
? strLen
: nLen
;
923 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
925 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
926 if ( str
.len
== npos
)
927 str
.len
= wxStringStrlen(str
.data
);
929 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
932 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
935 // ---------------------------------------------------------------------------
936 // find_{first,last}_[not]_of functions
937 // ---------------------------------------------------------------------------
939 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
941 // NB: All these functions are implemented with the argument being wxChar*,
942 // i.e. widechar string in any Unicode build, even though native string
943 // representation is char* in the UTF-8 build. This is because we couldn't
944 // use memchr() to determine if a character is in a set encoded as UTF-8.
946 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
) const
948 return find_first_of(sz
, nStart
, wxStrlen(sz
));
951 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
) const
953 return find_first_not_of(sz
, nStart
, wxStrlen(sz
));
956 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
958 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
961 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
963 if ( wxTmemchr(sz
, *i
, n
) )
970 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
972 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
975 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
977 if ( !wxTmemchr(sz
, *i
, n
) )
985 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
) const
987 return find_last_of(sz
, nStart
, wxStrlen(sz
));
990 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
) const
992 return find_last_not_of(sz
, nStart
, wxStrlen(sz
));
995 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
997 size_t len
= length();
999 if ( nStart
== npos
)
1005 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
1008 size_t idx
= nStart
;
1009 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
1010 i
!= rend(); --idx
, ++i
)
1012 if ( wxTmemchr(sz
, *i
, n
) )
1019 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
1021 size_t len
= length();
1023 if ( nStart
== npos
)
1029 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
1032 size_t idx
= nStart
;
1033 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
1034 i
!= rend(); --idx
, ++i
)
1036 if ( !wxTmemchr(sz
, *i
, n
) )
1043 size_t wxString::find_first_not_of(wxUniChar ch
, size_t nStart
) const
1045 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
1047 size_t idx
= nStart
;
1048 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
1057 size_t wxString::find_last_not_of(wxUniChar ch
, size_t nStart
) const
1059 size_t len
= length();
1061 if ( nStart
== npos
)
1067 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
1070 size_t idx
= nStart
;
1071 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
1072 i
!= rend(); --idx
, ++i
)
1081 // the functions above were implemented for wchar_t* arguments in Unicode
1082 // build and char* in ANSI build; below are implementations for the other
1085 #define wxOtherCharType char
1086 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
1088 #define wxOtherCharType wchar_t
1089 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
1092 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
) const
1093 { return find_first_of(STRCONV(sz
), nStart
); }
1095 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
,
1097 { return find_first_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1098 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
) const
1099 { return find_last_of(STRCONV(sz
), nStart
); }
1100 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
,
1102 { return find_last_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1103 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
1104 { return find_first_not_of(STRCONV(sz
), nStart
); }
1105 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
,
1107 { return find_first_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1108 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
1109 { return find_last_not_of(STRCONV(sz
), nStart
); }
1110 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
,
1112 { return find_last_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1114 #undef wxOtherCharType
1117 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1119 // ===========================================================================
1120 // other common string functions
1121 // ===========================================================================
1123 int wxString::CmpNoCase(const wxString
& s
) const
1125 #if wxUSE_UNICODE_UTF8
1126 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
1128 const_iterator i1
= begin();
1129 const_iterator end1
= end();
1130 const_iterator i2
= s
.begin();
1131 const_iterator end2
= s
.end();
1133 for ( ; i1
!= end1
&& i2
!= end2
; ++i1
, ++i2
)
1135 wxUniChar lower1
= (wxChar
)wxTolower(*i1
);
1136 wxUniChar lower2
= (wxChar
)wxTolower(*i2
);
1137 if ( lower1
!= lower2
)
1138 return lower1
< lower2
? -1 : 1;
1141 size_t len1
= length();
1142 size_t len2
= s
.length();
1146 else if ( len1
> len2
)
1149 #else // wxUSE_UNICODE_WCHAR or ANSI
1150 return wxStricmp(m_impl
.c_str(), s
.m_impl
.c_str());
1158 #ifndef __SCHAR_MAX__
1159 #define __SCHAR_MAX__ 127
1163 wxString
wxString::FromAscii(const char *ascii
, size_t len
)
1165 if (!ascii
|| len
== 0)
1166 return wxEmptyString
;
1171 wxStringInternalBuffer
buf(res
, len
);
1172 wxStringCharType
*dest
= buf
;
1174 for ( ; len
> 0; --len
)
1176 unsigned char c
= (unsigned char)*ascii
++;
1177 wxASSERT_MSG( c
< 0x80,
1178 _T("Non-ASCII value passed to FromAscii().") );
1180 *dest
++ = (wchar_t)c
;
1187 wxString
wxString::FromAscii(const char *ascii
)
1189 return FromAscii(ascii
, wxStrlen(ascii
));
1192 wxString
wxString::FromAscii(char ascii
)
1194 // What do we do with '\0' ?
1196 unsigned char c
= (unsigned char)ascii
;
1198 wxASSERT_MSG( c
< 0x80, _T("Non-ASCII value passed to FromAscii().") );
1200 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1201 return wxString(wxUniChar((wchar_t)c
));
1204 const wxScopedCharBuffer
wxString::ToAscii() const
1206 // this will allocate enough space for the terminating NUL too
1207 wxCharBuffer
buffer(length());
1208 char *dest
= buffer
.data();
1210 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1213 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1214 *dest
++ = c
.IsAscii() ? (char)c
: '_';
1216 // the output string can't have embedded NULs anyhow, so we can safely
1217 // stop at first of them even if we do have any
1225 #endif // wxUSE_UNICODE
1227 // extract string of length nCount starting at nFirst
1228 wxString
wxString::Mid(size_t nFirst
, size_t nCount
) const
1230 size_t nLen
= length();
1232 // default value of nCount is npos and means "till the end"
1233 if ( nCount
== npos
)
1235 nCount
= nLen
- nFirst
;
1238 // out-of-bounds requests return sensible things
1239 if ( nFirst
+ nCount
> nLen
)
1241 nCount
= nLen
- nFirst
;
1244 if ( nFirst
> nLen
)
1246 // AllocCopy() will return empty string
1247 return wxEmptyString
;
1250 wxString
dest(*this, nFirst
, nCount
);
1251 if ( dest
.length() != nCount
)
1253 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1259 // check that the string starts with prefix and return the rest of the string
1260 // in the provided pointer if it is not NULL, otherwise return false
1261 bool wxString::StartsWith(const wxString
& prefix
, wxString
*rest
) const
1263 if ( compare(0, prefix
.length(), prefix
) != 0 )
1268 // put the rest of the string into provided pointer
1269 rest
->assign(*this, prefix
.length(), npos
);
1276 // check that the string ends with suffix and return the rest of it in the
1277 // provided pointer if it is not NULL, otherwise return false
1278 bool wxString::EndsWith(const wxString
& suffix
, wxString
*rest
) const
1280 int start
= length() - suffix
.length();
1282 if ( start
< 0 || compare(start
, npos
, suffix
) != 0 )
1287 // put the rest of the string into provided pointer
1288 rest
->assign(*this, 0, start
);
1295 // extract nCount last (rightmost) characters
1296 wxString
wxString::Right(size_t nCount
) const
1298 if ( nCount
> length() )
1301 wxString
dest(*this, length() - nCount
, nCount
);
1302 if ( dest
.length() != nCount
) {
1303 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1308 // get all characters after the last occurrence of ch
1309 // (returns the whole string if ch not found)
1310 wxString
wxString::AfterLast(wxUniChar ch
) const
1313 int iPos
= Find(ch
, true);
1314 if ( iPos
== wxNOT_FOUND
)
1317 str
.assign(*this, iPos
+ 1, npos
);
1322 // extract nCount first (leftmost) characters
1323 wxString
wxString::Left(size_t nCount
) const
1325 if ( nCount
> length() )
1328 wxString
dest(*this, 0, nCount
);
1329 if ( dest
.length() != nCount
) {
1330 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1335 // get all characters before the first occurrence of ch
1336 // (returns the whole string if ch not found)
1337 wxString
wxString::BeforeFirst(wxUniChar ch
) const
1339 int iPos
= Find(ch
);
1340 if ( iPos
== wxNOT_FOUND
)
1342 return wxString(*this, 0, iPos
);
1345 /// get all characters before the last occurrence of ch
1346 /// (returns empty string if ch not found)
1347 wxString
wxString::BeforeLast(wxUniChar ch
) const
1350 int iPos
= Find(ch
, true);
1351 if ( iPos
!= wxNOT_FOUND
&& iPos
!= 0 )
1352 str
= wxString(c_str(), iPos
);
1357 /// get all characters after the first occurrence of ch
1358 /// (returns empty string if ch not found)
1359 wxString
wxString::AfterFirst(wxUniChar ch
) const
1362 int iPos
= Find(ch
);
1363 if ( iPos
!= wxNOT_FOUND
)
1364 str
.assign(*this, iPos
+ 1, npos
);
1369 // replace first (or all) occurrences of some substring with another one
1370 size_t wxString::Replace(const wxString
& strOld
,
1371 const wxString
& strNew
, bool bReplaceAll
)
1373 // if we tried to replace an empty string we'd enter an infinite loop below
1374 wxCHECK_MSG( !strOld
.empty(), 0,
1375 _T("wxString::Replace(): invalid parameter") );
1377 wxSTRING_INVALIDATE_CACHE();
1379 size_t uiCount
= 0; // count of replacements made
1381 // optimize the special common case: replacement of one character by
1382 // another one (in UTF-8 case we can only do this for ASCII characters)
1384 // benchmarks show that this special version is around 3 times faster
1385 // (depending on the proportion of matching characters and UTF-8/wchar_t
1387 if ( strOld
.m_impl
.length() == 1 && strNew
.m_impl
.length() == 1 )
1389 const wxStringCharType chOld
= strOld
.m_impl
[0],
1390 chNew
= strNew
.m_impl
[0];
1392 // this loop is the simplified version of the one below
1393 for ( size_t pos
= 0; ; )
1395 pos
= m_impl
.find(chOld
, pos
);
1399 m_impl
[pos
++] = chNew
;
1407 else if ( !bReplaceAll
)
1409 size_t pos
= m_impl
.find(strOld
, 0);
1412 m_impl
.replace(pos
, strOld
.m_impl
.length(), strNew
.m_impl
);
1416 else // replace all occurrences
1418 const size_t uiOldLen
= strOld
.m_impl
.length();
1419 const size_t uiNewLen
= strNew
.m_impl
.length();
1421 // first scan the string to find all positions at which the replacement
1423 wxVector
<size_t> replacePositions
;
1426 for ( pos
= m_impl
.find(strOld
.m_impl
, 0);
1428 pos
= m_impl
.find(strOld
.m_impl
, pos
+ uiOldLen
))
1430 replacePositions
.push_back(pos
);
1437 // allocate enough memory for the whole new string
1439 tmp
.m_impl
.reserve(m_impl
.length() + uiCount
*(uiNewLen
- uiOldLen
));
1441 // copy this string to tmp doing replacements on the fly
1443 for ( pos
= 0; replNum
< uiCount
; replNum
++ )
1445 const size_t nextReplPos
= replacePositions
[replNum
];
1447 if ( pos
!= nextReplPos
)
1449 tmp
.m_impl
.append(m_impl
, pos
, nextReplPos
- pos
);
1452 tmp
.m_impl
.append(strNew
.m_impl
);
1453 pos
= nextReplPos
+ uiOldLen
;
1456 if ( pos
!= m_impl
.length() )
1458 // append the rest of the string unchanged
1459 tmp
.m_impl
.append(m_impl
, pos
, m_impl
.length() - pos
);
1468 bool wxString::IsAscii() const
1470 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1472 if ( !(*i
).IsAscii() )
1479 bool wxString::IsWord() const
1481 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1483 if ( !wxIsalpha(*i
) )
1490 bool wxString::IsNumber() const
1495 const_iterator i
= begin();
1497 if ( *i
== _T('-') || *i
== _T('+') )
1500 for ( ; i
!= end(); ++i
)
1502 if ( !wxIsdigit(*i
) )
1509 wxString
wxString::Strip(stripType w
) const
1512 if ( w
& leading
) s
.Trim(false);
1513 if ( w
& trailing
) s
.Trim(true);
1517 // ---------------------------------------------------------------------------
1519 // ---------------------------------------------------------------------------
1521 wxString
& wxString::MakeUpper()
1523 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1524 *it
= (wxChar
)wxToupper(*it
);
1529 wxString
& wxString::MakeLower()
1531 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1532 *it
= (wxChar
)wxTolower(*it
);
1537 wxString
& wxString::MakeCapitalized()
1539 const iterator en
= end();
1540 iterator it
= begin();
1543 *it
= (wxChar
)wxToupper(*it
);
1544 for ( ++it
; it
!= en
; ++it
)
1545 *it
= (wxChar
)wxTolower(*it
);
1551 // ---------------------------------------------------------------------------
1552 // trimming and padding
1553 // ---------------------------------------------------------------------------
1555 // some compilers (VC++ 6.0 not to name them) return true for a call to
1556 // isspace('\xEA') in the C locale which seems to be broken to me, but we have
1557 // to live with this by checking that the character is a 7 bit one - even if
1558 // this may fail to detect some spaces (I don't know if Unicode doesn't have
1559 // space-like symbols somewhere except in the first 128 chars), it is arguably
1560 // still better than trimming away accented letters
1561 inline int wxSafeIsspace(wxChar ch
) { return (ch
< 127) && wxIsspace(ch
); }
1563 // trims spaces (in the sense of isspace) from left or right side
1564 wxString
& wxString::Trim(bool bFromRight
)
1566 // first check if we're going to modify the string at all
1569 (bFromRight
&& wxSafeIsspace(GetChar(length() - 1))) ||
1570 (!bFromRight
&& wxSafeIsspace(GetChar(0u)))
1576 // find last non-space character
1577 reverse_iterator psz
= rbegin();
1578 while ( (psz
!= rend()) && wxSafeIsspace(*psz
) )
1581 // truncate at trailing space start
1582 erase(psz
.base(), end());
1586 // find first non-space character
1587 iterator psz
= begin();
1588 while ( (psz
!= end()) && wxSafeIsspace(*psz
) )
1591 // fix up data and length
1592 erase(begin(), psz
);
1599 // adds nCount characters chPad to the string from either side
1600 wxString
& wxString::Pad(size_t nCount
, wxUniChar chPad
, bool bFromRight
)
1602 wxString
s(chPad
, nCount
);
1615 // truncate the string
1616 wxString
& wxString::Truncate(size_t uiLen
)
1618 if ( uiLen
< length() )
1620 erase(begin() + uiLen
, end());
1622 //else: nothing to do, string is already short enough
1627 // ---------------------------------------------------------------------------
1628 // finding (return wxNOT_FOUND if not found and index otherwise)
1629 // ---------------------------------------------------------------------------
1632 int wxString::Find(wxUniChar ch
, bool bFromEnd
) const
1634 size_type idx
= bFromEnd
? find_last_of(ch
) : find_first_of(ch
);
1636 return (idx
== npos
) ? wxNOT_FOUND
: (int)idx
;
1639 // ----------------------------------------------------------------------------
1640 // conversion to numbers
1641 // ----------------------------------------------------------------------------
1643 // The implementation of all the functions below is exactly the same so factor
1644 // it out. Note that number extraction works correctly on UTF-8 strings, so
1645 // we can use wxStringCharType and wx_str() for maximum efficiency.
1648 #define DO_IF_NOT_WINCE(x) x
1650 #define DO_IF_NOT_WINCE(x)
1653 #define WX_STRING_TO_X_TYPE_START \
1654 wxCHECK_MSG( pVal, false, _T("NULL output pointer") ); \
1655 DO_IF_NOT_WINCE( errno = 0; ) \
1656 const wxStringCharType *start = wx_str(); \
1657 wxStringCharType *end;
1659 #define WX_STRING_TO_X_TYPE_END \
1660 /* return true only if scan was stopped by the terminating NUL and */ \
1661 /* if the string was not empty to start with and no under/overflow */ \
1663 if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) ) \
1668 bool wxString::ToLong(long *pVal
, int base
) const
1670 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), _T("invalid base") );
1672 WX_STRING_TO_X_TYPE_START
1673 long val
= wxStrtol(start
, &end
, base
);
1674 WX_STRING_TO_X_TYPE_END
1677 bool wxString::ToULong(unsigned long *pVal
, int base
) const
1679 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), _T("invalid base") );
1681 WX_STRING_TO_X_TYPE_START
1682 unsigned long val
= wxStrtoul(start
, &end
, base
);
1683 WX_STRING_TO_X_TYPE_END
1686 bool wxString::ToLongLong(wxLongLong_t
*pVal
, int base
) const
1688 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), _T("invalid base") );
1690 WX_STRING_TO_X_TYPE_START
1691 wxLongLong_t val
= wxStrtoll(start
, &end
, base
);
1692 WX_STRING_TO_X_TYPE_END
1695 bool wxString::ToULongLong(wxULongLong_t
*pVal
, int base
) const
1697 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), _T("invalid base") );
1699 WX_STRING_TO_X_TYPE_START
1700 wxULongLong_t val
= wxStrtoull(start
, &end
, base
);
1701 WX_STRING_TO_X_TYPE_END
1704 bool wxString::ToDouble(double *pVal
) const
1706 WX_STRING_TO_X_TYPE_START
1707 double val
= wxStrtod(start
, &end
);
1708 WX_STRING_TO_X_TYPE_END
1713 bool wxString::ToCLong(long *pVal
, int base
) const
1715 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), _T("invalid base") );
1717 WX_STRING_TO_X_TYPE_START
1718 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
1719 long val
= wxStrtol_lA(start
, &end
, base
, wxCLocale
);
1721 long val
= wxStrtol_l(start
, &end
, base
, wxCLocale
);
1723 WX_STRING_TO_X_TYPE_END
1726 bool wxString::ToCULong(unsigned long *pVal
, int base
) const
1728 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), _T("invalid base") );
1730 WX_STRING_TO_X_TYPE_START
1731 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
1732 unsigned long val
= wxStrtoul_lA(start
, &end
, base
, wxCLocale
);
1734 unsigned long val
= wxStrtoul_l(start
, &end
, base
, wxCLocale
);
1736 WX_STRING_TO_X_TYPE_END
1739 bool wxString::ToCDouble(double *pVal
) const
1741 WX_STRING_TO_X_TYPE_START
1742 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
1743 double val
= wxStrtod_lA(start
, &end
, wxCLocale
);
1745 double val
= wxStrtod_l(start
, &end
, wxCLocale
);
1747 WX_STRING_TO_X_TYPE_END
1750 #endif // wxUSE_XLOCALE
1752 // ---------------------------------------------------------------------------
1754 // ---------------------------------------------------------------------------
1756 #if !wxUSE_UTF8_LOCALE_ONLY
1758 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1759 wxString
wxStringPrintfMixinBase::DoFormatWchar(const wxChar
*format
, ...)
1761 wxString
wxString::DoFormatWchar(const wxChar
*format
, ...)
1765 va_start(argptr
, format
);
1768 s
.PrintfV(format
, argptr
);
1774 #endif // !wxUSE_UTF8_LOCALE_ONLY
1776 #if wxUSE_UNICODE_UTF8
1778 wxString
wxString::DoFormatUtf8(const char *format
, ...)
1781 va_start(argptr
, format
);
1784 s
.PrintfV(format
, argptr
);
1790 #endif // wxUSE_UNICODE_UTF8
1793 wxString
wxString::FormatV(const wxString
& format
, va_list argptr
)
1796 s
.PrintfV(format
, argptr
);
1800 #if !wxUSE_UTF8_LOCALE_ONLY
1801 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1802 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar
*format
, ...)
1804 int wxString::DoPrintfWchar(const wxChar
*format
, ...)
1808 va_start(argptr
, format
);
1810 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1811 // get a pointer to the wxString instance; 'this' is a wxStringPrintfMixinBase
1812 // here, so it has to be downcast in a way which remains safe when multiple
1813 // inheritance is used (NOTE: the code uses static_cast<>, not dynamic_cast<>):
1814 wxString
*str
= static_cast<wxString
*>(this);
1816 wxString
*str
= this;
1819 int iLen
= str
->PrintfV(format
, argptr
);
1825 #endif // !wxUSE_UTF8_LOCALE_ONLY
1827 #if wxUSE_UNICODE_UTF8
1828 int wxString::DoPrintfUtf8(const char *format
, ...)
1831 va_start(argptr
, format
);
1833 int iLen
= PrintfV(format
, argptr
);
1839 #endif // wxUSE_UNICODE_UTF8
1842 Uses wxVsnprintf and places the result into this string.
1844 In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1845 it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in
1846 the ISO C99 (and thus SUSv3) standard the return value for the case of
1847 an undersized buffer is inconsistent. For conforming vsnprintf
1848 implementations the function must return the number of characters that
1849 would have been printed had the buffer been large enough. For conforming
1850 vswprintf implementations the function must return a negative number
1853 What vswprintf sets errno to is undefined but Darwin seems to set it to
1854 EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of
1855 those are defined in the standard and backed up by several conformance
1856 statements. Note that ENOMEM mentioned in the manual page does not
1857 apply to swprintf, only wprintf and fwprintf.
1859 Official manual page:
1860 http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1862 Some conformance statements (AIX, Solaris):
1863 http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1864 http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1866 Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1867 EILSEQ and EINVAL are specifically defined to mean the error is other than
1868 an undersized buffer and no other errno are defined we treat those two
1869 as meaning hard errors and everything else gets the old behavior which
1870 is to keep looping and increasing buffer size until the function succeeds.
1872 In practice it's impossible to determine before compilation which behavior
1873 may be used. The vswprintf function may have vsnprintf-like behavior or
1874 vice-versa. Behavior detected on one release can theoretically change
1875 with an updated release. Not to mention that configure testing for it
1876 would require the test to be run on the host system, not the build system
1877 which makes cross compilation difficult. Therefore, we make no assumptions
1878 about behavior and try our best to handle every known case, including the
1879 case where wxVsnprintf returns a negative number and fails to set errno.
1881 There is yet one more non-standard implementation and that is our own.
1882 Fortunately, that can be detected at compile-time.
1884 On top of all that, ISO C99 explicitly defines snprintf to write a null
1885 character to the last position of the specified buffer. That would be at
1886 the given buffer size minus 1. It is supposed to do this even if it
1887 turns out that the buffer is sized too small.
1889 Darwin (tested on 10.5) follows the C99 behavior exactly.
1891 Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1892 errno even when it fails. However, it only seems to ever fail due
1893 to an undersized buffer.
1895 #if wxUSE_UNICODE_UTF8
1896 template<typename BufferType
>
1898 // we only need one version in non-UTF8 builds and at least two Windows
1899 // compilers have problems with this function template, so use just one
1900 // normal function here
1902 static int DoStringPrintfV(wxString
& str
,
1903 const wxString
& format
, va_list argptr
)
1909 #if wxUSE_UNICODE_UTF8
1910 BufferType
tmp(str
, size
+ 1);
1911 typename
BufferType::CharType
*buf
= tmp
;
1913 wxStringBuffer
tmp(str
, size
+ 1);
1921 // in UTF-8 build, leaving uninitialized junk in the buffer
1922 // could result in invalid non-empty UTF-8 string, so just
1923 // reset the string to empty on failure:
1928 // wxVsnprintf() may modify the original arg pointer, so pass it
1931 wxVaCopy(argptrcopy
, argptr
);
1934 // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1937 int len
= wxVsnprintf(buf
, size
, format
, argptrcopy
);
1940 // some implementations of vsnprintf() don't NUL terminate
1941 // the string if there is not enough space for it so
1942 // always do it manually
1943 // FIXME: This really seems to be wrong and would be an off-by-one
1944 // bug except the code above allocates an extra character.
1945 buf
[size
] = _T('\0');
1947 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1948 // total number of characters which would have been written if the
1949 // buffer were large enough (newer standards such as Unix98)
1952 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1953 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1954 // is true if *both* of them use our own implementation,
1955 // otherwise we can't be sure
1956 #if wxUSE_WXVSNPRINTF
1957 // we know that our own implementation of wxVsnprintf() returns -1
1958 // only for a format error - thus there's something wrong with
1959 // the user's format string
1962 #else // possibly using system version
1963 // assume it only returns error if there is not enough space, but
1964 // as we don't know how much we need, double the current size of
1967 if( (errno
== EILSEQ
) || (errno
== EINVAL
) )
1968 // If errno was set to one of the two well-known hard errors
1969 // then fail immediately to avoid an infinite loop.
1972 #endif // __WXWINCE__
1973 // still not enough, as we don't know how much we need, double the
1974 // current size of the buffer
1976 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1978 else if ( len
>= size
)
1980 #if wxUSE_WXVSNPRINTF
1981 // we know that our own implementation of wxVsnprintf() returns
1982 // size+1 when there's not enough space but that's not the size
1983 // of the required buffer!
1984 size
*= 2; // so we just double the current size of the buffer
1986 // some vsnprintf() implementations NUL-terminate the buffer and
1987 // some don't in len == size case, to be safe always add 1
1988 // FIXME: I don't quite understand this comment. The vsnprintf
1989 // function is specifically defined to return the number of
1990 // characters printed not including the null terminator.
1991 // So OF COURSE you need to add 1 to get the right buffer size.
1992 // The following line is definitely correct, no question.
1996 else // ok, there was enough space
2002 // we could have overshot
2005 return str
.length();
2008 int wxString::PrintfV(const wxString
& format
, va_list argptr
)
2010 #if wxUSE_UNICODE_UTF8
2011 #if wxUSE_STL_BASED_WXSTRING
2012 typedef wxStringTypeBuffer
<char> Utf8Buffer
;
2014 typedef wxStringInternalBuffer Utf8Buffer
;
2018 #if wxUSE_UTF8_LOCALE_ONLY
2019 return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argptr
);
2021 #if wxUSE_UNICODE_UTF8
2022 if ( wxLocaleIsUtf8
)
2023 return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argptr
);
2026 return DoStringPrintfV
<wxStringBuffer
>(*this, format
, argptr
);
2028 return DoStringPrintfV(*this, format
, argptr
);
2029 #endif // UTF8/WCHAR
2033 // ----------------------------------------------------------------------------
2034 // misc other operations
2035 // ----------------------------------------------------------------------------
2037 // returns true if the string matches the pattern which may contain '*' and
2038 // '?' metacharacters (as usual, '?' matches any character and '*' any number
2040 bool wxString::Matches(const wxString
& mask
) const
2042 // I disable this code as it doesn't seem to be faster (in fact, it seems
2043 // to be much slower) than the old, hand-written code below and using it
2044 // here requires always linking with libregex even if the user code doesn't
2046 #if 0 // wxUSE_REGEX
2047 // first translate the shell-like mask into a regex
2049 pattern
.reserve(wxStrlen(pszMask
));
2061 pattern
+= _T(".*");
2072 // these characters are special in a RE, quote them
2073 // (however note that we don't quote '[' and ']' to allow
2074 // using them for Unix shell like matching)
2075 pattern
+= _T('\\');
2079 pattern
+= *pszMask
;
2087 return wxRegEx(pattern
, wxRE_NOSUB
| wxRE_EXTENDED
).Matches(c_str());
2088 #else // !wxUSE_REGEX
2089 // TODO: this is, of course, awfully inefficient...
2091 // FIXME-UTF8: implement using iterators, remove #if
2092 #if wxUSE_UNICODE_UTF8
2093 const wxScopedWCharBuffer maskBuf
= mask
.wc_str();
2094 const wxScopedWCharBuffer txtBuf
= wc_str();
2095 const wxChar
*pszMask
= maskBuf
.data();
2096 const wxChar
*pszTxt
= txtBuf
.data();
2098 const wxChar
*pszMask
= mask
.wx_str();
2099 // the char currently being checked
2100 const wxChar
*pszTxt
= wx_str();
2103 // the last location where '*' matched
2104 const wxChar
*pszLastStarInText
= NULL
;
2105 const wxChar
*pszLastStarInMask
= NULL
;
2108 for ( ; *pszMask
!= wxT('\0'); pszMask
++, pszTxt
++ ) {
2109 switch ( *pszMask
) {
2111 if ( *pszTxt
== wxT('\0') )
2114 // pszTxt and pszMask will be incremented in the loop statement
2120 // remember where we started to be able to backtrack later
2121 pszLastStarInText
= pszTxt
;
2122 pszLastStarInMask
= pszMask
;
2124 // ignore special chars immediately following this one
2125 // (should this be an error?)
2126 while ( *pszMask
== wxT('*') || *pszMask
== wxT('?') )
2129 // if there is nothing more, match
2130 if ( *pszMask
== wxT('\0') )
2133 // are there any other metacharacters in the mask?
2135 const wxChar
*pEndMask
= wxStrpbrk(pszMask
, wxT("*?"));
2137 if ( pEndMask
!= NULL
) {
2138 // we have to match the string between two metachars
2139 uiLenMask
= pEndMask
- pszMask
;
2142 // we have to match the remainder of the string
2143 uiLenMask
= wxStrlen(pszMask
);
2146 wxString
strToMatch(pszMask
, uiLenMask
);
2147 const wxChar
* pMatch
= wxStrstr(pszTxt
, strToMatch
);
2148 if ( pMatch
== NULL
)
2151 // -1 to compensate "++" in the loop
2152 pszTxt
= pMatch
+ uiLenMask
- 1;
2153 pszMask
+= uiLenMask
- 1;
2158 if ( *pszMask
!= *pszTxt
)
2164 // match only if nothing left
2165 if ( *pszTxt
== wxT('\0') )
2168 // if we failed to match, backtrack if we can
2169 if ( pszLastStarInText
) {
2170 pszTxt
= pszLastStarInText
+ 1;
2171 pszMask
= pszLastStarInMask
;
2173 pszLastStarInText
= NULL
;
2175 // don't bother resetting pszLastStarInMask, it's unnecessary
2181 #endif // wxUSE_REGEX/!wxUSE_REGEX
2184 // Count the number of chars
2185 int wxString::Freq(wxUniChar ch
) const
2188 for ( const_iterator i
= begin(); i
!= end(); ++i
)