1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
13 // ===========================================================================
14 // headers, declarations, constants
15 // ===========================================================================
17 // For compilers that support precompilation, includes "wx.h".
18 #include "wx/wxprec.h"
25 #include "wx/string.h"
26 #include "wx/wxcrtvararg.h"
38 #include "wx/hashmap.h"
39 #include "wx/vector.h"
40 #include "wx/xlocale.h"
42 // string handling functions used by wxString:
43 #if wxUSE_UNICODE_UTF8
44 #define wxStringMemcpy memcpy
45 #define wxStringMemcmp memcmp
46 #define wxStringMemchr memchr
47 #define wxStringStrlen strlen
49 #define wxStringMemcpy wxTmemcpy
50 #define wxStringMemcmp wxTmemcmp
51 #define wxStringMemchr wxTmemchr
52 #define wxStringStrlen wxStrlen
55 // ----------------------------------------------------------------------------
57 // ----------------------------------------------------------------------------
62 static UntypedBufferData
s_untypedNullData(NULL
, 0);
64 UntypedBufferData
* const untypedNullDataPtr
= &s_untypedNullData
;
66 } // namespace wxPrivate
68 // ---------------------------------------------------------------------------
69 // static class variables definition
70 // ---------------------------------------------------------------------------
72 // According to the STL, npos _must_ be (size_t)-1
73 const size_t wxString::npos
= (size_t) -1;
75 #if wxUSE_STRING_POS_CACHE
77 #ifdef wxHAS_COMPILER_TLS
79 wxTLS_TYPE(wxString::Cache
) wxString::ms_cache
;
81 #else // !wxHAS_COMPILER_TLS
83 struct wxStrCacheInitializer
85 wxStrCacheInitializer()
87 // calling this function triggers s_cache initialization in it, and
88 // from now on it becomes safe to call from multiple threads
94 wxString::Cache& wxString::GetCache()
96 static wxTLS_TYPE(Cache) s_cache;
98 return wxTLS_VALUE(s_cache);
102 static wxStrCacheInitializer gs_stringCacheInit
;
104 #endif // wxHAS_COMPILER_TLS/!wxHAS_COMPILER_TLS
106 // gdb seems to be unable to display thread-local variables correctly, at least
107 // not my 6.4.98 version under amd64, so provide this debugging helper to do it
108 #if wxDEBUG_LEVEL >= 2
110 struct wxStrCacheDumper
112 static void ShowAll()
114 puts("*** wxString cache dump:");
115 for ( unsigned n
= 0; n
< wxString::Cache::SIZE
; n
++ )
117 const wxString::Cache::Element
&
118 c
= wxString::GetCacheBegin()[n
];
120 printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
122 n
== wxString::LastUsedCacheElement() ? " [*]" : "",
124 (unsigned long)c
.pos
,
125 (unsigned long)c
.impl
,
131 void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
133 #endif // wxDEBUG_LEVEL >= 2
135 #ifdef wxPROFILE_STRING_CACHE
137 wxString::CacheStats
wxString::ms_cacheStats
;
139 struct wxStrCacheStatsDumper
141 ~wxStrCacheStatsDumper()
143 const wxString::CacheStats
& stats
= wxString::ms_cacheStats
;
147 puts("*** wxString cache statistics:");
148 printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
150 printf("\tHits %u (of which %u not used) or %.2f%%\n",
153 100.*float(stats
.poshits
- stats
.mishits
)/stats
.postot
);
154 printf("\tAverage position requested: %.2f\n",
155 float(stats
.sumpos
) / stats
.postot
);
156 printf("\tAverage offset after cached hint: %.2f\n",
157 float(stats
.sumofs
) / stats
.postot
);
162 printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
163 stats
.lentot
, 100.*float(stats
.lenhits
)/stats
.lentot
);
168 static wxStrCacheStatsDumper s_showCacheStats
;
170 #endif // wxPROFILE_STRING_CACHE
172 #endif // wxUSE_STRING_POS_CACHE
174 // ----------------------------------------------------------------------------
176 // ----------------------------------------------------------------------------
178 #if wxUSE_STD_IOSTREAM
182 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxCStrData
& str
)
184 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
185 const wxScopedCharBuffer
buf(str
.AsCharBuf());
187 os
.clear(wxSTD
ios_base::failbit
);
193 return os
<< str
.AsInternal();
197 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxString
& str
)
199 return os
<< str
.c_str();
202 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxScopedCharBuffer
& str
)
204 return os
<< str
.data();
208 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxScopedWCharBuffer
& str
)
210 return os
<< str
.data();
214 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
216 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxString
& str
)
218 return wos
<< str
.wc_str();
221 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxCStrData
& str
)
223 return wos
<< str
.AsWChar();
226 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxScopedWCharBuffer
& str
)
228 return wos
<< str
.data();
231 #endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
233 #endif // wxUSE_STD_IOSTREAM
235 // ===========================================================================
236 // wxString class core
237 // ===========================================================================
239 #if wxUSE_UNICODE_UTF8
241 void wxString::PosLenToImpl(size_t pos
, size_t len
,
242 size_t *implPos
, size_t *implLen
) const
248 else // have valid start position
250 const const_iterator b
= GetIterForNthChar(pos
);
251 *implPos
= wxStringImpl::const_iterator(b
.impl()) - m_impl
.begin();
256 else // have valid length too
258 // we need to handle the case of length specifying a substring
259 // going beyond the end of the string, just as std::string does
260 const const_iterator
e(end());
262 while ( len
&& i
<= e
)
268 *implLen
= i
.impl() - b
.impl();
273 #endif // wxUSE_UNICODE_UTF8
275 // ----------------------------------------------------------------------------
276 // wxCStrData converted strings caching
277 // ----------------------------------------------------------------------------
279 // FIXME-UTF8: temporarily disabled because it doesn't work with global
280 // string objects; re-enable after fixing this bug and benchmarking
281 // performance to see if using a hash is a good idea at all
284 // For backward compatibility reasons, it must be possible to assign the value
285 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
286 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
287 // because the memory would be freed immediately, but it has to be valid as long
288 // as the string is not modified, so that code like this still works:
290 // const wxChar *s = str.c_str();
291 // while ( s ) { ... }
293 // FIXME-UTF8: not thread safe!
294 // FIXME-UTF8: we currently clear the cached conversion only when the string is
295 // destroyed, but we should do it when the string is modified, to
296 // keep memory usage down
297 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
298 // invalidated the cache on every change, we could keep the previous
300 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
301 // to use mb_str() or wc_str() instead of (const [w]char*)c_str()
304 static inline void DeleteStringFromConversionCache(T
& hash
, const wxString
*s
)
306 typename
T::iterator i
= hash
.find(wxConstCast(s
, wxString
));
307 if ( i
!= hash
.end() )
315 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
316 // so we have to use wxString* here and const-cast when used
317 WX_DECLARE_HASH_MAP(wxString
*, char*, wxPointerHash
, wxPointerEqual
,
318 wxStringCharConversionCache
);
319 static wxStringCharConversionCache gs_stringsCharCache
;
321 const char* wxCStrData::AsChar() const
323 // remove previously cached value, if any (see FIXMEs above):
324 DeleteStringFromConversionCache(gs_stringsCharCache
, m_str
);
326 // convert the string and keep it:
327 const char *s
= gs_stringsCharCache
[wxConstCast(m_str
, wxString
)] =
328 m_str
->mb_str().release();
332 #endif // wxUSE_UNICODE
334 #if !wxUSE_UNICODE_WCHAR
335 WX_DECLARE_HASH_MAP(wxString
*, wchar_t*, wxPointerHash
, wxPointerEqual
,
336 wxStringWCharConversionCache
);
337 static wxStringWCharConversionCache gs_stringsWCharCache
;
339 const wchar_t* wxCStrData::AsWChar() const
341 // remove previously cached value, if any (see FIXMEs above):
342 DeleteStringFromConversionCache(gs_stringsWCharCache
, m_str
);
344 // convert the string and keep it:
345 const wchar_t *s
= gs_stringsWCharCache
[wxConstCast(m_str
, wxString
)] =
346 m_str
->wc_str().release();
350 #endif // !wxUSE_UNICODE_WCHAR
352 wxString::~wxString()
355 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
356 DeleteStringFromConversionCache(gs_stringsCharCache
, this);
358 #if !wxUSE_UNICODE_WCHAR
359 DeleteStringFromConversionCache(gs_stringsWCharCache
, this);
364 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
365 const char* wxCStrData::AsChar() const
367 #if wxUSE_UNICODE_UTF8
368 if ( wxLocaleIsUtf8
)
371 // under non-UTF8 locales, we have to convert the internal UTF-8
372 // representation using wxConvLibc and cache the result
374 wxString
*str
= wxConstCast(m_str
, wxString
);
376 // convert the string:
378 // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
379 // have it) but it's unfortunately not obvious to implement
380 // because we don't know how big a buffer we need for the
381 // given string length (in case of multibyte encodings, e.g.
382 // ISO-2022-JP or UTF-8 when internal representation is wchar_t)
384 // One idea would be to store more than just m_convertedToChar
385 // in wxString: then we could record the length of the string
386 // which was converted the last time and try to reuse the same
387 // buffer if the current length is not greater than it (this
388 // could still fail because string could have been modified in
389 // place but it would work most of the time, so we'd do it and
390 // only allocate the new buffer if in-place conversion returned
391 // an error). We could also store a bit saying if the string
392 // was modified since the last conversion (and update it in all
393 // operation modifying the string, of course) to avoid unneeded
394 // consequential conversions. But both of these ideas require
395 // adding more fields to wxString and require profiling results
396 // to be sure that we really gain enough from them to justify
398 wxScopedCharBuffer
buf(str
->mb_str());
400 // if it failed, return empty string and not NULL to avoid crashes in code
401 // written with either wxWidgets 2 wxString or std::string behaviour in
402 // mind: neither of them ever returns NULL and so neither should we
406 if ( str
->m_convertedToChar
&&
407 strlen(buf
) == strlen(str
->m_convertedToChar
) )
409 // keep the same buffer for as long as possible, so that several calls
410 // to c_str() in a row still work:
411 strcpy(str
->m_convertedToChar
, buf
);
415 str
->m_convertedToChar
= buf
.release();
419 return str
->m_convertedToChar
+ m_offset
;
421 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
423 #if !wxUSE_UNICODE_WCHAR
424 const wchar_t* wxCStrData::AsWChar() const
426 wxString
*str
= wxConstCast(m_str
, wxString
);
428 // convert the string:
429 wxScopedWCharBuffer
buf(str
->wc_str());
431 // notice that here, unlike above in AsChar(), conversion can't fail as our
432 // internal UTF-8 is always well-formed -- or the string was corrupted and
433 // all bets are off anyhow
435 // FIXME-UTF8: do the conversion in-place in the existing buffer
436 if ( str
->m_convertedToWChar
&&
437 wxWcslen(buf
) == wxWcslen(str
->m_convertedToWChar
) )
439 // keep the same buffer for as long as possible, so that several calls
440 // to c_str() in a row still work:
441 memcpy(str
->m_convertedToWChar
, buf
, sizeof(wchar_t) * wxWcslen(buf
));
445 str
->m_convertedToWChar
= buf
.release();
449 return str
->m_convertedToWChar
+ m_offset
;
451 #endif // !wxUSE_UNICODE_WCHAR
453 // ===========================================================================
454 // wxString class core
455 // ===========================================================================
457 // ---------------------------------------------------------------------------
458 // construction and conversion
459 // ---------------------------------------------------------------------------
461 #if wxUSE_UNICODE_WCHAR
463 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
464 const wxMBConv
& conv
)
467 if ( !psz
|| nLength
== 0 )
468 return SubstrBufFromMB(wxWCharBuffer(L
""), 0);
470 if ( nLength
== npos
)
474 wxScopedWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
476 return SubstrBufFromMB(wxWCharBuffer(L
""), 0);
478 return SubstrBufFromMB(wcBuf
, wcLen
);
480 #endif // wxUSE_UNICODE_WCHAR
482 #if wxUSE_UNICODE_UTF8
484 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
485 const wxMBConv
& conv
)
488 if ( !psz
|| nLength
== 0 )
489 return SubstrBufFromMB(wxCharBuffer(""), 0);
491 // if psz is already in UTF-8, we don't have to do the roundtrip to
492 // wchar_t* and back:
495 // we need to validate the input because UTF8 iterators assume valid
496 // UTF-8 sequence and psz may be invalid:
497 if ( wxStringOperations::IsValidUtf8String(psz
, nLength
) )
499 // we must pass the real string length to SubstrBufFromMB ctor
500 if ( nLength
== npos
)
501 nLength
= psz
? strlen(psz
) : 0;
502 return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz
, nLength
),
505 // else: do the roundtrip through wchar_t*
508 if ( nLength
== npos
)
511 // first convert to wide string:
513 wxScopedWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
515 return SubstrBufFromMB(wxCharBuffer(""), 0);
517 // and then to UTF-8:
518 SubstrBufFromMB
buf(ConvertStr(wcBuf
, wcLen
, wxMBConvStrictUTF8()));
519 // widechar -> UTF-8 conversion isn't supposed to ever fail:
520 wxASSERT_MSG( buf
.data
, _T("conversion to UTF-8 failed") );
524 #endif // wxUSE_UNICODE_UTF8
526 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
528 wxString::SubstrBufFromWC
wxString::ConvertStr(const wchar_t *pwz
, size_t nLength
,
529 const wxMBConv
& conv
)
532 if ( !pwz
|| nLength
== 0 )
533 return SubstrBufFromWC(wxCharBuffer(""), 0);
535 if ( nLength
== npos
)
539 wxScopedCharBuffer
mbBuf(conv
.cWC2MB(pwz
, nLength
, &mbLen
));
541 return SubstrBufFromWC(wxCharBuffer(""), 0);
543 return SubstrBufFromWC(mbBuf
, mbLen
);
545 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
548 #if wxUSE_UNICODE_WCHAR
550 //Convert wxString in Unicode mode to a multi-byte string
551 const wxScopedCharBuffer
wxString::mb_str(const wxMBConv
& conv
) const
553 return conv
.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL
);
556 #elif wxUSE_UNICODE_UTF8
558 const wxScopedWCharBuffer
wxString::wc_str() const
560 return wxMBConvStrictUTF8().cMB2WC
563 m_impl
.length() + 1, // size, not length
568 const wxScopedCharBuffer
wxString::mb_str(const wxMBConv
& conv
) const
571 return wxScopedCharBuffer::CreateNonOwned(m_impl
.c_str(), m_impl
.length());
573 // FIXME-UTF8: use wc_str() here once we have buffers with length
576 wxScopedWCharBuffer wcBuf
578 wxMBConvStrictUTF8().cMB2WC
581 m_impl
.length() + 1, // size
586 return wxCharBuffer("");
588 return conv
.cWC2MB(wcBuf
, wcLen
+1, NULL
);
593 //Converts this string to a wide character string if unicode
594 //mode is not enabled and wxUSE_WCHAR_T is enabled
595 const wxScopedWCharBuffer
wxString::wc_str(const wxMBConv
& conv
) const
597 return conv
.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL
);
600 #endif // Unicode/ANSI
602 // shrink to minimal size (releasing extra memory)
603 bool wxString::Shrink()
605 wxString
tmp(begin(), end());
607 return tmp
.length() == length();
610 // deprecated compatibility code:
611 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
612 wxStringCharType
*wxString::GetWriteBuf(size_t nLen
)
614 return DoGetWriteBuf(nLen
);
617 void wxString::UngetWriteBuf()
622 void wxString::UngetWriteBuf(size_t nLen
)
624 DoUngetWriteBuf(nLen
);
626 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
629 // ---------------------------------------------------------------------------
631 // ---------------------------------------------------------------------------
633 // all functions are inline in string.h
635 // ---------------------------------------------------------------------------
636 // concatenation operators
637 // ---------------------------------------------------------------------------
640 * concatenation functions come in 5 flavours:
642 * char + string and string + char
643 * C str + string and string + C str
646 wxString
operator+(const wxString
& str1
, const wxString
& str2
)
648 #if !wxUSE_STL_BASED_WXSTRING
649 wxASSERT( str1
.IsValid() );
650 wxASSERT( str2
.IsValid() );
659 wxString
operator+(const wxString
& str
, wxUniChar ch
)
661 #if !wxUSE_STL_BASED_WXSTRING
662 wxASSERT( str
.IsValid() );
671 wxString
operator+(wxUniChar ch
, const wxString
& str
)
673 #if !wxUSE_STL_BASED_WXSTRING
674 wxASSERT( str
.IsValid() );
683 wxString
operator+(const wxString
& str
, const char *psz
)
685 #if !wxUSE_STL_BASED_WXSTRING
686 wxASSERT( str
.IsValid() );
690 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
691 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
699 wxString
operator+(const wxString
& str
, const wchar_t *pwz
)
701 #if !wxUSE_STL_BASED_WXSTRING
702 wxASSERT( str
.IsValid() );
706 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
707 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
715 wxString
operator+(const char *psz
, const wxString
& str
)
717 #if !wxUSE_STL_BASED_WXSTRING
718 wxASSERT( str
.IsValid() );
722 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
723 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
731 wxString
operator+(const wchar_t *pwz
, const wxString
& str
)
733 #if !wxUSE_STL_BASED_WXSTRING
734 wxASSERT( str
.IsValid() );
738 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
739 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
747 // ---------------------------------------------------------------------------
749 // ---------------------------------------------------------------------------
751 bool wxString::IsSameAs(wxUniChar c
, bool compareWithCase
) const
753 return (length() == 1) && (compareWithCase
? GetChar(0u) == c
754 : wxToupper(GetChar(0u)) == wxToupper(c
));
757 #ifdef HAVE_STD_STRING_COMPARE
759 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
760 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
761 // sort strings in characters code point order by sorting the byte sequence
762 // in byte values order (i.e. what strcmp() and memcmp() do).
764 int wxString::compare(const wxString
& str
) const
766 return m_impl
.compare(str
.m_impl
);
769 int wxString::compare(size_t nStart
, size_t nLen
,
770 const wxString
& str
) const
773 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
774 return m_impl
.compare(pos
, len
, str
.m_impl
);
777 int wxString::compare(size_t nStart
, size_t nLen
,
779 size_t nStart2
, size_t nLen2
) const
782 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
785 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
787 return m_impl
.compare(pos
, len
, str
.m_impl
, pos2
, len2
);
790 int wxString::compare(const char* sz
) const
792 return m_impl
.compare(ImplStr(sz
));
795 int wxString::compare(const wchar_t* sz
) const
797 return m_impl
.compare(ImplStr(sz
));
800 int wxString::compare(size_t nStart
, size_t nLen
,
801 const char* sz
, size_t nCount
) const
804 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
806 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
808 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
811 int wxString::compare(size_t nStart
, size_t nLen
,
812 const wchar_t* sz
, size_t nCount
) const
815 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
817 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
819 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
822 #else // !HAVE_STD_STRING_COMPARE
824 static inline int wxDoCmp(const wxStringCharType
* s1
, size_t l1
,
825 const wxStringCharType
* s2
, size_t l2
)
828 return wxStringMemcmp(s1
, s2
, l1
);
831 int ret
= wxStringMemcmp(s1
, s2
, l1
);
832 return ret
== 0 ? -1 : ret
;
836 int ret
= wxStringMemcmp(s1
, s2
, l2
);
837 return ret
== 0 ? +1 : ret
;
841 int wxString::compare(const wxString
& str
) const
843 return ::wxDoCmp(m_impl
.data(), m_impl
.length(),
844 str
.m_impl
.data(), str
.m_impl
.length());
847 int wxString::compare(size_t nStart
, size_t nLen
,
848 const wxString
& str
) const
850 wxASSERT(nStart
<= length());
851 size_type strLen
= length() - nStart
;
852 nLen
= strLen
< nLen
? strLen
: nLen
;
855 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
857 return ::wxDoCmp(m_impl
.data() + pos
, len
,
858 str
.m_impl
.data(), str
.m_impl
.length());
861 int wxString::compare(size_t nStart
, size_t nLen
,
863 size_t nStart2
, size_t nLen2
) const
865 wxASSERT(nStart
<= length());
866 wxASSERT(nStart2
<= str
.length());
867 size_type strLen
= length() - nStart
,
868 strLen2
= str
.length() - nStart2
;
869 nLen
= strLen
< nLen
? strLen
: nLen
;
870 nLen2
= strLen2
< nLen2
? strLen2
: nLen2
;
873 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
875 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
877 return ::wxDoCmp(m_impl
.data() + pos
, len
,
878 str
.m_impl
.data() + pos2
, len2
);
881 int wxString::compare(const char* sz
) const
883 SubstrBufFromMB
str(ImplStr(sz
, npos
));
884 if ( str
.len
== npos
)
885 str
.len
= wxStringStrlen(str
.data
);
886 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
889 int wxString::compare(const wchar_t* sz
) const
891 SubstrBufFromWC
str(ImplStr(sz
, npos
));
892 if ( str
.len
== npos
)
893 str
.len
= wxStringStrlen(str
.data
);
894 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
897 int wxString::compare(size_t nStart
, size_t nLen
,
898 const char* sz
, size_t nCount
) const
900 wxASSERT(nStart
<= length());
901 size_type strLen
= length() - nStart
;
902 nLen
= strLen
< nLen
? strLen
: nLen
;
905 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
907 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
908 if ( str
.len
== npos
)
909 str
.len
= wxStringStrlen(str
.data
);
911 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
914 int wxString::compare(size_t nStart
, size_t nLen
,
915 const wchar_t* sz
, size_t nCount
) const
917 wxASSERT(nStart
<= length());
918 size_type strLen
= length() - nStart
;
919 nLen
= strLen
< nLen
? strLen
: nLen
;
922 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
924 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
925 if ( str
.len
== npos
)
926 str
.len
= wxStringStrlen(str
.data
);
928 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
931 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
934 // ---------------------------------------------------------------------------
935 // find_{first,last}_[not]_of functions
936 // ---------------------------------------------------------------------------
938 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
940 // NB: All these functions are implemented with the argument being wxChar*,
941 // i.e. widechar string in any Unicode build, even though native string
942 // representation is char* in the UTF-8 build. This is because we couldn't
943 // use memchr() to determine if a character is in a set encoded as UTF-8.
945 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
) const
947 return find_first_of(sz
, nStart
, wxStrlen(sz
));
950 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
) const
952 return find_first_not_of(sz
, nStart
, wxStrlen(sz
));
955 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
957 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
960 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
962 if ( wxTmemchr(sz
, *i
, n
) )
969 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
971 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
974 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
976 if ( !wxTmemchr(sz
, *i
, n
) )
984 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
) const
986 return find_last_of(sz
, nStart
, wxStrlen(sz
));
989 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
) const
991 return find_last_not_of(sz
, nStart
, wxStrlen(sz
));
994 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
996 size_t len
= length();
998 if ( nStart
== npos
)
1004 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
1007 size_t idx
= nStart
;
1008 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
1009 i
!= rend(); --idx
, ++i
)
1011 if ( wxTmemchr(sz
, *i
, n
) )
1018 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
1020 size_t len
= length();
1022 if ( nStart
== npos
)
1028 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
1031 size_t idx
= nStart
;
1032 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
1033 i
!= rend(); --idx
, ++i
)
1035 if ( !wxTmemchr(sz
, *i
, n
) )
1042 size_t wxString::find_first_not_of(wxUniChar ch
, size_t nStart
) const
1044 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
1046 size_t idx
= nStart
;
1047 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
1056 size_t wxString::find_last_not_of(wxUniChar ch
, size_t nStart
) const
1058 size_t len
= length();
1060 if ( nStart
== npos
)
1066 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
1069 size_t idx
= nStart
;
1070 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
1071 i
!= rend(); --idx
, ++i
)
1080 // the functions above were implemented for wchar_t* arguments in Unicode
1081 // build and char* in ANSI build; below are implementations for the other
1084 #define wxOtherCharType char
1085 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
1087 #define wxOtherCharType wchar_t
1088 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
1091 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
) const
1092 { return find_first_of(STRCONV(sz
), nStart
); }
1094 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
,
1096 { return find_first_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1097 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
) const
1098 { return find_last_of(STRCONV(sz
), nStart
); }
1099 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
,
1101 { return find_last_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1102 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
1103 { return find_first_not_of(STRCONV(sz
), nStart
); }
1104 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
,
1106 { return find_first_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1107 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
1108 { return find_last_not_of(STRCONV(sz
), nStart
); }
1109 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
,
1111 { return find_last_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1113 #undef wxOtherCharType
1116 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1118 // ===========================================================================
1119 // other common string functions
1120 // ===========================================================================
1122 int wxString::CmpNoCase(const wxString
& s
) const
1124 #if wxUSE_UNICODE_UTF8
1125 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
1127 const_iterator i1
= begin();
1128 const_iterator end1
= end();
1129 const_iterator i2
= s
.begin();
1130 const_iterator end2
= s
.end();
1132 for ( ; i1
!= end1
&& i2
!= end2
; ++i1
, ++i2
)
1134 wxUniChar lower1
= (wxChar
)wxTolower(*i1
);
1135 wxUniChar lower2
= (wxChar
)wxTolower(*i2
);
1136 if ( lower1
!= lower2
)
1137 return lower1
< lower2
? -1 : 1;
1140 size_t len1
= length();
1141 size_t len2
= s
.length();
1145 else if ( len1
> len2
)
1148 #else // wxUSE_UNICODE_WCHAR or ANSI
1149 return wxStricmp(m_impl
.c_str(), s
.m_impl
.c_str());
1157 #ifndef __SCHAR_MAX__
1158 #define __SCHAR_MAX__ 127
1162 wxString
wxString::FromAscii(const char *ascii
, size_t len
)
1164 if (!ascii
|| len
== 0)
1165 return wxEmptyString
;
1170 wxStringInternalBuffer
buf(res
, len
);
1171 wxStringCharType
*dest
= buf
;
1173 for ( ; len
> 0; --len
)
1175 unsigned char c
= (unsigned char)*ascii
++;
1176 wxASSERT_MSG( c
< 0x80,
1177 _T("Non-ASCII value passed to FromAscii().") );
1179 *dest
++ = (wchar_t)c
;
1186 wxString
wxString::FromAscii(const char *ascii
)
1188 return FromAscii(ascii
, wxStrlen(ascii
));
1191 wxString
wxString::FromAscii(char ascii
)
1193 // What do we do with '\0' ?
1195 unsigned char c
= (unsigned char)ascii
;
1197 wxASSERT_MSG( c
< 0x80, _T("Non-ASCII value passed to FromAscii().") );
1199 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1200 return wxString(wxUniChar((wchar_t)c
));
1203 const wxScopedCharBuffer
wxString::ToAscii() const
1205 // this will allocate enough space for the terminating NUL too
1206 wxCharBuffer
buffer(length());
1207 char *dest
= buffer
.data();
1209 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1212 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1213 *dest
++ = c
.IsAscii() ? (char)c
: '_';
1215 // the output string can't have embedded NULs anyhow, so we can safely
1216 // stop at first of them even if we do have any
1224 #endif // wxUSE_UNICODE
1226 // extract string of length nCount starting at nFirst
1227 wxString
wxString::Mid(size_t nFirst
, size_t nCount
) const
1229 size_t nLen
= length();
1231 // default value of nCount is npos and means "till the end"
1232 if ( nCount
== npos
)
1234 nCount
= nLen
- nFirst
;
1237 // out-of-bounds requests return sensible things
1238 if ( nFirst
+ nCount
> nLen
)
1240 nCount
= nLen
- nFirst
;
1243 if ( nFirst
> nLen
)
1245 // AllocCopy() will return empty string
1246 return wxEmptyString
;
1249 wxString
dest(*this, nFirst
, nCount
);
1250 if ( dest
.length() != nCount
)
1252 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1258 // check that the string starts with prefix and return the rest of the string
1259 // in the provided pointer if it is not NULL, otherwise return false
1260 bool wxString::StartsWith(const wxString
& prefix
, wxString
*rest
) const
1262 if ( compare(0, prefix
.length(), prefix
) != 0 )
1267 // put the rest of the string into provided pointer
1268 rest
->assign(*this, prefix
.length(), npos
);
1275 // check that the string ends with suffix and return the rest of it in the
1276 // provided pointer if it is not NULL, otherwise return false
1277 bool wxString::EndsWith(const wxString
& suffix
, wxString
*rest
) const
1279 int start
= length() - suffix
.length();
1281 if ( start
< 0 || compare(start
, npos
, suffix
) != 0 )
1286 // put the rest of the string into provided pointer
1287 rest
->assign(*this, 0, start
);
1294 // extract nCount last (rightmost) characters
1295 wxString
wxString::Right(size_t nCount
) const
1297 if ( nCount
> length() )
1300 wxString
dest(*this, length() - nCount
, nCount
);
1301 if ( dest
.length() != nCount
) {
1302 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1307 // get all characters after the last occurrence of ch
1308 // (returns the whole string if ch not found)
1309 wxString
wxString::AfterLast(wxUniChar ch
) const
1312 int iPos
= Find(ch
, true);
1313 if ( iPos
== wxNOT_FOUND
)
1316 str
.assign(*this, iPos
+ 1, npos
);
1321 // extract nCount first (leftmost) characters
1322 wxString
wxString::Left(size_t nCount
) const
1324 if ( nCount
> length() )
1327 wxString
dest(*this, 0, nCount
);
1328 if ( dest
.length() != nCount
) {
1329 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1334 // get all characters before the first occurrence of ch
1335 // (returns the whole string if ch not found)
1336 wxString
wxString::BeforeFirst(wxUniChar ch
) const
1338 int iPos
= Find(ch
);
1339 if ( iPos
== wxNOT_FOUND
)
1341 return wxString(*this, 0, iPos
);
1344 /// get all characters before the last occurrence of ch
1345 /// (returns empty string if ch not found)
1346 wxString
wxString::BeforeLast(wxUniChar ch
) const
1349 int iPos
= Find(ch
, true);
1350 if ( iPos
!= wxNOT_FOUND
&& iPos
!= 0 )
1351 str
= wxString(c_str(), iPos
);
1356 /// get all characters after the first occurrence of ch
1357 /// (returns empty string if ch not found)
1358 wxString
wxString::AfterFirst(wxUniChar ch
) const
1361 int iPos
= Find(ch
);
1362 if ( iPos
!= wxNOT_FOUND
)
1363 str
.assign(*this, iPos
+ 1, npos
);
1368 // replace first (or all) occurrences of some substring with another one
1369 size_t wxString::Replace(const wxString
& strOld
,
1370 const wxString
& strNew
, bool bReplaceAll
)
1372 // if we tried to replace an empty string we'd enter an infinite loop below
1373 wxCHECK_MSG( !strOld
.empty(), 0,
1374 _T("wxString::Replace(): invalid parameter") );
1376 wxSTRING_INVALIDATE_CACHE();
1378 size_t uiCount
= 0; // count of replacements made
1380 // optimize the special common case: replacement of one character by
1381 // another one (in UTF-8 case we can only do this for ASCII characters)
1383 // benchmarks show that this special version is around 3 times faster
1384 // (depending on the proportion of matching characters and UTF-8/wchar_t
1386 if ( strOld
.m_impl
.length() == 1 && strNew
.m_impl
.length() == 1 )
1388 const wxStringCharType chOld
= strOld
.m_impl
[0],
1389 chNew
= strNew
.m_impl
[0];
1391 // this loop is the simplified version of the one below
1392 for ( size_t pos
= 0; ; )
1394 pos
= m_impl
.find(chOld
, pos
);
1398 m_impl
[pos
++] = chNew
;
1406 else if ( !bReplaceAll
)
1408 size_t pos
= m_impl
.find(strOld
, 0);
1411 m_impl
.replace(pos
, strOld
.m_impl
.length(), strNew
.m_impl
);
1415 else // replace all occurrences
1417 const size_t uiOldLen
= strOld
.m_impl
.length();
1418 const size_t uiNewLen
= strNew
.m_impl
.length();
1420 // first scan the string to find all positions at which the replacement
1422 wxVector
<size_t> replacePositions
;
1425 for ( pos
= m_impl
.find(strOld
.m_impl
, 0);
1427 pos
= m_impl
.find(strOld
.m_impl
, pos
+ uiOldLen
))
1429 replacePositions
.push_back(pos
);
1436 // allocate enough memory for the whole new string
1438 tmp
.m_impl
.reserve(m_impl
.length() + uiCount
*(uiNewLen
- uiOldLen
));
1440 // copy this string to tmp doing replacements on the fly
1442 for ( pos
= 0; replNum
< uiCount
; replNum
++ )
1444 const size_t nextReplPos
= replacePositions
[replNum
];
1446 if ( pos
!= nextReplPos
)
1448 tmp
.m_impl
.append(m_impl
, pos
, nextReplPos
- pos
);
1451 tmp
.m_impl
.append(strNew
.m_impl
);
1452 pos
= nextReplPos
+ uiOldLen
;
1455 if ( pos
!= m_impl
.length() )
1457 // append the rest of the string unchanged
1458 tmp
.m_impl
.append(m_impl
, pos
, m_impl
.length() - pos
);
1467 bool wxString::IsAscii() const
1469 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1471 if ( !(*i
).IsAscii() )
1478 bool wxString::IsWord() const
1480 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1482 if ( !wxIsalpha(*i
) )
1489 bool wxString::IsNumber() const
1494 const_iterator i
= begin();
1496 if ( *i
== _T('-') || *i
== _T('+') )
1499 for ( ; i
!= end(); ++i
)
1501 if ( !wxIsdigit(*i
) )
1508 wxString
wxString::Strip(stripType w
) const
1511 if ( w
& leading
) s
.Trim(false);
1512 if ( w
& trailing
) s
.Trim(true);
1516 // ---------------------------------------------------------------------------
1518 // ---------------------------------------------------------------------------
1520 wxString
& wxString::MakeUpper()
1522 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1523 *it
= (wxChar
)wxToupper(*it
);
1528 wxString
& wxString::MakeLower()
1530 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1531 *it
= (wxChar
)wxTolower(*it
);
1536 wxString
& wxString::MakeCapitalized()
1538 const iterator en
= end();
1539 iterator it
= begin();
1542 *it
= (wxChar
)wxToupper(*it
);
1543 for ( ++it
; it
!= en
; ++it
)
1544 *it
= (wxChar
)wxTolower(*it
);
1550 // ---------------------------------------------------------------------------
1551 // trimming and padding
1552 // ---------------------------------------------------------------------------
1554 // some compilers (VC++ 6.0 not to name them) return true for a call to
1555 // isspace('\xEA') in the C locale which seems to be broken to me, but we have
1556 // to live with this by checking that the character is a 7 bit one - even if
1557 // this may fail to detect some spaces (I don't know if Unicode doesn't have
1558 // space-like symbols somewhere except in the first 128 chars), it is arguably
1559 // still better than trimming away accented letters
1560 inline int wxSafeIsspace(wxChar ch
) { return (ch
< 127) && wxIsspace(ch
); }
1562 // trims spaces (in the sense of isspace) from left or right side
1563 wxString
& wxString::Trim(bool bFromRight
)
1565 // first check if we're going to modify the string at all
1568 (bFromRight
&& wxSafeIsspace(GetChar(length() - 1))) ||
1569 (!bFromRight
&& wxSafeIsspace(GetChar(0u)))
1575 // find last non-space character
1576 reverse_iterator psz
= rbegin();
1577 while ( (psz
!= rend()) && wxSafeIsspace(*psz
) )
1580 // truncate at trailing space start
1581 erase(psz
.base(), end());
1585 // find first non-space character
1586 iterator psz
= begin();
1587 while ( (psz
!= end()) && wxSafeIsspace(*psz
) )
1590 // fix up data and length
1591 erase(begin(), psz
);
1598 // adds nCount characters chPad to the string from either side
1599 wxString
& wxString::Pad(size_t nCount
, wxUniChar chPad
, bool bFromRight
)
1601 wxString
s(chPad
, nCount
);
1614 // truncate the string
1615 wxString
& wxString::Truncate(size_t uiLen
)
1617 if ( uiLen
< length() )
1619 erase(begin() + uiLen
, end());
1621 //else: nothing to do, string is already short enough
1626 // ---------------------------------------------------------------------------
1627 // finding (return wxNOT_FOUND if not found and index otherwise)
1628 // ---------------------------------------------------------------------------
1631 int wxString::Find(wxUniChar ch
, bool bFromEnd
) const
1633 size_type idx
= bFromEnd
? find_last_of(ch
) : find_first_of(ch
);
1635 return (idx
== npos
) ? wxNOT_FOUND
: (int)idx
;
1638 // ----------------------------------------------------------------------------
1639 // conversion to numbers
1640 // ----------------------------------------------------------------------------
// All the ToXXX() functions below share exactly the same prologue and
// epilogue, so both are factored out into the macros defined here. Number
// extraction works correctly on UTF-8 strings, which is why the macros can
// use wxStringCharType and wx_str() directly for maximum efficiency.
//
// Expected usage inside a "bool ToXXX(T *pVal, ...) const" member:
//
//      WX_STRING_TO_X_TYPE_START
//      T val = <conversion function>(start, &end, ...);
//      WX_STRING_TO_X_TYPE_END

// errno is only used when not building for Windows CE
#ifndef __WXWINCE__
    #define DO_IF_NOT_WINCE(x) x
#else
    #define DO_IF_NOT_WINCE(x)
#endif

// checks the output pointer, resets errno and declares the "start" and "end"
// pointers used by the conversion function
#define WX_STRING_TO_X_TYPE_START                                           \
    wxCHECK_MSG( pVal, false, _T("NULL output pointer") );                  \
    DO_IF_NOT_WINCE( errno = 0; )                                           \
    const wxStringCharType *start = wx_str();                               \
    wxStringCharType *end;

// stores the converted value "val" in *pVal and returns from the function
#define WX_STRING_TO_X_TYPE_END                                             \
    /* return true only if scan was stopped by the terminating NUL and */   \
    /* if the string was not empty to start with and no under/overflow */   \
    /* occurred: */                                                         \
    if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )         \
        return false;                                                       \
    *pVal = val;                                                            \
    return true;
1667 bool wxString::ToLong(long *pVal
, int base
) const
1669 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), _T("invalid base") );
1671 WX_STRING_TO_X_TYPE_START
1672 long val
= wxStrtol(start
, &end
, base
);
1673 WX_STRING_TO_X_TYPE_END
1676 bool wxString::ToULong(unsigned long *pVal
, int base
) const
1678 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), _T("invalid base") );
1680 WX_STRING_TO_X_TYPE_START
1681 unsigned long val
= wxStrtoul(start
, &end
, base
);
1682 WX_STRING_TO_X_TYPE_END
1685 bool wxString::ToLongLong(wxLongLong_t
*pVal
, int base
) const
1687 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), _T("invalid base") );
1689 WX_STRING_TO_X_TYPE_START
1690 wxLongLong_t val
= wxStrtoll(start
, &end
, base
);
1691 WX_STRING_TO_X_TYPE_END
1694 bool wxString::ToULongLong(wxULongLong_t
*pVal
, int base
) const
1696 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), _T("invalid base") );
1698 WX_STRING_TO_X_TYPE_START
1699 wxULongLong_t val
= wxStrtoull(start
, &end
, base
);
1700 WX_STRING_TO_X_TYPE_END
1703 bool wxString::ToDouble(double *pVal
) const
1705 WX_STRING_TO_X_TYPE_START
1706 double val
= wxStrtod(start
, &end
);
1707 WX_STRING_TO_X_TYPE_END
1712 bool wxString::ToCLong(long *pVal
, int base
) const
1714 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), _T("invalid base") );
1716 WX_STRING_TO_X_TYPE_START
1717 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
1718 long val
= wxStrtol_lA(start
, &end
, base
, wxCLocale
);
1720 long val
= wxStrtol_l(start
, &end
, base
, wxCLocale
);
1722 WX_STRING_TO_X_TYPE_END
1725 bool wxString::ToCULong(unsigned long *pVal
, int base
) const
1727 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), _T("invalid base") );
1729 WX_STRING_TO_X_TYPE_START
1730 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
1731 unsigned long val
= wxStrtoul_lA(start
, &end
, base
, wxCLocale
);
1733 unsigned long val
= wxStrtoul_l(start
, &end
, base
, wxCLocale
);
1735 WX_STRING_TO_X_TYPE_END
1738 bool wxString::ToCDouble(double *pVal
) const
1740 WX_STRING_TO_X_TYPE_START
1741 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
1742 double val
= wxStrtod_lA(start
, &end
, wxCLocale
);
1744 double val
= wxStrtod_l(start
, &end
, wxCLocale
);
1746 WX_STRING_TO_X_TYPE_END
1749 #endif // wxUSE_XLOCALE
1751 // ---------------------------------------------------------------------------
1753 // ---------------------------------------------------------------------------
1755 #if !wxUSE_UTF8_LOCALE_ONLY
1757 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1758 wxString
wxStringPrintfMixinBase::DoFormatWchar(const wxChar
*format
, ...)
1760 wxString
wxString::DoFormatWchar(const wxChar
*format
, ...)
1764 va_start(argptr
, format
);
1767 s
.PrintfV(format
, argptr
);
1773 #endif // !wxUSE_UTF8_LOCALE_ONLY
#if wxUSE_UNICODE_UTF8

// Build and return a new string from the printf()-like char format and the
// variable arguments following it; the real work is done by PrintfV().
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}

#endif // wxUSE_UNICODE_UTF8
1792 wxString
wxString::FormatV(const wxString
& format
, va_list argptr
)
1795 s
.PrintfV(format
, argptr
);
1799 #if !wxUSE_UTF8_LOCALE_ONLY
1800 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1801 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar
*format
, ...)
1803 int wxString::DoPrintfWchar(const wxChar
*format
, ...)
1807 va_start(argptr
, format
);
1809 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1810 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1811 // because it's the only cast that works safely for downcasting when
1812 // multiple inheritance is used:
1813 wxString
*str
= static_cast<wxString
*>(this);
1815 wxString
*str
= this;
1818 int iLen
= str
->PrintfV(format
, argptr
);
1824 #endif // !wxUSE_UTF8_LOCALE_ONLY
#if wxUSE_UNICODE_UTF8

// Replace the contents of the string with the result of formatting the
// variable arguments according to the printf()-like char format and return
// whatever PrintfV() returns.
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);
    const int iLen = PrintfV(format, args);
    va_end(args);

    return iLen;
}

#endif // wxUSE_UNICODE_UTF8
1841 Uses wxVsnprintf and places the result into the this string.
1843 In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1844 it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in
1845 the ISO C99 (and thus SUSv3) standard the return value for the case of
1846 an undersized buffer is inconsistent. For conforming vsnprintf
1847 implementations the function must return the number of characters that
1848 would have been printed had the buffer been large enough. For conforming
1849 vswprintf implementations the function must return a negative number
1852 What vswprintf sets errno to is undefined but Darwin seems to set it to
1853 EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of
1854 those are defined in the standard and backed up by several conformance
1855 statements. Note that ENOMEM mentioned in the manual page does not
1856 apply to swprintf, only wprintf and fwprintf.
1858 Official manual page:
1859 http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1861 Some conformance statements (AIX, Solaris):
1862 http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1863 http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1865 Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1866 EILSEQ and EINVAL are specifically defined to mean the error is other than
1867 an undersized buffer and no other errno are defined we treat those two
1868 as meaning hard errors and everything else gets the old behavior which
1869 is to keep looping and increasing buffer size until the function succeeds.
1871 In practice it's impossible to determine before compilation which behavior
1872 may be used. The vswprintf function may have vsnprintf-like behavior or
1873 vice-versa. Behavior detected on one release can theoretically change
1874 with an updated release. Not to mention that configure testing for it
1875 would require the test to be run on the host system, not the build system
1876 which makes cross compilation difficult. Therefore, we make no assumptions
1877 about behavior and try our best to handle every known case, including the
1878 case where wxVsnprintf returns a negative number and fails to set errno.
1880 There is yet one more non-standard implementation and that is our own.
1881 Fortunately, that can be detected at compile-time.
1883 On top of all that, ISO C99 explicitly defines snprintf to write a null
1884 character to the last position of the specified buffer. That would be
1885 at the given buffer size minus 1. It is supposed to do this even if it
1886 turns out that the buffer is sized too small.
1888 Darwin (tested on 10.5) follows the C99 behavior exactly.
1890 Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1891 errno even when it fails. However, it only seems to ever fail due
1892 to an undersized buffer.
// Helper used by wxString::PrintfV(): formats the arguments in argptr
// according to format by calling wxVsnprintf() on a buffer attached to str
// and returns str.length() once the output has fit into the buffer.
//
// The required buffer size is not known in advance, so the buffer is grown
// whenever wxVsnprintf() indicates that the output didn't fit; the long
// comment preceding this function explains why the possible failure modes of
// the underlying C functions can't be told apart reliably.
//
// In UTF-8 builds this is a template parametrized by the type of the buffer
// used to write into str, in all the other builds it is a plain function
// using wxStringBuffer.
1894 #if wxUSE_UNICODE_UTF8
1895 template<typename BufferType
>
1897 // we only need one version in non-UTF8 builds and at least two Windows
1898 // compilers have problems with this function template, so use just one
1899 // normal function here
1901 static int DoStringPrintfV(wxString
& str
,
1902 const wxString
& format
, va_list argptr
)
// the buffer is created with room for size + 1 characters: the extra one is
// what makes the unconditional write to buf[size] further down valid
1908 #if wxUSE_UNICODE_UTF8
1909 BufferType
tmp(str
, size
+ 1);
1910 typename
BufferType::CharType
*buf
= tmp
;
1912 wxStringBuffer
tmp(str
, size
+ 1);
1920 // in UTF-8 build, leaving uninitialized junk in the buffer
1921 // could result in invalid non-empty UTF-8 string, so just
1922 // reset the string to empty on failure:
1927 // wxVsnprintf() may modify the original arg pointer, so pass it
1930 wxVaCopy(argptrcopy
, argptr
);
1933 // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1936 int len
= wxVsnprintf(buf
, size
, format
, argptrcopy
);
1939 // some implementations of vsnprintf() don't NUL terminate
1940 // the string if there is not enough space for it so
1941 // always do it manually
1942 // FIXME: This really seems to be the wrong and would be an off-by-one
1943 // bug except the code above allocates an extra character.
1944 buf
[size
] = _T('\0');
1946 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1947 // total number of characters which would have been written if the
1948 // buffer were large enough (newer standards such as Unix98)
1951 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1952 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1953 // is true if *both* of them use our own implementation,
1954 // otherwise we can't be sure
1955 #if wxUSE_WXVSNPRINTF
1956 // we know that our own implementation of wxVsnprintf() returns -1
1957 // only for a format error - thus there's something wrong with
1958 // the user's format string
1961 #else // possibly using system version
1962 // assume it only returns error if there is not enough space, but
1963 // as we don't know how much we need, double the current size of
1966 if( (errno
== EILSEQ
) || (errno
== EINVAL
) )
1967 // If errno was set to one of the two well-known hard errors
1968 // then fail immediately to avoid an infinite loop.
1971 #endif // __WXWINCE__
1972 // still not enough, as we don't know how much we need, double the
1973 // current size of the buffer
1975 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
// a return value of at least size means that the output was truncated
1977 else if ( len
>= size
)
1979 #if wxUSE_WXVSNPRINTF
1980 // we know that our own implementation of wxVsnprintf() returns
1981 // size+1 when there's not enough space but that's not the size
1982 // of the required buffer!
1983 size
*= 2; // so we just double the current size of the buffer
1985 // some vsnprintf() implementations NUL-terminate the buffer and
1986 // some don't in len == size case, to be safe always add 1
1987 // FIXME: I don't quite understand this comment. The vsnprintf
1988 // function is specifically defined to return the number of
1989 // characters printed not including the null terminator.
1990 // So OF COURSE you need to add 1 to get the right buffer size.
1991 // The following line is definitely correct, no question.
1995 else // ok, there was enough space
2001 // we could have overshot
2004 return str
.length();
2007 int wxString::PrintfV(const wxString
& format
, va_list argptr
)
2009 #if wxUSE_UNICODE_UTF8
2010 #if wxUSE_STL_BASED_WXSTRING
2011 typedef wxStringTypeBuffer
<char> Utf8Buffer
;
2013 typedef wxStringInternalBuffer Utf8Buffer
;
2017 #if wxUSE_UTF8_LOCALE_ONLY
2018 return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argptr
);
2020 #if wxUSE_UNICODE_UTF8
2021 if ( wxLocaleIsUtf8
)
2022 return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argptr
);
2025 return DoStringPrintfV
<wxStringBuffer
>(*this, format
, argptr
);
2027 return DoStringPrintfV(*this, format
, argptr
);
2028 #endif // UTF8/WCHAR
2032 // ----------------------------------------------------------------------------
2033 // misc other operations
2034 // ----------------------------------------------------------------------------
2036 // returns true if the string matches the pattern which may contain '*' and
2037 // '?' metacharacters (as usual, '?' matches any character and '*' any number
// of them)
//
// mask is the shell-like wildcard pattern and the text being matched is this
// string itself. The hand-written matcher in the second preprocessor branch
// remembers the position of the last '*' seen in both the mask and the text
// so that it can back up to it when a later part of the mask fails to match.
2039 bool wxString::Matches(const wxString
& mask
) const
2041 // I disable this code as it doesn't seem to be faster (in fact, it seems
2042 // to be much slower) than the old, hand-written code below and using it
2043 // here requires always linking with libregex even if the user code doesn't
// NOTE: the regex-based branch is compiled out by the literal 0 below, only
// the hand-written matcher in the other branch is ever built
2045 #if 0 // wxUSE_REGEX
2046 // first translate the shell-like mask into a regex
2048 pattern
.reserve(wxStrlen(pszMask
));
2060 pattern
+= _T(".*");
2071 // these characters are special in a RE, quote them
2072 // (however note that we don't quote '[' and ']' to allow
2073 // using them for Unix shell like matching)
2074 pattern
+= _T('\\');
2078 pattern
+= *pszMask
;
2086 return wxRegEx(pattern
, wxRE_NOSUB
| wxRE_EXTENDED
).Matches(c_str());
2087 #else // !wxUSE_REGEX
2088 // TODO: this is, of course, awfully inefficient...
2090 // FIXME-UTF8: implement using iterators, remove #if
// in UTF-8 builds both the mask and the text are converted to wide character
// buffers first (presumably so that the pointer-based matching below can
// treat every array element as one character)
2091 #if wxUSE_UNICODE_UTF8
2092 const wxScopedWCharBuffer maskBuf
= mask
.wc_str();
2093 const wxScopedWCharBuffer txtBuf
= wc_str();
2094 const wxChar
*pszMask
= maskBuf
.data();
2095 const wxChar
*pszTxt
= txtBuf
.data();
2097 const wxChar
*pszMask
= mask
.wx_str();
2098 // the char currently being checked
2099 const wxChar
*pszTxt
= wx_str();
2102 // the last location where '*' matched
2103 const wxChar
*pszLastStarInText
= NULL
;
2104 const wxChar
*pszLastStarInMask
= NULL
;
2107 for ( ; *pszMask
!= wxT('\0'); pszMask
++, pszTxt
++ ) {
2108 switch ( *pszMask
) {
2110 if ( *pszTxt
== wxT('\0') )
2113 // pszTxt and pszMask will be incremented in the loop statement
2119 // remember where we started to be able to backtrack later
2120 pszLastStarInText
= pszTxt
;
2121 pszLastStarInMask
= pszMask
;
2123 // ignore special chars immediately following this one
2124 // (should this be an error?)
// NOTE(review): as the condition below shows, a '?' directly following a '*'
// is skipped together with any repeated '*' -- confirm that not requiring a
// character for such a '?' is intended
2125 while ( *pszMask
== wxT('*') || *pszMask
== wxT('?') )
2128 // if there is nothing more, match
2129 if ( *pszMask
== wxT('\0') )
2132 // are there any other metacharacters in the mask?
2134 const wxChar
*pEndMask
= wxStrpbrk(pszMask
, wxT("*?"));
2136 if ( pEndMask
!= NULL
) {
2137 // we have to match the string between two metachars
2138 uiLenMask
= pEndMask
- pszMask
;
2141 // we have to match the remainder of the string
2142 uiLenMask
= wxStrlen(pszMask
);
// look for the literal part of the mask following the '*' in the remaining
// text
2145 wxString
strToMatch(pszMask
, uiLenMask
);
2146 const wxChar
* pMatch
= wxStrstr(pszTxt
, strToMatch
);
2147 if ( pMatch
== NULL
)
2150 // -1 to compensate "++" in the loop
2151 pszTxt
= pMatch
+ uiLenMask
- 1;
2152 pszMask
+= uiLenMask
- 1;
2157 if ( *pszMask
!= *pszTxt
)
2163 // match only if nothing left
2164 if ( *pszTxt
== wxT('\0') )
2167 // if we failed to match, backtrack if we can
2168 if ( pszLastStarInText
) {
2169 pszTxt
= pszLastStarInText
+ 1;
2170 pszMask
= pszLastStarInMask
;
2172 pszLastStarInText
= NULL
;
2174 // don't bother resetting pszLastStarInMask, it's unnecessary
2180 #endif // wxUSE_REGEX/!wxUSE_REGEX
2183 // Count the number of chars
2184 int wxString::Freq(wxUniChar ch
) const
2187 for ( const_iterator i
= begin(); i
!= end(); ++i
)