1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
13 // ===========================================================================
14 // headers, declarations, constants
15 // ===========================================================================
17 // For compilers that support precompilation, includes "wx.h".
18 #include "wx/wxprec.h"
25 #include "wx/string.h"
26 #include "wx/wxcrtvararg.h"
39 #include "wx/hashmap.h"
40 #include "wx/vector.h"
41 #include "wx/xlocale.h"
44 #include "wx/msw/wrapwin.h"
47 // string handling functions used by wxString:
48 #if wxUSE_UNICODE_UTF8
49 #define wxStringMemcpy memcpy
50 #define wxStringMemcmp memcmp
51 #define wxStringMemchr memchr
52 #define wxStringStrlen strlen
54 #define wxStringMemcpy wxTmemcpy
55 #define wxStringMemcmp wxTmemcmp
56 #define wxStringMemchr wxTmemchr
57 #define wxStringStrlen wxStrlen
60 // define a function declared in wx/buffer.h here as we don't have buffer.cpp
61 // and don't want to add it just because of this simple function
65 // wxXXXBuffer classes can be (implicitly) used during global statics
66 // initialization so wrap the static UntypedBufferData variable in a function
67 // to make it safe to access it even before all global statics are initialized
68 UntypedBufferData
*GetUntypedNullData()
70 static UntypedBufferData
s_untypedNullData(NULL
, 0);
72 return &s_untypedNullData
;
75 } // namespace wxPrivate
77 // ---------------------------------------------------------------------------
78 // static class variables definition
79 // ---------------------------------------------------------------------------
81 // According to the STL, npos _must_ be (size_t)-1
82 const size_t wxString::npos
= (size_t) -1;
84 #if wxUSE_STRING_POS_CACHE
86 #ifdef wxHAS_COMPILER_TLS
88 wxTLS_TYPE(wxString::Cache
) wxString::ms_cache
;
90 #else // !wxHAS_COMPILER_TLS
92 struct wxStrCacheInitializer
94 wxStrCacheInitializer()
96 // calling this function triggers s_cache initialization in it, and
97 // from now on it becomes safe to call from multiple threads
103 wxString::Cache& wxString::GetCache()
105 static wxTLS_TYPE(Cache) s_cache;
107 return wxTLS_VALUE(s_cache);
111 static wxStrCacheInitializer gs_stringCacheInit
;
113 #endif // wxHAS_COMPILER_TLS/!wxHAS_COMPILER_TLS
115 // gdb seems to be unable to display thread-local variables correctly, at least
116 // not my 6.4.98 version under amd64, so provide this debugging helper to do it
117 #if wxDEBUG_LEVEL >= 2
119 struct wxStrCacheDumper
121 static void ShowAll()
123 puts("*** wxString cache dump:");
124 for ( unsigned n
= 0; n
< wxString::Cache::SIZE
; n
++ )
126 const wxString::Cache::Element
&
127 c
= wxString::GetCacheBegin()[n
];
129 printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
131 n
== wxString::LastUsedCacheElement() ? " [*]" : "",
133 (unsigned long)c
.pos
,
134 (unsigned long)c
.impl
,
140 void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
142 #endif // wxDEBUG_LEVEL >= 2
144 #ifdef wxPROFILE_STRING_CACHE
146 wxString::CacheStats
wxString::ms_cacheStats
;
148 struct wxStrCacheStatsDumper
150 ~wxStrCacheStatsDumper()
152 const wxString::CacheStats
& stats
= wxString::ms_cacheStats
;
156 puts("*** wxString cache statistics:");
157 printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
159 printf("\tHits %u (of which %u not used) or %.2f%%\n",
162 100.*float(stats
.poshits
- stats
.mishits
)/stats
.postot
);
163 printf("\tAverage position requested: %.2f\n",
164 float(stats
.sumpos
) / stats
.postot
);
165 printf("\tAverage offset after cached hint: %.2f\n",
166 float(stats
.sumofs
) / stats
.postot
);
171 printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
172 stats
.lentot
, 100.*float(stats
.lenhits
)/stats
.lentot
);
177 static wxStrCacheStatsDumper s_showCacheStats
;
179 #endif // wxPROFILE_STRING_CACHE
181 #endif // wxUSE_STRING_POS_CACHE
183 // ----------------------------------------------------------------------------
185 // ----------------------------------------------------------------------------
187 #if wxUSE_STD_IOSTREAM
191 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxCStrData
& str
)
193 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
194 const wxScopedCharBuffer
buf(str
.AsCharBuf());
196 os
.clear(wxSTD
ios_base::failbit
);
202 return os
<< str
.AsInternal();
206 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxString
& str
)
208 return os
<< str
.c_str();
211 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxScopedCharBuffer
& str
)
213 return os
<< str
.data();
217 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxScopedWCharBuffer
& str
)
219 return os
<< str
.data();
223 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
225 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxString
& str
)
227 return wos
<< str
.wc_str();
230 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxCStrData
& str
)
232 return wos
<< str
.AsWChar();
235 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxScopedWCharBuffer
& str
)
237 return wos
<< str
.data();
240 #endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
242 #endif // wxUSE_STD_IOSTREAM
244 // ===========================================================================
245 // wxString class core
246 // ===========================================================================
248 #if wxUSE_UNICODE_UTF8
250 void wxString::PosLenToImpl(size_t pos
, size_t len
,
251 size_t *implPos
, size_t *implLen
) const
257 else // have valid start position
259 const const_iterator b
= GetIterForNthChar(pos
);
260 *implPos
= wxStringImpl::const_iterator(b
.impl()) - m_impl
.begin();
265 else // have valid length too
267 // we need to handle the case of length specifying a substring
268 // going beyond the end of the string, just as std::string does
269 const const_iterator
e(end());
271 while ( len
&& i
<= e
)
277 *implLen
= i
.impl() - b
.impl();
282 #endif // wxUSE_UNICODE_UTF8
284 // ----------------------------------------------------------------------------
285 // wxCStrData converted strings caching
286 // ----------------------------------------------------------------------------
288 // FIXME-UTF8: temporarily disabled because it doesn't work with global
289 // string objects; re-enable after fixing this bug and benchmarking
290 // performance to see if using a hash is a good idea at all
293 // For backward compatibility reasons, it must be possible to assign the value
294 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
295 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
296 // because the memory would be freed immediately, but it has to be valid as long
297 // as the string is not modified, so that code like this still works:
299 // const wxChar *s = str.c_str();
300 // while ( s ) { ... }
302 // FIXME-UTF8: not thread safe!
303 // FIXME-UTF8: we currently clear the cached conversion only when the string is
304 // destroyed, but we should do it when the string is modified, to
305 // keep memory usage down
306 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
307 // invalidated the cache on every change, we could keep the previous
309 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
310 // to use mb_str() or wc_str() instead of (const [w]char*)c_str()
313 static inline void DeleteStringFromConversionCache(T
& hash
, const wxString
*s
)
315 typename
T::iterator i
= hash
.find(wxConstCast(s
, wxString
));
316 if ( i
!= hash
.end() )
324 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
325 // so we have to use wxString* here and const-cast when used
326 WX_DECLARE_HASH_MAP(wxString
*, char*, wxPointerHash
, wxPointerEqual
,
327 wxStringCharConversionCache
);
328 static wxStringCharConversionCache gs_stringsCharCache
;
330 const char* wxCStrData::AsChar() const
332 // remove previously cached value, if any (see FIXMEs above):
333 DeleteStringFromConversionCache(gs_stringsCharCache
, m_str
);
335 // convert the string and keep it:
336 const char *s
= gs_stringsCharCache
[wxConstCast(m_str
, wxString
)] =
337 m_str
->mb_str().release();
341 #endif // wxUSE_UNICODE
343 #if !wxUSE_UNICODE_WCHAR
344 WX_DECLARE_HASH_MAP(wxString
*, wchar_t*, wxPointerHash
, wxPointerEqual
,
345 wxStringWCharConversionCache
);
346 static wxStringWCharConversionCache gs_stringsWCharCache
;
348 const wchar_t* wxCStrData::AsWChar() const
350 // remove previously cached value, if any (see FIXMEs above):
351 DeleteStringFromConversionCache(gs_stringsWCharCache
, m_str
);
353 // convert the string and keep it:
354 const wchar_t *s
= gs_stringsWCharCache
[wxConstCast(m_str
, wxString
)] =
355 m_str
->wc_str().release();
359 #endif // !wxUSE_UNICODE_WCHAR
361 wxString::~wxString()
364 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
365 DeleteStringFromConversionCache(gs_stringsCharCache
, this);
367 #if !wxUSE_UNICODE_WCHAR
368 DeleteStringFromConversionCache(gs_stringsWCharCache
, this);
373 // ===========================================================================
374 // wxString class core
375 // ===========================================================================
377 // ---------------------------------------------------------------------------
378 // construction and conversion
379 // ---------------------------------------------------------------------------
381 #if wxUSE_UNICODE_WCHAR
383 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
384 const wxMBConv
& conv
)
387 if ( !psz
|| nLength
== 0 )
388 return SubstrBufFromMB(wxWCharBuffer(L
""), 0);
390 if ( nLength
== npos
)
394 wxScopedWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
396 return SubstrBufFromMB(wxWCharBuffer(L
""), 0);
398 return SubstrBufFromMB(wcBuf
, wcLen
);
400 #endif // wxUSE_UNICODE_WCHAR
402 #if wxUSE_UNICODE_UTF8
404 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
405 const wxMBConv
& conv
)
408 if ( !psz
|| nLength
== 0 )
409 return SubstrBufFromMB(wxCharBuffer(""), 0);
411 // if psz is already in UTF-8, we don't have to do the roundtrip to
412 // wchar_t* and back:
415 // we need to validate the input because UTF8 iterators assume valid
416 // UTF-8 sequence and psz may be invalid:
417 if ( wxStringOperations::IsValidUtf8String(psz
, nLength
) )
419 // we must pass the real string length to SubstrBufFromMB ctor
420 if ( nLength
== npos
)
421 nLength
= psz
? strlen(psz
) : 0;
422 return SubstrBufFromMB(wxScopedCharBuffer::CreateNonOwned(psz
, nLength
),
425 // else: do the roundtrip through wchar_t*
428 if ( nLength
== npos
)
431 // first convert to wide string:
433 wxScopedWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
435 return SubstrBufFromMB(wxCharBuffer(""), 0);
437 // and then to UTF-8:
438 SubstrBufFromMB
buf(ConvertStr(wcBuf
, wcLen
, wxMBConvStrictUTF8()));
439 // widechar -> UTF-8 conversion isn't supposed to ever fail:
440 wxASSERT_MSG( buf
.data
, wxT("conversion to UTF-8 failed") );
444 #endif // wxUSE_UNICODE_UTF8
446 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
448 wxString::SubstrBufFromWC
wxString::ConvertStr(const wchar_t *pwz
, size_t nLength
,
449 const wxMBConv
& conv
)
452 if ( !pwz
|| nLength
== 0 )
453 return SubstrBufFromWC(wxCharBuffer(""), 0);
455 if ( nLength
== npos
)
459 wxScopedCharBuffer
mbBuf(conv
.cWC2MB(pwz
, nLength
, &mbLen
));
461 return SubstrBufFromWC(wxCharBuffer(""), 0);
463 return SubstrBufFromWC(mbBuf
, mbLen
);
465 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
467 // This std::string::c_str()-like method returns a wide char pointer to string
468 // contents. In wxUSE_UNICODE_WCHAR case it is trivial as it can simply return
469 // a pointer to the internal representation. Otherwise a conversion is required
470 // and it returns a temporary buffer.
472 // However for compatibility with c_str() and to avoid breaking existing code
475 // for ( const wchar_t *p = s.wc_str(); *p; p++ )
478 // we actually need to ensure that the returned buffer is _not_ temporary and
479 // so we use wxString::m_convertedToWChar to store the returned data
480 #if !wxUSE_UNICODE_WCHAR
482 const wchar_t *wxString::AsWChar(const wxMBConv
& conv
) const
484 const char * const strMB
= m_impl
.c_str();
485 const size_t lenMB
= m_impl
.length();
487 // find out the size of the buffer needed
488 const size_t lenWC
= conv
.ToWChar(NULL
, 0, strMB
, lenMB
);
489 if ( lenWC
== wxCONV_FAILED
)
492 // keep the same buffer if the string size didn't change: this is not only
493 // an optimization but also ensures that code which modifies string
494 // character by character (without changing its length) can continue to use
495 // the pointer returned by a previous wc_str() call even after changing the
498 // TODO-UTF8: we could check for ">" instead of "!=" here as this would
499 // allow to save on buffer reallocations but at the cost of
500 // consuming (even) more memory, we should benchmark this to
501 // determine if it's worth doing
502 if ( !m_convertedToWChar
.m_str
|| lenWC
!= m_convertedToWChar
.m_len
)
504 if ( !const_cast<wxString
*>(this)->m_convertedToWChar
.Extend(lenWC
) )
508 // finally do convert
509 m_convertedToWChar
.m_str
[lenWC
] = L
'\0';
510 if ( conv
.ToWChar(m_convertedToWChar
.m_str
, lenWC
,
511 strMB
, lenMB
) == wxCONV_FAILED
)
514 return m_convertedToWChar
.m_str
;
517 #endif // !wxUSE_UNICODE_WCHAR
520 // Same thing for mb_str() which returns a normal char pointer to string
521 // contents: this always requires converting it to the specified encoding in
522 // non-ANSI build except if we need to convert to UTF-8 and this is what we
523 // already use internally.
526 const char *wxString::AsChar(const wxMBConv
& conv
) const
528 #if wxUSE_UNICODE_UTF8
530 return m_impl
.c_str();
532 const wchar_t * const strWC
= AsWChar(wxMBConvStrictUTF8());
533 const size_t lenWC
= m_convertedToWChar
.m_len
;
534 #else // wxUSE_UNICODE_WCHAR
535 const wchar_t * const strWC
= m_impl
.c_str();
536 const size_t lenWC
= m_impl
.length();
537 #endif // wxUSE_UNICODE_UTF8/wxUSE_UNICODE_WCHAR
539 const size_t lenMB
= conv
.FromWChar(NULL
, 0, strWC
, lenWC
);
540 if ( lenMB
== wxCONV_FAILED
)
543 if ( !m_convertedToChar
.m_str
|| lenMB
!= m_convertedToChar
.m_len
)
545 if ( !const_cast<wxString
*>(this)->m_convertedToChar
.Extend(lenMB
) )
549 m_convertedToChar
.m_str
[lenMB
] = '\0';
550 if ( conv
.FromWChar(m_convertedToChar
.m_str
, lenMB
,
551 strWC
, lenWC
) == wxCONV_FAILED
)
554 return m_convertedToChar
.m_str
;
557 #endif // wxUSE_UNICODE
559 // shrink to minimal size (releasing extra memory)
560 bool wxString::Shrink()
562 wxString
tmp(begin(), end());
564 return tmp
.length() == length();
567 // deprecated compatibility code:
568 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
569 wxStringCharType
*wxString::GetWriteBuf(size_t nLen
)
571 return DoGetWriteBuf(nLen
);
574 void wxString::UngetWriteBuf()
579 void wxString::UngetWriteBuf(size_t nLen
)
581 DoUngetWriteBuf(nLen
);
583 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
586 // ---------------------------------------------------------------------------
588 // ---------------------------------------------------------------------------
590 // all functions are inline in string.h
592 // ---------------------------------------------------------------------------
593 // concatenation operators
594 // ---------------------------------------------------------------------------
597 * concatenation functions come in 5 flavours:
599 * char + string and string + char
600 * C str + string and string + C str
603 wxString
operator+(const wxString
& str1
, const wxString
& str2
)
605 #if !wxUSE_STL_BASED_WXSTRING
606 wxASSERT( str1
.IsValid() );
607 wxASSERT( str2
.IsValid() );
616 wxString
operator+(const wxString
& str
, wxUniChar ch
)
618 #if !wxUSE_STL_BASED_WXSTRING
619 wxASSERT( str
.IsValid() );
628 wxString
operator+(wxUniChar ch
, const wxString
& str
)
630 #if !wxUSE_STL_BASED_WXSTRING
631 wxASSERT( str
.IsValid() );
640 wxString
operator+(const wxString
& str
, const char *psz
)
642 #if !wxUSE_STL_BASED_WXSTRING
643 wxASSERT( str
.IsValid() );
647 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
648 wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
656 wxString
operator+(const wxString
& str
, const wchar_t *pwz
)
658 #if !wxUSE_STL_BASED_WXSTRING
659 wxASSERT( str
.IsValid() );
663 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
664 wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
672 wxString
operator+(const char *psz
, const wxString
& str
)
674 #if !wxUSE_STL_BASED_WXSTRING
675 wxASSERT( str
.IsValid() );
679 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
680 wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
688 wxString
operator+(const wchar_t *pwz
, const wxString
& str
)
690 #if !wxUSE_STL_BASED_WXSTRING
691 wxASSERT( str
.IsValid() );
695 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
696 wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
704 // ---------------------------------------------------------------------------
706 // ---------------------------------------------------------------------------
708 bool wxString::IsSameAs(wxUniChar c
, bool compareWithCase
) const
710 return (length() == 1) && (compareWithCase
? GetChar(0u) == c
711 : wxToupper(GetChar(0u)) == wxToupper(c
));
714 #ifdef HAVE_STD_STRING_COMPARE
716 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
717 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
718 // sort strings in characters code point order by sorting the byte sequence
719 // in byte values order (i.e. what strcmp() and memcmp() do).
721 int wxString::compare(const wxString
& str
) const
723 return m_impl
.compare(str
.m_impl
);
726 int wxString::compare(size_t nStart
, size_t nLen
,
727 const wxString
& str
) const
730 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
731 return m_impl
.compare(pos
, len
, str
.m_impl
);
734 int wxString::compare(size_t nStart
, size_t nLen
,
736 size_t nStart2
, size_t nLen2
) const
739 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
742 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
744 return m_impl
.compare(pos
, len
, str
.m_impl
, pos2
, len2
);
747 int wxString::compare(const char* sz
) const
749 return m_impl
.compare(ImplStr(sz
));
752 int wxString::compare(const wchar_t* sz
) const
754 return m_impl
.compare(ImplStr(sz
));
757 int wxString::compare(size_t nStart
, size_t nLen
,
758 const char* sz
, size_t nCount
) const
761 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
763 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
765 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
768 int wxString::compare(size_t nStart
, size_t nLen
,
769 const wchar_t* sz
, size_t nCount
) const
772 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
774 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
776 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
779 #else // !HAVE_STD_STRING_COMPARE
781 static inline int wxDoCmp(const wxStringCharType
* s1
, size_t l1
,
782 const wxStringCharType
* s2
, size_t l2
)
785 return wxStringMemcmp(s1
, s2
, l1
);
788 int ret
= wxStringMemcmp(s1
, s2
, l1
);
789 return ret
== 0 ? -1 : ret
;
793 int ret
= wxStringMemcmp(s1
, s2
, l2
);
794 return ret
== 0 ? +1 : ret
;
798 int wxString::compare(const wxString
& str
) const
800 return ::wxDoCmp(m_impl
.data(), m_impl
.length(),
801 str
.m_impl
.data(), str
.m_impl
.length());
804 int wxString::compare(size_t nStart
, size_t nLen
,
805 const wxString
& str
) const
807 wxASSERT(nStart
<= length());
808 size_type strLen
= length() - nStart
;
809 nLen
= strLen
< nLen
? strLen
: nLen
;
812 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
814 return ::wxDoCmp(m_impl
.data() + pos
, len
,
815 str
.m_impl
.data(), str
.m_impl
.length());
818 int wxString::compare(size_t nStart
, size_t nLen
,
820 size_t nStart2
, size_t nLen2
) const
822 wxASSERT(nStart
<= length());
823 wxASSERT(nStart2
<= str
.length());
824 size_type strLen
= length() - nStart
,
825 strLen2
= str
.length() - nStart2
;
826 nLen
= strLen
< nLen
? strLen
: nLen
;
827 nLen2
= strLen2
< nLen2
? strLen2
: nLen2
;
830 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
832 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
834 return ::wxDoCmp(m_impl
.data() + pos
, len
,
835 str
.m_impl
.data() + pos2
, len2
);
838 int wxString::compare(const char* sz
) const
840 SubstrBufFromMB
str(ImplStr(sz
, npos
));
841 if ( str
.len
== npos
)
842 str
.len
= wxStringStrlen(str
.data
);
843 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
846 int wxString::compare(const wchar_t* sz
) const
848 SubstrBufFromWC
str(ImplStr(sz
, npos
));
849 if ( str
.len
== npos
)
850 str
.len
= wxStringStrlen(str
.data
);
851 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
854 int wxString::compare(size_t nStart
, size_t nLen
,
855 const char* sz
, size_t nCount
) const
857 wxASSERT(nStart
<= length());
858 size_type strLen
= length() - nStart
;
859 nLen
= strLen
< nLen
? strLen
: nLen
;
862 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
864 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
865 if ( str
.len
== npos
)
866 str
.len
= wxStringStrlen(str
.data
);
868 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
871 int wxString::compare(size_t nStart
, size_t nLen
,
872 const wchar_t* sz
, size_t nCount
) const
874 wxASSERT(nStart
<= length());
875 size_type strLen
= length() - nStart
;
876 nLen
= strLen
< nLen
? strLen
: nLen
;
879 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
881 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
882 if ( str
.len
== npos
)
883 str
.len
= wxStringStrlen(str
.data
);
885 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
888 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
891 // ---------------------------------------------------------------------------
892 // find_{first,last}_[not]_of functions
893 // ---------------------------------------------------------------------------
895 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
897 // NB: All these functions are implemented with the argument being wxChar*,
898 // i.e. widechar string in any Unicode build, even though native string
899 // representation is char* in the UTF-8 build. This is because we couldn't
900 // use memchr() to determine if a character is in a set encoded as UTF-8.
902 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
) const
904 return find_first_of(sz
, nStart
, wxStrlen(sz
));
907 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
) const
909 return find_first_not_of(sz
, nStart
, wxStrlen(sz
));
912 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
914 wxASSERT_MSG( nStart
<= length(), wxT("invalid index") );
917 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
919 if ( wxTmemchr(sz
, *i
, n
) )
926 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
928 wxASSERT_MSG( nStart
<= length(), wxT("invalid index") );
931 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
933 if ( !wxTmemchr(sz
, *i
, n
) )
941 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
) const
943 return find_last_of(sz
, nStart
, wxStrlen(sz
));
946 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
) const
948 return find_last_not_of(sz
, nStart
, wxStrlen(sz
));
951 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
953 size_t len
= length();
955 if ( nStart
== npos
)
961 wxASSERT_MSG( nStart
<= len
, wxT("invalid index") );
965 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
966 i
!= rend(); --idx
, ++i
)
968 if ( wxTmemchr(sz
, *i
, n
) )
975 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
977 size_t len
= length();
979 if ( nStart
== npos
)
985 wxASSERT_MSG( nStart
<= len
, wxT("invalid index") );
989 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
990 i
!= rend(); --idx
, ++i
)
992 if ( !wxTmemchr(sz
, *i
, n
) )
999 size_t wxString::find_first_not_of(wxUniChar ch
, size_t nStart
) const
1001 wxASSERT_MSG( nStart
<= length(), wxT("invalid index") );
1003 size_t idx
= nStart
;
1004 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
1013 size_t wxString::find_last_not_of(wxUniChar ch
, size_t nStart
) const
1015 size_t len
= length();
1017 if ( nStart
== npos
)
1023 wxASSERT_MSG( nStart
<= len
, wxT("invalid index") );
1026 size_t idx
= nStart
;
1027 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
1028 i
!= rend(); --idx
, ++i
)
1037 // the functions above were implemented for wchar_t* arguments in Unicode
1038 // build and char* in ANSI build; below are implementations for the other
1041 #define wxOtherCharType char
1042 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
1044 #define wxOtherCharType wchar_t
1045 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
1048 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
) const
1049 { return find_first_of(STRCONV(sz
), nStart
); }
1051 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
,
1053 { return find_first_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1054 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
) const
1055 { return find_last_of(STRCONV(sz
), nStart
); }
1056 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
,
1058 { return find_last_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1059 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
1060 { return find_first_not_of(STRCONV(sz
), nStart
); }
1061 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
,
1063 { return find_first_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1064 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
1065 { return find_last_not_of(STRCONV(sz
), nStart
); }
1066 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
,
1068 { return find_last_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1070 #undef wxOtherCharType
1073 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1075 // ===========================================================================
1076 // other common string functions
1077 // ===========================================================================
1079 int wxString::CmpNoCase(const wxString
& s
) const
1081 #if defined(__WXMSW__) && !wxUSE_UNICODE_UTF8
1082 // prefer to use CompareString() if available as it's more efficient than
1083 // doing it manually or even using wxStricmp() (see #10375)
1084 switch ( ::CompareString(LOCALE_USER_DEFAULT
, NORM_IGNORECASE
,
1085 m_impl
.c_str(), m_impl
.length(),
1086 s
.m_impl
.c_str(), s
.m_impl
.length()) )
1088 case CSTR_LESS_THAN
:
1094 case CSTR_GREATER_THAN
:
1098 wxFAIL_MSG( "unexpected CompareString() return value" );
1102 wxLogLastError("CompareString");
1103 // use generic code below
1105 #endif // __WXMSW__ && !wxUSE_UNICODE_UTF8
1107 // do the comparison manually: notice that we can't use wxStricmp() as it
1108 // doesn't handle embedded NULs
1110 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
1111 const_iterator i1
= begin();
1112 const_iterator end1
= end();
1113 const_iterator i2
= s
.begin();
1114 const_iterator end2
= s
.end();
1116 for ( ; i1
!= end1
&& i2
!= end2
; ++i1
, ++i2
)
1118 wxUniChar lower1
= (wxChar
)wxTolower(*i1
);
1119 wxUniChar lower2
= (wxChar
)wxTolower(*i2
);
1120 if ( lower1
!= lower2
)
1121 return lower1
< lower2
? -1 : 1;
1124 size_t len1
= length();
1125 size_t len2
= s
.length();
1129 else if ( len1
> len2
)
1138 #ifndef __SCHAR_MAX__
1139 #define __SCHAR_MAX__ 127
1143 wxString
wxString::FromAscii(const char *ascii
, size_t len
)
1145 if (!ascii
|| len
== 0)
1146 return wxEmptyString
;
1151 wxStringInternalBuffer
buf(res
, len
);
1152 wxStringCharType
*dest
= buf
;
1154 for ( ; len
> 0; --len
)
1156 unsigned char c
= (unsigned char)*ascii
++;
1157 wxASSERT_MSG( c
< 0x80,
1158 wxT("Non-ASCII value passed to FromAscii().") );
1160 *dest
++ = (wchar_t)c
;
1167 wxString
wxString::FromAscii(const char *ascii
)
1169 return FromAscii(ascii
, wxStrlen(ascii
));
1172 wxString
wxString::FromAscii(char ascii
)
1174 // What do we do with '\0' ?
1176 unsigned char c
= (unsigned char)ascii
;
1178 wxASSERT_MSG( c
< 0x80, wxT("Non-ASCII value passed to FromAscii().") );
1180 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1181 return wxString(wxUniChar((wchar_t)c
));
1184 const wxScopedCharBuffer
wxString::ToAscii() const
1186 // this will allocate enough space for the terminating NUL too
1187 wxCharBuffer
buffer(length());
1188 char *dest
= buffer
.data();
1190 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1193 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1194 *dest
++ = c
.IsAscii() ? (char)c
: '_';
1196 // the output string can't have embedded NULs anyhow, so we can safely
1197 // stop at first of them even if we do have any
1205 #endif // wxUSE_UNICODE
1207 // extract string of length nCount starting at nFirst
1208 wxString
wxString::Mid(size_t nFirst
, size_t nCount
) const
1210 size_t nLen
= length();
1212 // default value of nCount is npos and means "till the end"
1213 if ( nCount
== npos
)
1215 nCount
= nLen
- nFirst
;
1218 // out-of-bounds requests return sensible things
1219 if ( nFirst
+ nCount
> nLen
)
1221 nCount
= nLen
- nFirst
;
1224 if ( nFirst
> nLen
)
1226 // AllocCopy() will return empty string
1227 return wxEmptyString
;
1230 wxString
dest(*this, nFirst
, nCount
);
1231 if ( dest
.length() != nCount
)
1233 wxFAIL_MSG( wxT("out of memory in wxString::Mid") );
1239 // check that the string starts with prefix and return the rest of the string
1240 // in the provided pointer if it is not NULL, otherwise return false
1241 bool wxString::StartsWith(const wxString
& prefix
, wxString
*rest
) const
1243 if ( compare(0, prefix
.length(), prefix
) != 0 )
1248 // put the rest of the string into provided pointer
1249 rest
->assign(*this, prefix
.length(), npos
);
1256 // check that the string ends with suffix and return the rest of it in the
1257 // provided pointer if it is not NULL, otherwise return false
1258 bool wxString::EndsWith(const wxString
& suffix
, wxString
*rest
) const
1260 int start
= length() - suffix
.length();
1262 if ( start
< 0 || compare(start
, npos
, suffix
) != 0 )
1267 // put the rest of the string into provided pointer
1268 rest
->assign(*this, 0, start
);
1275 // extract nCount last (rightmost) characters
1276 wxString
wxString::Right(size_t nCount
) const
1278 if ( nCount
> length() )
1281 wxString
dest(*this, length() - nCount
, nCount
);
1282 if ( dest
.length() != nCount
) {
1283 wxFAIL_MSG( wxT("out of memory in wxString::Right") );
1288 // get all characters after the last occurrence of ch
1289 // (returns the whole string if ch not found)
1290 wxString
wxString::AfterLast(wxUniChar ch
) const
1293 int iPos
= Find(ch
, true);
1294 if ( iPos
== wxNOT_FOUND
)
1297 str
.assign(*this, iPos
+ 1, npos
);
1302 // extract nCount first (leftmost) characters
1303 wxString
wxString::Left(size_t nCount
) const
1305 if ( nCount
> length() )
1308 wxString
dest(*this, 0, nCount
);
1309 if ( dest
.length() != nCount
) {
1310 wxFAIL_MSG( wxT("out of memory in wxString::Left") );
1315 // get all characters before the first occurrence of ch
1316 // (returns the whole string if ch not found)
1317 wxString
wxString::BeforeFirst(wxUniChar ch
) const
1319 int iPos
= Find(ch
);
1320 if ( iPos
== wxNOT_FOUND
)
1322 return wxString(*this, 0, iPos
);
1325 /// get all characters before the last occurrence of ch
1326 /// (returns empty string if ch not found)
1327 wxString
wxString::BeforeLast(wxUniChar ch
) const
1330 int iPos
= Find(ch
, true);
1331 if ( iPos
!= wxNOT_FOUND
&& iPos
!= 0 )
1332 str
= wxString(c_str(), iPos
);
1337 /// get all characters after the first occurrence of ch
1338 /// (returns empty string if ch not found)
1339 wxString
wxString::AfterFirst(wxUniChar ch
) const
1342 int iPos
= Find(ch
);
1343 if ( iPos
!= wxNOT_FOUND
)
1344 str
.assign(*this, iPos
+ 1, npos
);
1349 // replace first (or all) occurrences of some substring with another one
1350 size_t wxString::Replace(const wxString
& strOld
,
1351 const wxString
& strNew
, bool bReplaceAll
)
1353 // if we tried to replace an empty string we'd enter an infinite loop below
1354 wxCHECK_MSG( !strOld
.empty(), 0,
1355 wxT("wxString::Replace(): invalid parameter") );
1357 wxSTRING_INVALIDATE_CACHE();
1359 size_t uiCount
= 0; // count of replacements made
1361 // optimize the special common case: replacement of one character by
1362 // another one (in UTF-8 case we can only do this for ASCII characters)
1364 // benchmarks show that this special version is around 3 times faster
1365 // (depending on the proportion of matching characters and UTF-8/wchar_t
1367 if ( strOld
.m_impl
.length() == 1 && strNew
.m_impl
.length() == 1 )
1369 const wxStringCharType chOld
= strOld
.m_impl
[0],
1370 chNew
= strNew
.m_impl
[0];
1372 // this loop is the simplified version of the one below
1373 for ( size_t pos
= 0; ; )
1375 pos
= m_impl
.find(chOld
, pos
);
1379 m_impl
[pos
++] = chNew
;
1387 else if ( !bReplaceAll
)
1389 size_t pos
= m_impl
.find(strOld
, 0);
1392 m_impl
.replace(pos
, strOld
.m_impl
.length(), strNew
.m_impl
);
1396 else // replace all occurrences
1398 const size_t uiOldLen
= strOld
.m_impl
.length();
1399 const size_t uiNewLen
= strNew
.m_impl
.length();
1401 // first scan the string to find all positions at which the replacement
1403 wxVector
<size_t> replacePositions
;
1406 for ( pos
= m_impl
.find(strOld
.m_impl
, 0);
1408 pos
= m_impl
.find(strOld
.m_impl
, pos
+ uiOldLen
))
1410 replacePositions
.push_back(pos
);
1417 // allocate enough memory for the whole new string
1419 tmp
.m_impl
.reserve(m_impl
.length() + uiCount
*(uiNewLen
- uiOldLen
));
1421 // copy this string to tmp doing replacements on the fly
1423 for ( pos
= 0; replNum
< uiCount
; replNum
++ )
1425 const size_t nextReplPos
= replacePositions
[replNum
];
1427 if ( pos
!= nextReplPos
)
1429 tmp
.m_impl
.append(m_impl
, pos
, nextReplPos
- pos
);
1432 tmp
.m_impl
.append(strNew
.m_impl
);
1433 pos
= nextReplPos
+ uiOldLen
;
1436 if ( pos
!= m_impl
.length() )
1438 // append the rest of the string unchanged
1439 tmp
.m_impl
.append(m_impl
, pos
, m_impl
.length() - pos
);
1448 bool wxString::IsAscii() const
1450 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1452 if ( !(*i
).IsAscii() )
1459 bool wxString::IsWord() const
1461 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1463 if ( !wxIsalpha(*i
) )
1470 bool wxString::IsNumber() const
1475 const_iterator i
= begin();
1477 if ( *i
== wxT('-') || *i
== wxT('+') )
1480 for ( ; i
!= end(); ++i
)
1482 if ( !wxIsdigit(*i
) )
1489 wxString
wxString::Strip(stripType w
) const
1492 if ( w
& leading
) s
.Trim(false);
1493 if ( w
& trailing
) s
.Trim(true);
1497 // ---------------------------------------------------------------------------
1499 // ---------------------------------------------------------------------------
1501 wxString
& wxString::MakeUpper()
1503 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1504 *it
= (wxChar
)wxToupper(*it
);
1509 wxString
& wxString::MakeLower()
1511 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1512 *it
= (wxChar
)wxTolower(*it
);
1517 wxString
& wxString::MakeCapitalized()
1519 const iterator en
= end();
1520 iterator it
= begin();
1523 *it
= (wxChar
)wxToupper(*it
);
1524 for ( ++it
; it
!= en
; ++it
)
1525 *it
= (wxChar
)wxTolower(*it
);
1531 // ---------------------------------------------------------------------------
1532 // trimming and padding
1533 // ---------------------------------------------------------------------------
1535 // some compilers (VC++ 6.0 not to name them) return true for a call to
1536 // isspace('\xEA') in the C locale which seems to be broken to me, but we have
1537 // to live with this by checking that the character is a 7 bit one - even if
1538 // this may fail to detect some spaces (I don't know if Unicode doesn't have
1539 // space-like symbols somewhere except in the first 128 chars), it is arguably
1540 // still better than trimming away accented letters
1541 inline int wxSafeIsspace(wxChar ch
) { return (ch
< 127) && wxIsspace(ch
); }
1543 // trims spaces (in the sense of isspace) from left or right side
1544 wxString
& wxString::Trim(bool bFromRight
)
1546 // first check if we're going to modify the string at all
1549 (bFromRight
&& wxSafeIsspace(GetChar(length() - 1))) ||
1550 (!bFromRight
&& wxSafeIsspace(GetChar(0u)))
1556 // find last non-space character
1557 reverse_iterator psz
= rbegin();
1558 while ( (psz
!= rend()) && wxSafeIsspace(*psz
) )
1561 // truncate at trailing space start
1562 erase(psz
.base(), end());
1566 // find first non-space character
1567 iterator psz
= begin();
1568 while ( (psz
!= end()) && wxSafeIsspace(*psz
) )
1571 // fix up data and length
1572 erase(begin(), psz
);
1579 // adds nCount characters chPad to the string from either side
1580 wxString
& wxString::Pad(size_t nCount
, wxUniChar chPad
, bool bFromRight
)
1582 wxString
s(chPad
, nCount
);
1595 // truncate the string
1596 wxString
& wxString::Truncate(size_t uiLen
)
1598 if ( uiLen
< length() )
1600 erase(begin() + uiLen
, end());
1602 //else: nothing to do, string is already short enough
1607 // ---------------------------------------------------------------------------
1608 // finding (return wxNOT_FOUND if not found and index otherwise)
1609 // ---------------------------------------------------------------------------
1612 int wxString::Find(wxUniChar ch
, bool bFromEnd
) const
1614 size_type idx
= bFromEnd
? find_last_of(ch
) : find_first_of(ch
);
1616 return (idx
== npos
) ? wxNOT_FOUND
: (int)idx
;
1619 // ----------------------------------------------------------------------------
1620 // conversion to numbers
1621 // ----------------------------------------------------------------------------
1623 // The implementation of all the functions below is exactly the same so factor
1624 // it out. Note that number extraction works correctly on UTF-8 strings, so
1625 // we can use wxStringCharType and wx_str() for maximum efficiency.
// errno is not used under Windows CE, so the statements dealing with it are
// wrapped in this macro which expands to nothing there
#ifndef __WXWINCE__
    #define DO_IF_NOT_WINCE(x) x
#else
    #define DO_IF_NOT_WINCE(x)
#endif

// Common prologue of the conversion functions: validates the output pointer
// pVal, resets errno and declares the start and end pointers used by the
// parsing function called between the two macros.
#define WX_STRING_TO_X_TYPE_START                                           \
    wxCHECK_MSG( pVal, false, wxT("NULL output pointer") );                  \
    DO_IF_NOT_WINCE( errno = 0; )                                           \
    const wxStringCharType *start = wx_str();                               \
    wxStringCharType *end;

// Common epilogue of the conversion functions; expects the parsed value in
// a local variable called val.
//
// notice that we return false without modifying the output parameter at all if
// nothing could be parsed but we do modify it and return false then if we did
// parse something successfully but not the entire string
#define WX_STRING_TO_X_TYPE_END                                             \
    if ( end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )                 \
        return false;                                                       \
    *pVal = val;                                                            \
    return !*end;
1648 bool wxString::ToLong(long *pVal
, int base
) const
1650 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), wxT("invalid base") );
1652 WX_STRING_TO_X_TYPE_START
1653 long val
= wxStrtol(start
, &end
, base
);
1654 WX_STRING_TO_X_TYPE_END
1657 bool wxString::ToULong(unsigned long *pVal
, int base
) const
1659 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), wxT("invalid base") );
1661 WX_STRING_TO_X_TYPE_START
1662 unsigned long val
= wxStrtoul(start
, &end
, base
);
1663 WX_STRING_TO_X_TYPE_END
1666 bool wxString::ToLongLong(wxLongLong_t
*pVal
, int base
) const
1668 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), wxT("invalid base") );
1670 WX_STRING_TO_X_TYPE_START
1671 wxLongLong_t val
= wxStrtoll(start
, &end
, base
);
1672 WX_STRING_TO_X_TYPE_END
1675 bool wxString::ToULongLong(wxULongLong_t
*pVal
, int base
) const
1677 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), wxT("invalid base") );
1679 WX_STRING_TO_X_TYPE_START
1680 wxULongLong_t val
= wxStrtoull(start
, &end
, base
);
1681 WX_STRING_TO_X_TYPE_END
1684 bool wxString::ToDouble(double *pVal
) const
1686 WX_STRING_TO_X_TYPE_START
1687 double val
= wxStrtod(start
, &end
);
1688 WX_STRING_TO_X_TYPE_END
#if wxUSE_XLOCALE

// The functions below mirror ToLong(), ToULong() and ToDouble() but call the
// "_l" variants of the parsing functions, passing them wxCLocale. The narrow
// ("A") variants are used when the string data is not wide and
// wxHAS_XLOCALE_SUPPORT is defined.

// Same as ToLong() but parses using wxCLocale.
bool wxString::ToCLong(long *pVal, int base) const
{
    wxASSERT_MSG( base == 0 || (base >= 2 && base <= 36),
                  wxT("invalid base") );

    WX_STRING_TO_X_TYPE_START
#if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT)
    long val = wxStrtol_lA(start, &end, base, wxCLocale);
#else
    long val = wxStrtol_l(start, &end, base, wxCLocale);
#endif
    WX_STRING_TO_X_TYPE_END
}

// Same as ToULong() but parses using wxCLocale.
bool wxString::ToCULong(unsigned long *pVal, int base) const
{
    wxASSERT_MSG( base == 0 || (base >= 2 && base <= 36),
                  wxT("invalid base") );

    WX_STRING_TO_X_TYPE_START
#if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT)
    unsigned long val = wxStrtoul_lA(start, &end, base, wxCLocale);
#else
    unsigned long val = wxStrtoul_l(start, &end, base, wxCLocale);
#endif
    WX_STRING_TO_X_TYPE_END
}

// Same as ToDouble() but parses using wxCLocale.
bool wxString::ToCDouble(double *pVal) const
{
    WX_STRING_TO_X_TYPE_START
#if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT)
    double val = wxStrtod_lA(start, &end, wxCLocale);
#else
    double val = wxStrtod_l(start, &end, wxCLocale);
#endif
    WX_STRING_TO_X_TYPE_END
}

#endif  // wxUSE_XLOCALE
1732 // ---------------------------------------------------------------------------
1734 // ---------------------------------------------------------------------------
1736 #if !wxUSE_UTF8_LOCALE_ONLY
1738 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1739 wxString
wxStringPrintfMixinBase::DoFormatWchar(const wxChar
*format
, ...)
1741 wxString
wxString::DoFormatWchar(const wxChar
*format
, ...)
1745 va_start(argptr
, format
);
1748 s
.PrintfV(format
, argptr
);
1754 #endif // !wxUSE_UTF8_LOCALE_ONLY
#if wxUSE_UNICODE_UTF8

// Builds and returns a new string from the char format string and the
// variable arguments following it; the formatting is done by PrintfV().
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}

#endif // wxUSE_UNICODE_UTF8
1773 wxString
wxString::FormatV(const wxString
& format
, va_list argptr
)
1776 s
.PrintfV(format
, argptr
);
1780 #if !wxUSE_UTF8_LOCALE_ONLY
1781 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1782 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar
*format
, ...)
1784 int wxString::DoPrintfWchar(const wxChar
*format
, ...)
1788 va_start(argptr
, format
);
1790 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1791 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1792 // because it's the only cast that works safely for downcasting when
1793 // multiple inheritance is used:
1794 wxString
*str
= static_cast<wxString
*>(this);
1796 wxString
*str
= this;
1799 int iLen
= str
->PrintfV(format
, argptr
);
1805 #endif // !wxUSE_UTF8_LOCALE_ONLY
#if wxUSE_UNICODE_UTF8

// Formats the char format string and the variable arguments following it
// into this string using PrintfV() and returns PrintfV()'s result.
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);

    const int written = PrintfV(format, args);

    va_end(args);

    return written;
}

#endif // wxUSE_UNICODE_UTF8
    Uses wxVsnprintf and places the result into this string.
1824 In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1825 it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in
1826 the ISO C99 (and thus SUSv3) standard the return value for the case of
1827 an undersized buffer is inconsistent. For conforming vsnprintf
1828 implementations the function must return the number of characters that
1829 would have been printed had the buffer been large enough. For conforming
1830 vswprintf implementations the function must return a negative number
1833 What vswprintf sets errno to is undefined but Darwin seems to set it to
1834 EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of
1835 those are defined in the standard and backed up by several conformance
1836 statements. Note that ENOMEM mentioned in the manual page does not
1837 apply to swprintf, only wprintf and fwprintf.
1839 Official manual page:
1840 http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1842 Some conformance statements (AIX, Solaris):
1843 http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1844 http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1846 Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1847 EILSEQ and EINVAL are specifically defined to mean the error is other than
1848 an undersized buffer and no other errno are defined we treat those two
1849 as meaning hard errors and everything else gets the old behavior which
1850 is to keep looping and increasing buffer size until the function succeeds.
1852 In practice it's impossible to determine before compilation which behavior
1853 may be used. The vswprintf function may have vsnprintf-like behavior or
1854 vice-versa. Behavior detected on one release can theoretically change
1855 with an updated release. Not to mention that configure testing for it
1856 would require the test to be run on the host system, not the build system
1857 which makes cross compilation difficult. Therefore, we make no assumptions
1858 about behavior and try our best to handle every known case, including the
1859 case where wxVsnprintf returns a negative number and fails to set errno.
1861 There is yet one more non-standard implementation and that is our own.
1862 Fortunately, that can be detected at compile-time.
1864 On top of all that, ISO C99 explicitly defines snprintf to write a null
    character to the last position of the specified buffer.  That would be
    at the given buffer size minus 1.  It is supposed to do this even if it
1867 turns out that the buffer is sized too small.
1869 Darwin (tested on 10.5) follows the C99 behavior exactly.
1871 Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1872 errno even when it fails. However, it only seems to ever fail due
1873 to an undersized buffer.
1875 #if wxUSE_UNICODE_UTF8
1876 template<typename BufferType
>
1878 // we only need one version in non-UTF8 builds and at least two Windows
1879 // compilers have problems with this function template, so use just one
1880 // normal function here
1882 static int DoStringPrintfV(wxString
& str
,
1883 const wxString
& format
, va_list argptr
)
1889 #if wxUSE_UNICODE_UTF8
1890 BufferType
tmp(str
, size
+ 1);
1891 typename
BufferType::CharType
*buf
= tmp
;
1893 wxStringBuffer
tmp(str
, size
+ 1);
1901 // in UTF-8 build, leaving uninitialized junk in the buffer
1902 // could result in invalid non-empty UTF-8 string, so just
1903 // reset the string to empty on failure:
1908 // wxVsnprintf() may modify the original arg pointer, so pass it
1911 wxVaCopy(argptrcopy
, argptr
);
1914 // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1917 int len
= wxVsnprintf(buf
, size
, format
, argptrcopy
);
1920 // some implementations of vsnprintf() don't NUL terminate
1921 // the string if there is not enough space for it so
1922 // always do it manually
1923 // FIXME: This really seems to be the wrong and would be an off-by-one
1924 // bug except the code above allocates an extra character.
1925 buf
[size
] = wxT('\0');
1927 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1928 // total number of characters which would have been written if the
1929 // buffer were large enough (newer standards such as Unix98)
1932 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1933 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1934 // is true if *both* of them use our own implementation,
1935 // otherwise we can't be sure
1936 #if wxUSE_WXVSNPRINTF
1937 // we know that our own implementation of wxVsnprintf() returns -1
1938 // only for a format error - thus there's something wrong with
1939 // the user's format string
1942 #else // possibly using system version
1943 // assume it only returns error if there is not enough space, but
1944 // as we don't know how much we need, double the current size of
1947 if( (errno
== EILSEQ
) || (errno
== EINVAL
) )
1948 // If errno was set to one of the two well-known hard errors
1949 // then fail immediately to avoid an infinite loop.
1952 #endif // __WXWINCE__
1953 // still not enough, as we don't know how much we need, double the
1954 // current size of the buffer
1956 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1958 else if ( len
>= size
)
1960 #if wxUSE_WXVSNPRINTF
1961 // we know that our own implementation of wxVsnprintf() returns
1962 // size+1 when there's not enough space but that's not the size
1963 // of the required buffer!
1964 size
*= 2; // so we just double the current size of the buffer
1966 // some vsnprintf() implementations NUL-terminate the buffer and
1967 // some don't in len == size case, to be safe always add 1
1968 // FIXME: I don't quite understand this comment. The vsnprintf
1969 // function is specifically defined to return the number of
1970 // characters printed not including the null terminator.
1971 // So OF COURSE you need to add 1 to get the right buffer size.
1972 // The following line is definitely correct, no question.
1976 else // ok, there was enough space
1982 // we could have overshot
1985 return str
.length();
1988 int wxString::PrintfV(const wxString
& format
, va_list argptr
)
1990 #if wxUSE_UNICODE_UTF8
1991 #if wxUSE_STL_BASED_WXSTRING
1992 typedef wxStringTypeBuffer
<char> Utf8Buffer
;
1994 typedef wxStringInternalBuffer Utf8Buffer
;
1998 #if wxUSE_UTF8_LOCALE_ONLY
1999 return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argptr
);
2001 #if wxUSE_UNICODE_UTF8
2002 if ( wxLocaleIsUtf8
)
2003 return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argptr
);
2006 return DoStringPrintfV
<wxStringBuffer
>(*this, format
, argptr
);
2008 return DoStringPrintfV(*this, format
, argptr
);
2009 #endif // UTF8/WCHAR
2013 // ----------------------------------------------------------------------------
2014 // misc other operations
2015 // ----------------------------------------------------------------------------
2017 // returns true if the string matches the pattern which may contain '*' and
2018 // '?' metacharacters (as usual, '?' matches any character and '*' any number
2020 bool wxString::Matches(const wxString
& mask
) const
2022 // I disable this code as it doesn't seem to be faster (in fact, it seems
2023 // to be much slower) than the old, hand-written code below and using it
2024 // here requires always linking with libregex even if the user code doesn't
2026 #if 0 // wxUSE_REGEX
2027 // first translate the shell-like mask into a regex
2029 pattern
.reserve(wxStrlen(pszMask
));
2031 pattern
+= wxT('^');
2037 pattern
+= wxT('.');
2041 pattern
+= wxT(".*");
2052 // these characters are special in a RE, quote them
2053 // (however note that we don't quote '[' and ']' to allow
2054 // using them for Unix shell like matching)
2055 pattern
+= wxT('\\');
2059 pattern
+= *pszMask
;
2064 pattern
+= wxT('$');
2067 return wxRegEx(pattern
, wxRE_NOSUB
| wxRE_EXTENDED
).Matches(c_str());
2068 #else // !wxUSE_REGEX
2069 // TODO: this is, of course, awfully inefficient...
2071 // FIXME-UTF8: implement using iterators, remove #if
2072 #if wxUSE_UNICODE_UTF8
2073 const wxScopedWCharBuffer maskBuf
= mask
.wc_str();
2074 const wxScopedWCharBuffer txtBuf
= wc_str();
2075 const wxChar
*pszMask
= maskBuf
.data();
2076 const wxChar
*pszTxt
= txtBuf
.data();
2078 const wxChar
*pszMask
= mask
.wx_str();
2079 // the char currently being checked
2080 const wxChar
*pszTxt
= wx_str();
2083 // the last location where '*' matched
2084 const wxChar
*pszLastStarInText
= NULL
;
2085 const wxChar
*pszLastStarInMask
= NULL
;
2088 for ( ; *pszMask
!= wxT('\0'); pszMask
++, pszTxt
++ ) {
2089 switch ( *pszMask
) {
2091 if ( *pszTxt
== wxT('\0') )
2094 // pszTxt and pszMask will be incremented in the loop statement
2100 // remember where we started to be able to backtrack later
2101 pszLastStarInText
= pszTxt
;
2102 pszLastStarInMask
= pszMask
;
2104 // ignore special chars immediately following this one
2105 // (should this be an error?)
2106 while ( *pszMask
== wxT('*') || *pszMask
== wxT('?') )
2109 // if there is nothing more, match
2110 if ( *pszMask
== wxT('\0') )
2113 // are there any other metacharacters in the mask?
2115 const wxChar
*pEndMask
= wxStrpbrk(pszMask
, wxT("*?"));
2117 if ( pEndMask
!= NULL
) {
2118 // we have to match the string between two metachars
2119 uiLenMask
= pEndMask
- pszMask
;
2122 // we have to match the remainder of the string
2123 uiLenMask
= wxStrlen(pszMask
);
2126 wxString
strToMatch(pszMask
, uiLenMask
);
2127 const wxChar
* pMatch
= wxStrstr(pszTxt
, strToMatch
);
2128 if ( pMatch
== NULL
)
2131 // -1 to compensate "++" in the loop
2132 pszTxt
= pMatch
+ uiLenMask
- 1;
2133 pszMask
+= uiLenMask
- 1;
2138 if ( *pszMask
!= *pszTxt
)
2144 // match only if nothing left
2145 if ( *pszTxt
== wxT('\0') )
2148 // if we failed to match, backtrack if we can
2149 if ( pszLastStarInText
) {
2150 pszTxt
= pszLastStarInText
+ 1;
2151 pszMask
= pszLastStarInMask
;
2153 pszLastStarInText
= NULL
;
2155 // don't bother resetting pszLastStarInMask, it's unnecessary
2161 #endif // wxUSE_REGEX/!wxUSE_REGEX
2164 // Count the number of chars
2165 int wxString::Freq(wxUniChar ch
) const
2168 for ( const_iterator i
= begin(); i
!= end(); ++i
)