1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
7 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
8 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
9 // Licence: wxWindows licence
10 /////////////////////////////////////////////////////////////////////////////
12 // ===========================================================================
13 // headers, declarations, constants
14 // ===========================================================================
16 // For compilers that support precompilation, includes "wx.h".
17 #include "wx/wxprec.h"
24 #include "wx/string.h"
25 #include "wx/wxcrtvararg.h"
39 #include "wx/hashmap.h"
40 #include "wx/vector.h"
41 #include "wx/xlocale.h"
44 #include "wx/msw/wrapwin.h"
47 #if wxUSE_STD_IOSTREAM
51 // string handling functions used by wxString:
52 #if wxUSE_UNICODE_UTF8
53 #define wxStringMemcpy memcpy
54 #define wxStringMemcmp memcmp
55 #define wxStringMemchr memchr
56 #define wxStringStrlen strlen
58 #define wxStringMemcpy wxTmemcpy
59 #define wxStringMemcmp wxTmemcmp
60 #define wxStringMemchr wxTmemchr
61 #define wxStringStrlen wxStrlen
64 // define a function declared in wx/buffer.h here as we don't have buffer.cpp
65 // and don't want to add it just because of this simple function
// wxXXXBuffer classes can be (implicitly) used during global statics
// initialization so wrap the static UntypedBufferData variable in a function
// to make it safe to access it even before all global statics are initialized
72 UntypedBufferData
*GetUntypedNullData()
74 static UntypedBufferData
s_untypedNullData(NULL
, 0);
76 return &s_untypedNullData
;
79 } // namespace wxPrivate
81 // ---------------------------------------------------------------------------
82 // static class variables definition
83 // ---------------------------------------------------------------------------
85 //According to STL _must_ be a -1 size_t
86 const size_t wxString::npos
= (size_t) -1;
88 #if wxUSE_STRING_POS_CACHE
90 #ifdef wxHAS_COMPILER_TLS
92 wxTLS_TYPE(wxString::Cache
) wxString::ms_cache
;
94 #else // !wxHAS_COMPILER_TLS
96 struct wxStrCacheInitializer
98 wxStrCacheInitializer()
100 // calling this function triggers s_cache initialization in it, and
101 // from now on it becomes safe to call from multiple threads
102 wxString::GetCache();
107 wxString::Cache& wxString::GetCache()
109 static wxTLS_TYPE(Cache) s_cache;
111 return wxTLS_VALUE(s_cache);
115 static wxStrCacheInitializer gs_stringCacheInit
;
117 #endif // wxHAS_COMPILER_TLS/!wxHAS_COMPILER_TLS
119 // gdb seems to be unable to display thread-local variables correctly, at least
120 // not my 6.4.98 version under amd64, so provide this debugging helper to do it
121 #if wxDEBUG_LEVEL >= 2
123 struct wxStrCacheDumper
125 static void ShowAll()
127 puts("*** wxString cache dump:");
128 for ( unsigned n
= 0; n
< wxString::Cache::SIZE
; n
++ )
130 const wxString::Cache::Element
&
131 c
= wxString::GetCacheBegin()[n
];
133 printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
135 n
== wxString::LastUsedCacheElement() ? " [*]" : "",
137 (unsigned long)c
.pos
,
138 (unsigned long)c
.impl
,
144 void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
146 #endif // wxDEBUG_LEVEL >= 2
148 #ifdef wxPROFILE_STRING_CACHE
150 wxString::CacheStats
wxString::ms_cacheStats
;
152 struct wxStrCacheStatsDumper
154 ~wxStrCacheStatsDumper()
156 const wxString::CacheStats
& stats
= wxString::ms_cacheStats
;
160 puts("*** wxString cache statistics:");
161 printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
163 printf("\tHits %u (of which %u not used) or %.2f%%\n",
166 100.*float(stats
.poshits
- stats
.mishits
)/stats
.postot
);
167 printf("\tAverage position requested: %.2f\n",
168 float(stats
.sumpos
) / stats
.postot
);
169 printf("\tAverage offset after cached hint: %.2f\n",
170 float(stats
.sumofs
) / stats
.postot
);
175 printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
176 stats
.lentot
, 100.*float(stats
.lenhits
)/stats
.lentot
);
181 static wxStrCacheStatsDumper s_showCacheStats
;
183 #endif // wxPROFILE_STRING_CACHE
185 #endif // wxUSE_STRING_POS_CACHE
187 // ----------------------------------------------------------------------------
189 // ----------------------------------------------------------------------------
191 #if wxUSE_STD_IOSTREAM
195 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxCStrData
& str
)
197 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
198 const wxScopedCharBuffer
buf(str
.AsCharBuf());
200 os
.clear(wxSTD
ios_base::failbit
);
206 return os
<< str
.AsInternal();
210 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxString
& str
)
212 return os
<< str
.c_str();
215 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxScopedCharBuffer
& str
)
217 return os
<< str
.data();
221 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxScopedWCharBuffer
& str
)
223 return os
<< str
.data();
227 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
229 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxString
& str
)
231 return wos
<< str
.wc_str();
234 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxCStrData
& str
)
236 return wos
<< str
.AsWChar();
239 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxScopedWCharBuffer
& str
)
241 return wos
<< str
.data();
244 #endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
246 #endif // wxUSE_STD_IOSTREAM
248 // ===========================================================================
249 // wxString class core
250 // ===========================================================================
252 #if wxUSE_UNICODE_UTF8
254 void wxString::PosLenToImpl(size_t pos
, size_t len
,
255 size_t *implPos
, size_t *implLen
) const
261 else // have valid start position
263 const const_iterator b
= GetIterForNthChar(pos
);
264 *implPos
= wxStringImpl::const_iterator(b
.impl()) - m_impl
.begin();
269 else // have valid length too
271 // we need to handle the case of length specifying a substring
272 // going beyond the end of the string, just as std::string does
273 const const_iterator
e(end());
275 while ( len
&& i
<= e
)
281 *implLen
= i
.impl() - b
.impl();
286 #endif // wxUSE_UNICODE_UTF8
288 // ----------------------------------------------------------------------------
289 // wxCStrData converted strings caching
290 // ----------------------------------------------------------------------------
292 // FIXME-UTF8: temporarily disabled because it doesn't work with global
293 // string objects; re-enable after fixing this bug and benchmarking
294 // performance to see if using a hash is a good idea at all
297 // For backward compatibility reasons, it must be possible to assign the value
298 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
299 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
300 // because the memory would be freed immediately, but it has to be valid as long
301 // as the string is not modified, so that code like this still works:
303 // const wxChar *s = str.c_str();
304 // while ( s ) { ... }
306 // FIXME-UTF8: not thread safe!
307 // FIXME-UTF8: we currently clear the cached conversion only when the string is
308 // destroyed, but we should do it when the string is modified, to
309 // keep memory usage down
// FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
//             invalidated the cache on every change, we could keep the previous
//             conversion
313 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
314 // to use mb_str() or wc_str() instead of (const [w]char*)c_str()
317 static inline void DeleteStringFromConversionCache(T
& hash
, const wxString
*s
)
319 typename
T::iterator i
= hash
.find(wxConstCast(s
, wxString
));
320 if ( i
!= hash
.end() )
328 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
329 // so we have to use wxString* here and const-cast when used
330 WX_DECLARE_HASH_MAP(wxString
*, char*, wxPointerHash
, wxPointerEqual
,
331 wxStringCharConversionCache
);
332 static wxStringCharConversionCache gs_stringsCharCache
;
334 const char* wxCStrData::AsChar() const
    // remove previously cached value, if any (see FIXMEs above):
337 DeleteStringFromConversionCache(gs_stringsCharCache
, m_str
);
339 // convert the string and keep it:
340 const char *s
= gs_stringsCharCache
[wxConstCast(m_str
, wxString
)] =
341 m_str
->mb_str().release();
345 #endif // wxUSE_UNICODE
347 #if !wxUSE_UNICODE_WCHAR
348 WX_DECLARE_HASH_MAP(wxString
*, wchar_t*, wxPointerHash
, wxPointerEqual
,
349 wxStringWCharConversionCache
);
350 static wxStringWCharConversionCache gs_stringsWCharCache
;
352 const wchar_t* wxCStrData::AsWChar() const
    // remove previously cached value, if any (see FIXMEs above):
355 DeleteStringFromConversionCache(gs_stringsWCharCache
, m_str
);
357 // convert the string and keep it:
358 const wchar_t *s
= gs_stringsWCharCache
[wxConstCast(m_str
, wxString
)] =
359 m_str
->wc_str().release();
363 #endif // !wxUSE_UNICODE_WCHAR
365 wxString::~wxString()
368 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
369 DeleteStringFromConversionCache(gs_stringsCharCache
, this);
371 #if !wxUSE_UNICODE_WCHAR
372 DeleteStringFromConversionCache(gs_stringsWCharCache
, this);
377 // ===========================================================================
378 // wxString class core
379 // ===========================================================================
381 // ---------------------------------------------------------------------------
382 // construction and conversion
383 // ---------------------------------------------------------------------------
385 #if wxUSE_UNICODE_WCHAR
387 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
388 const wxMBConv
& conv
)
391 if ( !psz
|| nLength
== 0 )
392 return SubstrBufFromMB(wxWCharBuffer(L
""), 0);
394 if ( nLength
== npos
)
398 wxScopedWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
400 return SubstrBufFromMB(wxWCharBuffer(L
""), 0);
402 return SubstrBufFromMB(wcBuf
, wcLen
);
404 #endif // wxUSE_UNICODE_WCHAR
406 #if wxUSE_UNICODE_UTF8
408 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
409 const wxMBConv
& conv
)
412 if ( !psz
|| nLength
== 0 )
413 return SubstrBufFromMB(wxCharBuffer(""), 0);
415 // if psz is already in UTF-8, we don't have to do the roundtrip to
416 // wchar_t* and back:
419 // we need to validate the input because UTF8 iterators assume valid
420 // UTF-8 sequence and psz may be invalid:
421 if ( wxStringOperations::IsValidUtf8String(psz
, nLength
) )
423 // we must pass the real string length to SubstrBufFromMB ctor
424 if ( nLength
== npos
)
425 nLength
= psz
? strlen(psz
) : 0;
426 return SubstrBufFromMB(wxScopedCharBuffer::CreateNonOwned(psz
, nLength
),
429 // else: do the roundtrip through wchar_t*
432 if ( nLength
== npos
)
435 // first convert to wide string:
437 wxScopedWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
439 return SubstrBufFromMB(wxCharBuffer(""), 0);
441 // and then to UTF-8:
442 SubstrBufFromMB
buf(ConvertStr(wcBuf
, wcLen
, wxMBConvStrictUTF8()));
443 // widechar -> UTF-8 conversion isn't supposed to ever fail:
444 wxASSERT_MSG( buf
.data
, wxT("conversion to UTF-8 failed") );
448 #endif // wxUSE_UNICODE_UTF8
450 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
452 wxString::SubstrBufFromWC
wxString::ConvertStr(const wchar_t *pwz
, size_t nLength
,
453 const wxMBConv
& conv
)
456 if ( !pwz
|| nLength
== 0 )
457 return SubstrBufFromWC(wxCharBuffer(""), 0);
459 if ( nLength
== npos
)
463 wxScopedCharBuffer
mbBuf(conv
.cWC2MB(pwz
, nLength
, &mbLen
));
465 return SubstrBufFromWC(wxCharBuffer(""), 0);
467 return SubstrBufFromWC(mbBuf
, mbLen
);
469 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
471 // This std::string::c_str()-like method returns a wide char pointer to string
472 // contents. In wxUSE_UNICODE_WCHAR case it is trivial as it can simply return
473 // a pointer to the internal representation. Otherwise a conversion is required
474 // and it returns a temporary buffer.
// However for compatibility with c_str() and to avoid breaking existing code
// doing
//
//      for ( const wchar_t *p = s.wc_str(); *p; p++ )
//          ... use *p ...
//
// we actually need to ensure that the returned buffer is _not_ temporary and
// so we use wxString::m_convertedToWChar to store the returned data
484 #if !wxUSE_UNICODE_WCHAR
486 const wchar_t *wxString::AsWChar(const wxMBConv
& conv
) const
488 const char * const strMB
= m_impl
.c_str();
489 const size_t lenMB
= m_impl
.length();
491 // find out the size of the buffer needed
492 const size_t lenWC
= conv
.ToWChar(NULL
, 0, strMB
, lenMB
);
493 if ( lenWC
== wxCONV_FAILED
)
    // keep the same buffer if the string size didn't change: this is not only
    // an optimization but also ensures that code which modifies the string
    // character by character (without changing its length) can continue to use
    // the pointer returned by a previous wc_str() call even after changing the
    // string
502 // TODO-UTF8: we could check for ">" instead of "!=" here as this would
503 // allow to save on buffer reallocations but at the cost of
504 // consuming (even) more memory, we should benchmark this to
505 // determine if it's worth doing
506 if ( !m_convertedToWChar
.m_str
|| lenWC
!= m_convertedToWChar
.m_len
)
508 if ( !const_cast<wxString
*>(this)->m_convertedToWChar
.Extend(lenWC
) )
512 // finally do convert
513 m_convertedToWChar
.m_str
[lenWC
] = L
'\0';
514 if ( conv
.ToWChar(m_convertedToWChar
.m_str
, lenWC
,
515 strMB
, lenMB
) == wxCONV_FAILED
)
518 return m_convertedToWChar
.m_str
;
521 #endif // !wxUSE_UNICODE_WCHAR
524 // Same thing for mb_str() which returns a normal char pointer to string
525 // contents: this always requires converting it to the specified encoding in
526 // non-ANSI build except if we need to convert to UTF-8 and this is what we
527 // already use internally.
530 const char *wxString::AsChar(const wxMBConv
& conv
) const
532 #if wxUSE_UNICODE_UTF8
534 return m_impl
.c_str();
536 const wchar_t * const strWC
= AsWChar(wxMBConvStrictUTF8());
537 const size_t lenWC
= m_convertedToWChar
.m_len
;
538 #else // wxUSE_UNICODE_WCHAR
539 const wchar_t * const strWC
= m_impl
.c_str();
540 const size_t lenWC
= m_impl
.length();
541 #endif // wxUSE_UNICODE_UTF8/wxUSE_UNICODE_WCHAR
543 const size_t lenMB
= conv
.FromWChar(NULL
, 0, strWC
, lenWC
);
544 if ( lenMB
== wxCONV_FAILED
)
547 if ( !m_convertedToChar
.m_str
|| lenMB
!= m_convertedToChar
.m_len
)
549 if ( !const_cast<wxString
*>(this)->m_convertedToChar
.Extend(lenMB
) )
553 m_convertedToChar
.m_str
[lenMB
] = '\0';
554 if ( conv
.FromWChar(m_convertedToChar
.m_str
, lenMB
,
555 strWC
, lenWC
) == wxCONV_FAILED
)
558 return m_convertedToChar
.m_str
;
561 #endif // wxUSE_UNICODE
563 // shrink to minimal size (releasing extra memory)
564 bool wxString::Shrink()
566 wxString
tmp(begin(), end());
568 return tmp
.length() == length();
571 // deprecated compatibility code:
572 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
573 wxStringCharType
*wxString::GetWriteBuf(size_t nLen
)
575 return DoGetWriteBuf(nLen
);
578 void wxString::UngetWriteBuf()
583 void wxString::UngetWriteBuf(size_t nLen
)
585 DoUngetWriteBuf(nLen
);
587 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
590 // ---------------------------------------------------------------------------
592 // ---------------------------------------------------------------------------
594 // all functions are inline in string.h
596 // ---------------------------------------------------------------------------
597 // concatenation operators
598 // ---------------------------------------------------------------------------
/*
 * concatenation functions come in 5 flavours:
 *  string + string
 *  char   + string      and      string + char
 *  C str  + string      and      string + C str
 */
607 wxString
operator+(const wxString
& str1
, const wxString
& str2
)
609 #if !wxUSE_STL_BASED_WXSTRING
610 wxASSERT( str1
.IsValid() );
611 wxASSERT( str2
.IsValid() );
620 wxString
operator+(const wxString
& str
, wxUniChar ch
)
622 #if !wxUSE_STL_BASED_WXSTRING
623 wxASSERT( str
.IsValid() );
632 wxString
operator+(wxUniChar ch
, const wxString
& str
)
634 #if !wxUSE_STL_BASED_WXSTRING
635 wxASSERT( str
.IsValid() );
644 wxString
operator+(const wxString
& str
, const char *psz
)
646 #if !wxUSE_STL_BASED_WXSTRING
647 wxASSERT( str
.IsValid() );
651 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
652 wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
660 wxString
operator+(const wxString
& str
, const wchar_t *pwz
)
662 #if !wxUSE_STL_BASED_WXSTRING
663 wxASSERT( str
.IsValid() );
667 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
668 wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
676 wxString
operator+(const char *psz
, const wxString
& str
)
678 #if !wxUSE_STL_BASED_WXSTRING
679 wxASSERT( str
.IsValid() );
683 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
684 wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
692 wxString
operator+(const wchar_t *pwz
, const wxString
& str
)
694 #if !wxUSE_STL_BASED_WXSTRING
695 wxASSERT( str
.IsValid() );
699 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
700 wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
708 // ---------------------------------------------------------------------------
710 // ---------------------------------------------------------------------------
712 bool wxString::IsSameAs(wxUniChar c
, bool compareWithCase
) const
714 return (length() == 1) && (compareWithCase
? GetChar(0u) == c
715 : wxToupper(GetChar(0u)) == wxToupper(c
));
718 #ifdef HAVE_STD_STRING_COMPARE
720 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
721 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
722 // sort strings in characters code point order by sorting the byte sequence
723 // in byte values order (i.e. what strcmp() and memcmp() do).
725 int wxString::compare(const wxString
& str
) const
727 return m_impl
.compare(str
.m_impl
);
730 int wxString::compare(size_t nStart
, size_t nLen
,
731 const wxString
& str
) const
734 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
735 return m_impl
.compare(pos
, len
, str
.m_impl
);
738 int wxString::compare(size_t nStart
, size_t nLen
,
740 size_t nStart2
, size_t nLen2
) const
743 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
746 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
748 return m_impl
.compare(pos
, len
, str
.m_impl
, pos2
, len2
);
751 int wxString::compare(const char* sz
) const
753 return m_impl
.compare(ImplStr(sz
));
756 int wxString::compare(const wchar_t* sz
) const
758 return m_impl
.compare(ImplStr(sz
));
761 int wxString::compare(size_t nStart
, size_t nLen
,
762 const char* sz
, size_t nCount
) const
765 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
767 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
769 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
772 int wxString::compare(size_t nStart
, size_t nLen
,
773 const wchar_t* sz
, size_t nCount
) const
776 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
778 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
780 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
783 #else // !HAVE_STD_STRING_COMPARE
785 static inline int wxDoCmp(const wxStringCharType
* s1
, size_t l1
,
786 const wxStringCharType
* s2
, size_t l2
)
789 return wxStringMemcmp(s1
, s2
, l1
);
792 int ret
= wxStringMemcmp(s1
, s2
, l1
);
793 return ret
== 0 ? -1 : ret
;
797 int ret
= wxStringMemcmp(s1
, s2
, l2
);
798 return ret
== 0 ? +1 : ret
;
802 int wxString::compare(const wxString
& str
) const
804 return ::wxDoCmp(m_impl
.data(), m_impl
.length(),
805 str
.m_impl
.data(), str
.m_impl
.length());
808 int wxString::compare(size_t nStart
, size_t nLen
,
809 const wxString
& str
) const
811 wxASSERT(nStart
<= length());
812 size_type strLen
= length() - nStart
;
813 nLen
= strLen
< nLen
? strLen
: nLen
;
816 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
818 return ::wxDoCmp(m_impl
.data() + pos
, len
,
819 str
.m_impl
.data(), str
.m_impl
.length());
822 int wxString::compare(size_t nStart
, size_t nLen
,
824 size_t nStart2
, size_t nLen2
) const
826 wxASSERT(nStart
<= length());
827 wxASSERT(nStart2
<= str
.length());
828 size_type strLen
= length() - nStart
,
829 strLen2
= str
.length() - nStart2
;
830 nLen
= strLen
< nLen
? strLen
: nLen
;
831 nLen2
= strLen2
< nLen2
? strLen2
: nLen2
;
834 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
836 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
838 return ::wxDoCmp(m_impl
.data() + pos
, len
,
839 str
.m_impl
.data() + pos2
, len2
);
842 int wxString::compare(const char* sz
) const
844 SubstrBufFromMB
str(ImplStr(sz
, npos
));
845 if ( str
.len
== npos
)
846 str
.len
= wxStringStrlen(str
.data
);
847 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
850 int wxString::compare(const wchar_t* sz
) const
852 SubstrBufFromWC
str(ImplStr(sz
, npos
));
853 if ( str
.len
== npos
)
854 str
.len
= wxStringStrlen(str
.data
);
855 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
858 int wxString::compare(size_t nStart
, size_t nLen
,
859 const char* sz
, size_t nCount
) const
861 wxASSERT(nStart
<= length());
862 size_type strLen
= length() - nStart
;
863 nLen
= strLen
< nLen
? strLen
: nLen
;
866 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
868 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
869 if ( str
.len
== npos
)
870 str
.len
= wxStringStrlen(str
.data
);
872 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
875 int wxString::compare(size_t nStart
, size_t nLen
,
876 const wchar_t* sz
, size_t nCount
) const
878 wxASSERT(nStart
<= length());
879 size_type strLen
= length() - nStart
;
880 nLen
= strLen
< nLen
? strLen
: nLen
;
883 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
885 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
886 if ( str
.len
== npos
)
887 str
.len
= wxStringStrlen(str
.data
);
889 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
892 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
895 // ---------------------------------------------------------------------------
896 // find_{first,last}_[not]_of functions
897 // ---------------------------------------------------------------------------
899 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
901 // NB: All these functions are implemented with the argument being wxChar*,
902 // i.e. widechar string in any Unicode build, even though native string
903 // representation is char* in the UTF-8 build. This is because we couldn't
904 // use memchr() to determine if a character is in a set encoded as UTF-8.
906 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
) const
908 return find_first_of(sz
, nStart
, wxStrlen(sz
));
911 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
) const
913 return find_first_not_of(sz
, nStart
, wxStrlen(sz
));
916 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
918 wxASSERT_MSG( nStart
<= length(), wxT("invalid index") );
921 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
923 if ( wxTmemchr(sz
, *i
, n
) )
930 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
932 wxASSERT_MSG( nStart
<= length(), wxT("invalid index") );
935 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
937 if ( !wxTmemchr(sz
, *i
, n
) )
945 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
) const
947 return find_last_of(sz
, nStart
, wxStrlen(sz
));
950 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
) const
952 return find_last_not_of(sz
, nStart
, wxStrlen(sz
));
955 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
957 size_t len
= length();
959 if ( nStart
== npos
)
965 wxASSERT_MSG( nStart
<= len
, wxT("invalid index") );
969 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
970 i
!= rend(); --idx
, ++i
)
972 if ( wxTmemchr(sz
, *i
, n
) )
979 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
981 size_t len
= length();
983 if ( nStart
== npos
)
989 wxASSERT_MSG( nStart
<= len
, wxT("invalid index") );
993 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
994 i
!= rend(); --idx
, ++i
)
996 if ( !wxTmemchr(sz
, *i
, n
) )
1003 size_t wxString::find_first_not_of(wxUniChar ch
, size_t nStart
) const
1005 wxASSERT_MSG( nStart
<= length(), wxT("invalid index") );
1007 size_t idx
= nStart
;
1008 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
1017 size_t wxString::find_last_not_of(wxUniChar ch
, size_t nStart
) const
1019 size_t len
= length();
1021 if ( nStart
== npos
)
1027 wxASSERT_MSG( nStart
<= len
, wxT("invalid index") );
1030 size_t idx
= nStart
;
1031 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
1032 i
!= rend(); --idx
, ++i
)
// the functions above were implemented for wchar_t* arguments in Unicode
// build and char* in ANSI build; below are implementations for the other
// character type:
1045 #define wxOtherCharType char
1046 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
1048 #define wxOtherCharType wchar_t
1049 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
1052 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
) const
1053 { return find_first_of(STRCONV(sz
), nStart
); }
1055 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
,
1057 { return find_first_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1058 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
) const
1059 { return find_last_of(STRCONV(sz
), nStart
); }
1060 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
,
1062 { return find_last_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1063 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
1064 { return find_first_not_of(STRCONV(sz
), nStart
); }
1065 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
,
1067 { return find_first_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1068 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
1069 { return find_last_not_of(STRCONV(sz
), nStart
); }
1070 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
,
1072 { return find_last_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1074 #undef wxOtherCharType
1077 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1079 // ===========================================================================
1080 // other common string functions
1081 // ===========================================================================
1083 int wxString::CmpNoCase(const wxString
& s
) const
1085 #if !wxUSE_UNICODE_UTF8
1086 // We compare NUL-delimited chunks of the strings inside the loop. We will
1087 // do as many iterations as there are embedded NULs in the string, i.e.
1088 // usually we will run it just once.
1090 typedef const wxStringImpl::value_type
*pchar_type
;
1091 const pchar_type thisBegin
= m_impl
.c_str();
1092 const pchar_type thatBegin
= s
.m_impl
.c_str();
1094 const pchar_type thisEnd
= thisBegin
+ m_impl
.length();
1095 const pchar_type thatEnd
= thatBegin
+ s
.m_impl
.length();
1097 pchar_type thisCur
= thisBegin
;
1098 pchar_type thatCur
= thatBegin
;
1103 // Compare until the next NUL, if the strings differ this is the final
1105 rc
= wxStricmp(thisCur
, thatCur
);
1109 const size_t lenChunk
= wxStrlen(thisCur
);
1110 thisCur
+= lenChunk
;
1111 thatCur
+= lenChunk
;
1113 // Skip all the NULs as wxStricmp() doesn't handle them.
1114 for ( ; !*thisCur
; thisCur
++, thatCur
++ )
1116 // Check if we exhausted either of the strings.
1117 if ( thisCur
== thisEnd
)
1119 // This one is exhausted, is the other one too?
1120 return thatCur
== thatEnd
? 0 : -1;
1123 if ( thatCur
== thatEnd
)
1125 // Because of the test above we know that this one is not
1126 // exhausted yet so it's greater than the other one that is.
1132 // Anything non-NUL is greater than NUL.
1139 #else // wxUSE_UNICODE_UTF8
1140 // CRT functions can't be used for case-insensitive comparison of UTF-8
1141 // strings so do it in the naive, simple and inefficient way.
1143 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
1144 const_iterator i1
= begin();
1145 const_iterator end1
= end();
1146 const_iterator i2
= s
.begin();
1147 const_iterator end2
= s
.end();
1149 for ( ; i1
!= end1
&& i2
!= end2
; ++i1
, ++i2
)
1151 wxUniChar lower1
= (wxChar
)wxTolower(*i1
);
1152 wxUniChar lower2
= (wxChar
)wxTolower(*i2
);
1153 if ( lower1
!= lower2
)
1154 return lower1
< lower2
? -1 : 1;
1157 size_t len1
= length();
1158 size_t len2
= s
.length();
1162 else if ( len1
> len2
)
1165 #endif // !wxUSE_UNICODE_UTF8/wxUSE_UNICODE_UTF8
1171 wxString
wxString::FromAscii(const char *ascii
, size_t len
)
1173 if (!ascii
|| len
== 0)
1174 return wxEmptyString
;
1179 wxStringInternalBuffer
buf(res
, len
);
1180 wxStringCharType
*dest
= buf
;
1182 for ( ; len
> 0; --len
)
1184 unsigned char c
= (unsigned char)*ascii
++;
1185 wxASSERT_MSG( c
< 0x80,
1186 wxT("Non-ASCII value passed to FromAscii().") );
1188 *dest
++ = (wchar_t)c
;
1195 wxString
wxString::FromAscii(const char *ascii
)
1197 return FromAscii(ascii
, wxStrlen(ascii
));
1200 wxString
wxString::FromAscii(char ascii
)
1202 // What do we do with '\0' ?
1204 unsigned char c
= (unsigned char)ascii
;
1206 wxASSERT_MSG( c
< 0x80, wxT("Non-ASCII value passed to FromAscii().") );
1208 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1209 return wxString(wxUniChar((wchar_t)c
));
1212 const wxScopedCharBuffer
wxString::ToAscii() const
1214 // this will allocate enough space for the terminating NUL too
1215 wxCharBuffer
buffer(length());
1216 char *dest
= buffer
.data();
1218 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1221 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1222 *dest
++ = c
.IsAscii() ? (char)c
: '_';
1224 // the output string can't have embedded NULs anyhow, so we can safely
1225 // stop at first of them even if we do have any
1233 #endif // wxUSE_UNICODE
1235 // extract string of length nCount starting at nFirst
1236 wxString
wxString::Mid(size_t nFirst
, size_t nCount
) const
1238 size_t nLen
= length();
1240 // default value of nCount is npos and means "till the end"
1241 if ( nCount
== npos
)
1243 nCount
= nLen
- nFirst
;
1246 // out-of-bounds requests return sensible things
1247 if ( nFirst
+ nCount
> nLen
)
1249 nCount
= nLen
- nFirst
;
1252 if ( nFirst
> nLen
)
1254 // AllocCopy() will return empty string
1255 return wxEmptyString
;
1258 wxString
dest(*this, nFirst
, nCount
);
1259 if ( dest
.length() != nCount
)
1261 wxFAIL_MSG( wxT("out of memory in wxString::Mid") );
1267 // check that the string starts with prefix and return the rest of the string
1268 // in the provided pointer if it is not NULL, otherwise return false
1269 bool wxString::StartsWith(const wxString
& prefix
, wxString
*rest
) const
1271 if ( compare(0, prefix
.length(), prefix
) != 0 )
1276 // put the rest of the string into provided pointer
1277 rest
->assign(*this, prefix
.length(), npos
);
1284 // check that the string ends with suffix and return the rest of it in the
1285 // provided pointer if it is not NULL, otherwise return false
1286 bool wxString::EndsWith(const wxString
& suffix
, wxString
*rest
) const
1288 int start
= length() - suffix
.length();
1290 if ( start
< 0 || compare(start
, npos
, suffix
) != 0 )
1295 // put the rest of the string into provided pointer
1296 rest
->assign(*this, 0, start
);
1303 // extract nCount last (rightmost) characters
1304 wxString
wxString::Right(size_t nCount
) const
1306 if ( nCount
> length() )
1309 wxString
dest(*this, length() - nCount
, nCount
);
1310 if ( dest
.length() != nCount
) {
1311 wxFAIL_MSG( wxT("out of memory in wxString::Right") );
1316 // get all characters after the last occurrence of ch
1317 // (returns the whole string if ch not found)
1318 wxString
wxString::AfterLast(wxUniChar ch
) const
1321 int iPos
= Find(ch
, true);
1322 if ( iPos
== wxNOT_FOUND
)
1325 str
.assign(*this, iPos
+ 1, npos
);
1330 // extract nCount first (leftmost) characters
1331 wxString
wxString::Left(size_t nCount
) const
1333 if ( nCount
> length() )
1336 wxString
dest(*this, 0, nCount
);
1337 if ( dest
.length() != nCount
) {
1338 wxFAIL_MSG( wxT("out of memory in wxString::Left") );
1343 // get all characters before the first occurrence of ch
1344 // (returns the whole string if ch not found)
1345 wxString
wxString::BeforeFirst(wxUniChar ch
, wxString
*rest
) const
1347 int iPos
= Find(ch
);
1348 if ( iPos
== wxNOT_FOUND
)
1357 rest
->assign(*this, iPos
+ 1, npos
);
1360 return wxString(*this, 0, iPos
);
1363 /// get all characters before the last occurrence of ch
1364 /// (returns empty string if ch not found)
1365 wxString
wxString::BeforeLast(wxUniChar ch
, wxString
*rest
) const
1368 int iPos
= Find(ch
, true);
1369 if ( iPos
!= wxNOT_FOUND
)
1372 str
.assign(*this, 0, iPos
);
1375 rest
->assign(*this, iPos
+ 1, npos
);
1386 /// get all characters after the first occurrence of ch
1387 /// (returns empty string if ch not found)
1388 wxString
wxString::AfterFirst(wxUniChar ch
) const
1391 int iPos
= Find(ch
);
1392 if ( iPos
!= wxNOT_FOUND
)
1393 str
.assign(*this, iPos
+ 1, npos
);
1398 // replace first (or all) occurrences of some substring with another one
1399 size_t wxString::Replace(const wxString
& strOld
,
1400 const wxString
& strNew
, bool bReplaceAll
)
1402 // if we tried to replace an empty string we'd enter an infinite loop below
1403 wxCHECK_MSG( !strOld
.empty(), 0,
1404 wxT("wxString::Replace(): invalid parameter") );
1406 wxSTRING_INVALIDATE_CACHE();
1408 size_t uiCount
= 0; // count of replacements made
1410 // optimize the special common case: replacement of one character by
1411 // another one (in UTF-8 case we can only do this for ASCII characters)
1413 // benchmarks show that this special version is around 3 times faster
1414 // (depending on the proportion of matching characters and UTF-8/wchar_t
1416 if ( strOld
.m_impl
.length() == 1 && strNew
.m_impl
.length() == 1 )
1418 const wxStringCharType chOld
= strOld
.m_impl
[0],
1419 chNew
= strNew
.m_impl
[0];
1421 // this loop is the simplified version of the one below
1422 for ( size_t pos
= 0; ; )
1424 pos
= m_impl
.find(chOld
, pos
);
1428 m_impl
[pos
++] = chNew
;
1436 else if ( !bReplaceAll
)
1438 size_t pos
= m_impl
.find(strOld
.m_impl
, 0);
1441 m_impl
.replace(pos
, strOld
.m_impl
.length(), strNew
.m_impl
);
1445 else // replace all occurrences
1447 const size_t uiOldLen
= strOld
.m_impl
.length();
1448 const size_t uiNewLen
= strNew
.m_impl
.length();
1450 // first scan the string to find all positions at which the replacement
1452 wxVector
<size_t> replacePositions
;
1455 for ( pos
= m_impl
.find(strOld
.m_impl
, 0);
1457 pos
= m_impl
.find(strOld
.m_impl
, pos
+ uiOldLen
))
1459 replacePositions
.push_back(pos
);
1466 // allocate enough memory for the whole new string
1468 tmp
.m_impl
.reserve(m_impl
.length() + uiCount
*(uiNewLen
- uiOldLen
));
1470 // copy this string to tmp doing replacements on the fly
1472 for ( pos
= 0; replNum
< uiCount
; replNum
++ )
1474 const size_t nextReplPos
= replacePositions
[replNum
];
1476 if ( pos
!= nextReplPos
)
1478 tmp
.m_impl
.append(m_impl
, pos
, nextReplPos
- pos
);
1481 tmp
.m_impl
.append(strNew
.m_impl
);
1482 pos
= nextReplPos
+ uiOldLen
;
1485 if ( pos
!= m_impl
.length() )
1487 // append the rest of the string unchanged
1488 tmp
.m_impl
.append(m_impl
, pos
, m_impl
.length() - pos
);
1497 bool wxString::IsAscii() const
1499 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1501 if ( !(*i
).IsAscii() )
1508 bool wxString::IsWord() const
1510 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1512 if ( !wxIsalpha(*i
) )
1519 bool wxString::IsNumber() const
1524 const_iterator i
= begin();
1526 if ( *i
== wxT('-') || *i
== wxT('+') )
1529 for ( ; i
!= end(); ++i
)
1531 if ( !wxIsdigit(*i
) )
1538 wxString
wxString::Strip(stripType w
) const
1541 if ( w
& leading
) s
.Trim(false);
1542 if ( w
& trailing
) s
.Trim(true);
1546 // ---------------------------------------------------------------------------
1548 // ---------------------------------------------------------------------------
1550 wxString
& wxString::MakeUpper()
1552 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1553 *it
= (wxChar
)wxToupper(*it
);
1558 wxString
& wxString::MakeLower()
1560 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1561 *it
= (wxChar
)wxTolower(*it
);
1566 wxString
& wxString::MakeCapitalized()
1568 const iterator en
= end();
1569 iterator it
= begin();
1572 *it
= (wxChar
)wxToupper(*it
);
1573 for ( ++it
; it
!= en
; ++it
)
1574 *it
= (wxChar
)wxTolower(*it
);
1580 // ---------------------------------------------------------------------------
1581 // trimming and padding
1582 // ---------------------------------------------------------------------------
1584 // some compilers (VC++ 6.0 not to name them) return true for a call to
1585 // isspace('\xEA') in the C locale which seems to be broken to me, but we have
1586 // to live with this by checking that the character is a 7 bit one - even if
1587 // this may fail to detect some spaces (I don't know if Unicode doesn't have
1588 // space-like symbols somewhere except in the first 128 chars), it is arguably
1589 // still better than trimming away accented letters
1590 inline int wxSafeIsspace(wxChar ch
) { return (ch
< 127) && wxIsspace(ch
); }
1592 // trims spaces (in the sense of isspace) from left or right side
1593 wxString
& wxString::Trim(bool bFromRight
)
1595 // first check if we're going to modify the string at all
1598 (bFromRight
&& wxSafeIsspace(GetChar(length() - 1))) ||
1599 (!bFromRight
&& wxSafeIsspace(GetChar(0u)))
1605 // find last non-space character
1606 reverse_iterator psz
= rbegin();
1607 while ( (psz
!= rend()) && wxSafeIsspace(*psz
) )
1610 // truncate at trailing space start
1611 erase(psz
.base(), end());
1615 // find first non-space character
1616 iterator psz
= begin();
1617 while ( (psz
!= end()) && wxSafeIsspace(*psz
) )
1620 // fix up data and length
1621 erase(begin(), psz
);
1628 // adds nCount characters chPad to the string from either side
1629 wxString
& wxString::Pad(size_t nCount
, wxUniChar chPad
, bool bFromRight
)
1631 wxString
s(chPad
, nCount
);
1644 // truncate the string
1645 wxString
& wxString::Truncate(size_t uiLen
)
1647 if ( uiLen
< length() )
1649 erase(begin() + uiLen
, end());
1651 //else: nothing to do, string is already short enough
1656 // ---------------------------------------------------------------------------
1657 // finding (return wxNOT_FOUND if not found and index otherwise)
1658 // ---------------------------------------------------------------------------
1661 int wxString::Find(wxUniChar ch
, bool bFromEnd
) const
1663 size_type idx
= bFromEnd
? find_last_of(ch
) : find_first_of(ch
);
1665 return (idx
== npos
) ? wxNOT_FOUND
: (int)idx
;
1668 // ----------------------------------------------------------------------------
1669 // conversion to numbers
1670 // ----------------------------------------------------------------------------
// All the string-to-number conversion functions below share exactly the same
// prologue and epilogue, which are factored out into the two macros defined
// here. Number extraction works correctly on UTF-8 strings, so
// wxStringCharType and wx_str() can be used for maximum efficiency.
//
// The macros expect the enclosing function to have an output parameter named
// "pVal" and to define, between the two macros, a variable named "val"
// holding the conversion result.

// errno is only used when not building for Windows CE
#ifndef __WXWINCE__
    #define DO_IF_NOT_WINCE(x) x
#else
    #define DO_IF_NOT_WINCE(x)
#endif

// prologue: check the output pointer, reset errno and declare the "start"
// and "end" pointers used by the conversion call
#define WX_STRING_TO_X_TYPE_START \
    wxCHECK_MSG( pVal, false, wxT("NULL output pointer") ); \
    DO_IF_NOT_WINCE( errno = 0; ) \
    const wxStringCharType *start = wx_str(); \
    wxStringCharType *end;

// epilogue: notice that we return false without modifying the output
// parameter at all if nothing could be parsed but we do modify it and return
// false then if we did parse something successfully but not the entire string
#define WX_STRING_TO_X_TYPE_END \
    if ( end == start DO_IF_NOT_WINCE(|| errno == ERANGE) ) \
        return false; \
    *pVal = val; \
    return !*end;
1697 bool wxString::ToLong(long *pVal
, int base
) const
1699 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), wxT("invalid base") );
1701 WX_STRING_TO_X_TYPE_START
1702 long val
= wxStrtol(start
, &end
, base
);
1703 WX_STRING_TO_X_TYPE_END
1706 bool wxString::ToULong(unsigned long *pVal
, int base
) const
1708 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), wxT("invalid base") );
1710 WX_STRING_TO_X_TYPE_START
1711 unsigned long val
= wxStrtoul(start
, &end
, base
);
1712 WX_STRING_TO_X_TYPE_END
1715 bool wxString::ToLongLong(wxLongLong_t
*pVal
, int base
) const
1717 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), wxT("invalid base") );
1719 WX_STRING_TO_X_TYPE_START
1720 wxLongLong_t val
= wxStrtoll(start
, &end
, base
);
1721 WX_STRING_TO_X_TYPE_END
1724 bool wxString::ToULongLong(wxULongLong_t
*pVal
, int base
) const
1726 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), wxT("invalid base") );
1728 WX_STRING_TO_X_TYPE_START
1729 wxULongLong_t val
= wxStrtoull(start
, &end
, base
);
1730 WX_STRING_TO_X_TYPE_END
1733 bool wxString::ToDouble(double *pVal
) const
1735 WX_STRING_TO_X_TYPE_START
1736 double val
= wxStrtod(start
, &end
);
1737 WX_STRING_TO_X_TYPE_END
1742 bool wxString::ToCLong(long *pVal
, int base
) const
1744 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), wxT("invalid base") );
1746 WX_STRING_TO_X_TYPE_START
1747 #if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT)
1748 long val
= wxStrtol_lA(start
, &end
, base
, wxCLocale
);
1750 long val
= wxStrtol_l(start
, &end
, base
, wxCLocale
);
1752 WX_STRING_TO_X_TYPE_END
1755 bool wxString::ToCULong(unsigned long *pVal
, int base
) const
1757 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), wxT("invalid base") );
1759 WX_STRING_TO_X_TYPE_START
1760 #if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT)
1761 unsigned long val
= wxStrtoul_lA(start
, &end
, base
, wxCLocale
);
1763 unsigned long val
= wxStrtoul_l(start
, &end
, base
, wxCLocale
);
1765 WX_STRING_TO_X_TYPE_END
1768 bool wxString::ToCDouble(double *pVal
) const
1770 WX_STRING_TO_X_TYPE_START
1771 #if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT)
1772 double val
= wxStrtod_lA(start
, &end
, wxCLocale
);
1774 double val
= wxStrtod_l(start
, &end
, wxCLocale
);
1776 WX_STRING_TO_X_TYPE_END
1779 #else // wxUSE_XLOCALE
1781 // Provide implementation of these functions even when wxUSE_XLOCALE is
1782 // disabled, we still need them in wxWidgets internal code.
1784 // For integers we just assume the current locale uses the same number
1785 // representation as the C one as there is nothing else we can do.
1786 bool wxString::ToCLong(long *pVal
, int base
) const
1788 return ToLong(pVal
, base
);
1791 bool wxString::ToCULong(unsigned long *pVal
, int base
) const
1793 return ToULong(pVal
, base
);
1796 // For floating point numbers we have to handle the problem of the decimal
1797 // point which is different in different locales.
1798 bool wxString::ToCDouble(double *pVal
) const
1800 // Create a copy of this string using the decimal point instead of whatever
1801 // separator the current locale uses.
1803 wxString sep
= wxLocale::GetInfo(wxLOCALE_DECIMAL_POINT
,
1804 wxLOCALE_CAT_NUMBER
);
1807 // We can avoid an unnecessary string copy in this case.
1808 return ToDouble(pVal
);
1810 #else // !wxUSE_INTL
1811 // We don't know what the current separator is so it might even be a point
1812 // already, try to parse the string as a double:
1813 if ( ToDouble(pVal
) )
1815 // It must have been the point, nothing else to do.
1819 // Try to guess the separator, using the most common alternative value.
1821 #endif // wxUSE_INTL/!wxUSE_INTL
1822 wxString
cstr(*this);
1823 cstr
.Replace(".", sep
);
1825 return cstr
.ToDouble(pVal
);
1828 #endif // wxUSE_XLOCALE/!wxUSE_XLOCALE
1830 // ----------------------------------------------------------------------------
1831 // number to string conversion
1832 // ----------------------------------------------------------------------------
1835 wxString
wxString::FromDouble(double val
, int precision
)
1837 wxCHECK_MSG( precision
>= -1, wxString(), "Invalid negative precision" );
1840 if ( precision
== -1 )
1844 else // Use fixed precision.
1846 format
.Printf("%%.%df", precision
);
1849 return wxString::Format(format
, val
);
1853 wxString
wxString::FromCDouble(double val
, int precision
)
1855 wxCHECK_MSG( precision
>= -1, wxString(), "Invalid negative precision" );
1857 #if wxUSE_STD_IOSTREAM && wxUSE_STD_STRING
1858 // We assume that we can use the ostream and not wstream for numbers.
1859 wxSTD ostringstream os
;
1860 if ( precision
!= -1 )
1862 os
.precision(precision
);
1863 os
.setf(std::ios::fixed
, std::ios::floatfield
);
1868 #else // !wxUSE_STD_IOSTREAM
1869 // Can't use iostream locale support, fall back to the manual method
1871 wxString s
= FromDouble(val
, precision
);
1873 wxString sep
= wxLocale::GetInfo(wxLOCALE_DECIMAL_POINT
,
1874 wxLOCALE_CAT_NUMBER
);
1875 #else // !wxUSE_INTL
1876 // As above, this is the most common alternative value. Notice that here it
1877 // doesn't matter if we guess wrongly and the current separator is already
1878 // ".": we'll just waste a call to Replace() in this case.
1880 #endif // wxUSE_INTL/!wxUSE_INTL
1882 s
.Replace(sep
, ".");
1884 #endif // wxUSE_STD_IOSTREAM/!wxUSE_STD_IOSTREAM
1887 // ---------------------------------------------------------------------------
1889 // ---------------------------------------------------------------------------
1891 #if !wxUSE_UTF8_LOCALE_ONLY
1893 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1894 wxString
wxStringPrintfMixinBase::DoFormatWchar(const wxChar
*format
, ...)
1896 wxString
wxString::DoFormatWchar(const wxChar
*format
, ...)
1900 va_start(argptr
, format
);
1903 s
.PrintfV(format
, argptr
);
1909 #endif // !wxUSE_UTF8_LOCALE_ONLY
1911 #if wxUSE_UNICODE_UTF8
1913 wxString
wxString::DoFormatUtf8(const char *format
, ...)
1916 va_start(argptr
, format
);
1919 s
.PrintfV(format
, argptr
);
1925 #endif // wxUSE_UNICODE_UTF8
1928 wxString
wxString::FormatV(const wxString
& format
, va_list argptr
)
1931 s
.PrintfV(format
, argptr
);
1935 #if !wxUSE_UTF8_LOCALE_ONLY
1936 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1937 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar
*format
, ...)
1939 int wxString::DoPrintfWchar(const wxChar
*format
, ...)
1943 va_start(argptr
, format
);
1945 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1946 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1947 // because it's the only cast that works safely for downcasting when
1948 // multiple inheritance is used:
1949 wxString
*str
= static_cast<wxString
*>(this);
1951 wxString
*str
= this;
1954 int iLen
= str
->PrintfV(format
, argptr
);
1960 #endif // !wxUSE_UTF8_LOCALE_ONLY
1962 #if wxUSE_UNICODE_UTF8
1963 int wxString::DoPrintfUtf8(const char *format
, ...)
1966 va_start(argptr
, format
);
1968 int iLen
= PrintfV(format
, argptr
);
1974 #endif // wxUSE_UNICODE_UTF8
1977 Uses wxVsnprintf and places the result into the this string.
1979 In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1980 it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in
1981 the ISO C99 (and thus SUSv3) standard the return value for the case of
1982 an undersized buffer is inconsistent. For conforming vsnprintf
1983 implementations the function must return the number of characters that
1984 would have been printed had the buffer been large enough. For conforming
1985 vswprintf implementations the function must return a negative number
1988 What vswprintf sets errno to is undefined but Darwin seems to set it to
1989 EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of
1990 those are defined in the standard and backed up by several conformance
1991 statements. Note that ENOMEM mentioned in the manual page does not
1992 apply to swprintf, only wprintf and fwprintf.
1994 Official manual page:
1995 http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1997 Some conformance statements (AIX, Solaris):
1998 http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1999 http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
2001 Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
2002 EILSEQ and EINVAL are specifically defined to mean the error is other than
2003 an undersized buffer and no other errno are defined we treat those two
2004 as meaning hard errors and everything else gets the old behaviour which
2005 is to keep looping and increasing buffer size until the function succeeds.
2007 In practice it's impossible to determine before compilation which behaviour
2008 may be used. The vswprintf function may have vsnprintf-like behaviour or
2009 vice-versa. Behaviour detected on one release can theoretically change
2010 with an updated release. Not to mention that configure testing for it
2011 would require the test to be run on the host system, not the build system
2012 which makes cross compilation difficult. Therefore, we make no assumptions
2013 about behaviour and try our best to handle every known case, including the
2014 case where wxVsnprintf returns a negative number and fails to set errno.
2016 There is yet one more non-standard implementation and that is our own.
2017 Fortunately, that can be detected at compile-time.
2019 On top of all that, ISO C99 explicitly defines snprintf to write a null
    character to the last position of the specified buffer.  That would be at
    the given buffer size minus 1.  It is supposed to do this even if it
2022 turns out that the buffer is sized too small.
2024 Darwin (tested on 10.5) follows the C99 behaviour exactly.
2026 Glibc 2.6 almost follows the C99 behaviour except vswprintf never sets
2027 errno even when it fails. However, it only seems to ever fail due
2028 to an undersized buffer.
2030 #if wxUSE_UNICODE_UTF8
2031 template<typename BufferType
>
2033 // we only need one version in non-UTF8 builds and at least two Windows
2034 // compilers have problems with this function template, so use just one
2035 // normal function here
2037 static int DoStringPrintfV(wxString
& str
,
2038 const wxString
& format
, va_list argptr
)
2044 #if wxUSE_UNICODE_UTF8
2045 BufferType
tmp(str
, size
+ 1);
2046 typename
BufferType::CharType
*buf
= tmp
;
2048 wxStringBuffer
tmp(str
, size
+ 1);
2058 // wxVsnprintf() may modify the original arg pointer, so pass it
2061 wxVaCopy(argptrcopy
, argptr
);
2064 // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
2067 int len
= wxVsnprintf(buf
, size
, format
, argptrcopy
);
2070 // some implementations of vsnprintf() don't NUL terminate
2071 // the string if there is not enough space for it so
2072 // always do it manually
2073 // FIXME: This really seems to be the wrong and would be an off-by-one
2074 // bug except the code above allocates an extra character.
2075 buf
[size
] = wxT('\0');
2077 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
2078 // total number of characters which would have been written if the
2079 // buffer were large enough (newer standards such as Unix98)
2082 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
2083 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
2084 // is true if *both* of them use our own implementation,
2085 // otherwise we can't be sure
2086 #if wxUSE_WXVSNPRINTF
2087 // we know that our own implementation of wxVsnprintf() returns -1
2088 // only for a format error - thus there's something wrong with
2089 // the user's format string
2092 #else // possibly using system version
2093 // assume it only returns error if there is not enough space, but
2094 // as we don't know how much we need, double the current size of
2097 if( (errno
== EILSEQ
) || (errno
== EINVAL
) )
2098 // If errno was set to one of the two well-known hard errors
2099 // then fail immediately to avoid an infinite loop.
2102 #endif // __WXWINCE__
2103 // still not enough, as we don't know how much we need, double the
2104 // current size of the buffer
2106 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
2108 else if ( len
>= size
)
2110 #if wxUSE_WXVSNPRINTF
2111 // we know that our own implementation of wxVsnprintf() returns
2112 // size+1 when there's not enough space but that's not the size
2113 // of the required buffer!
2114 size
*= 2; // so we just double the current size of the buffer
2116 // some vsnprintf() implementations NUL-terminate the buffer and
2117 // some don't in len == size case, to be safe always add 1
2118 // FIXME: I don't quite understand this comment. The vsnprintf
2119 // function is specifically defined to return the number of
2120 // characters printed not including the null terminator.
2121 // So OF COURSE you need to add 1 to get the right buffer size.
2122 // The following line is definitely correct, no question.
2126 else // ok, there was enough space
2132 // we could have overshot
2135 return str
.length();
2138 int wxString::PrintfV(const wxString
& format
, va_list argptr
)
2140 #if wxUSE_UNICODE_UTF8
2141 #if wxUSE_STL_BASED_WXSTRING
2142 typedef wxStringTypeBuffer
<char> Utf8Buffer
;
2144 typedef wxStringInternalBuffer Utf8Buffer
;
2148 #if wxUSE_UTF8_LOCALE_ONLY
2149 return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argptr
);
2151 #if wxUSE_UNICODE_UTF8
2152 if ( wxLocaleIsUtf8
)
2153 return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argptr
);
2156 return DoStringPrintfV
<wxStringBuffer
>(*this, format
, argptr
);
2158 return DoStringPrintfV(*this, format
, argptr
);
2159 #endif // UTF8/WCHAR
2163 // ----------------------------------------------------------------------------
2164 // misc other operations
2165 // ----------------------------------------------------------------------------
2167 // returns true if the string matches the pattern which may contain '*' and
2168 // '?' metacharacters (as usual, '?' matches any character and '*' any number
2170 bool wxString::Matches(const wxString
& mask
) const
2172 // I disable this code as it doesn't seem to be faster (in fact, it seems
2173 // to be much slower) than the old, hand-written code below and using it
2174 // here requires always linking with libregex even if the user code doesn't
2176 #if 0 // wxUSE_REGEX
2177 // first translate the shell-like mask into a regex
2179 pattern
.reserve(wxStrlen(pszMask
));
2181 pattern
+= wxT('^');
2187 pattern
+= wxT('.');
2191 pattern
+= wxT(".*");
2202 // these characters are special in a RE, quote them
2203 // (however note that we don't quote '[' and ']' to allow
2204 // using them for Unix shell like matching)
2205 pattern
+= wxT('\\');
2209 pattern
+= *pszMask
;
2214 pattern
+= wxT('$');
2217 return wxRegEx(pattern
, wxRE_NOSUB
| wxRE_EXTENDED
).Matches(c_str());
2218 #else // !wxUSE_REGEX
2219 // TODO: this is, of course, awfully inefficient...
2221 // FIXME-UTF8: implement using iterators, remove #if
2222 #if wxUSE_UNICODE_UTF8
2223 const wxScopedWCharBuffer maskBuf
= mask
.wc_str();
2224 const wxScopedWCharBuffer txtBuf
= wc_str();
2225 const wxChar
*pszMask
= maskBuf
.data();
2226 const wxChar
*pszTxt
= txtBuf
.data();
2228 const wxChar
*pszMask
= mask
.wx_str();
2229 // the char currently being checked
2230 const wxChar
*pszTxt
= wx_str();
2233 // the last location where '*' matched
2234 const wxChar
*pszLastStarInText
= NULL
;
2235 const wxChar
*pszLastStarInMask
= NULL
;
2238 for ( ; *pszMask
!= wxT('\0'); pszMask
++, pszTxt
++ ) {
2239 switch ( *pszMask
) {
2241 if ( *pszTxt
== wxT('\0') )
2244 // pszTxt and pszMask will be incremented in the loop statement
2250 // remember where we started to be able to backtrack later
2251 pszLastStarInText
= pszTxt
;
2252 pszLastStarInMask
= pszMask
;
2254 // ignore special chars immediately following this one
2255 // (should this be an error?)
2256 while ( *pszMask
== wxT('*') || *pszMask
== wxT('?') )
2259 // if there is nothing more, match
2260 if ( *pszMask
== wxT('\0') )
2263 // are there any other metacharacters in the mask?
2265 const wxChar
*pEndMask
= wxStrpbrk(pszMask
, wxT("*?"));
2267 if ( pEndMask
!= NULL
) {
2268 // we have to match the string between two metachars
2269 uiLenMask
= pEndMask
- pszMask
;
2272 // we have to match the remainder of the string
2273 uiLenMask
= wxStrlen(pszMask
);
2276 wxString
strToMatch(pszMask
, uiLenMask
);
2277 const wxChar
* pMatch
= wxStrstr(pszTxt
, strToMatch
);
2278 if ( pMatch
== NULL
)
2281 // -1 to compensate "++" in the loop
2282 pszTxt
= pMatch
+ uiLenMask
- 1;
2283 pszMask
+= uiLenMask
- 1;
2288 if ( *pszMask
!= *pszTxt
)
2294 // match only if nothing left
2295 if ( *pszTxt
== wxT('\0') )
2298 // if we failed to match, backtrack if we can
2299 if ( pszLastStarInText
) {
2300 pszTxt
= pszLastStarInText
+ 1;
2301 pszMask
= pszLastStarInMask
;
2303 pszLastStarInText
= NULL
;
2305 // don't bother resetting pszLastStarInMask, it's unnecessary
2311 #endif // wxUSE_REGEX/!wxUSE_REGEX
2314 // Count the number of chars
2315 int wxString::Freq(wxUniChar ch
) const
2318 for ( const_iterator i
= begin(); i
!= end(); ++i
)