1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
13 // ===========================================================================
14 // headers, declarations, constants
15 // ===========================================================================
17 // For compilers that support precompilation, includes "wx.h".
18 #include "wx/wxprec.h"
25 #include "wx/string.h"
26 #include "wx/wxcrtvararg.h"
40 #include "wx/hashmap.h"
41 #include "wx/vector.h"
42 #include "wx/xlocale.h"
45 #include "wx/msw/wrapwin.h"
48 #if wxUSE_STD_IOSTREAM
52 // string handling functions used by wxString:
53 #if wxUSE_UNICODE_UTF8
54 #define wxStringMemcpy memcpy
55 #define wxStringMemcmp memcmp
56 #define wxStringMemchr memchr
57 #define wxStringStrlen strlen
59 #define wxStringMemcpy wxTmemcpy
60 #define wxStringMemcmp wxTmemcmp
61 #define wxStringMemchr wxTmemchr
62 #define wxStringStrlen wxStrlen
65 // define a function declared in wx/buffer.h here as we don't have buffer.cpp
66 // and don't want to add it just because of this simple function
70 // wxXXXBuffer classes can be (implicitly) used during global statics
71 // initialization so wrap the static UntypedBufferData variable in a function
72 // to make it safe to access it even before all global statics are initialized
73 UntypedBufferData
*GetUntypedNullData()
75 static UntypedBufferData
s_untypedNullData(NULL
, 0);
77 return &s_untypedNullData
;
80 } // namespace wxPrivate
82 // ---------------------------------------------------------------------------
83 // static class variables definition
84 // ---------------------------------------------------------------------------
86 //According to STL _must_ be a -1 size_t
87 const size_t wxString::npos
= (size_t) -1;
89 #if wxUSE_STRING_POS_CACHE
91 #ifdef wxHAS_COMPILER_TLS
93 wxTLS_TYPE(wxString::Cache
) wxString::ms_cache
;
95 #else // !wxHAS_COMPILER_TLS
97 struct wxStrCacheInitializer
99 wxStrCacheInitializer()
101 // calling this function triggers s_cache initialization in it, and
102 // from now on it becomes safe to call from multiple threads
103 wxString::GetCache();
108 wxString::Cache& wxString::GetCache()
110 static wxTLS_TYPE(Cache) s_cache;
112 return wxTLS_VALUE(s_cache);
116 static wxStrCacheInitializer gs_stringCacheInit
;
118 #endif // wxHAS_COMPILER_TLS/!wxHAS_COMPILER_TLS
120 // gdb seems to be unable to display thread-local variables correctly, at least
121 // not my 6.4.98 version under amd64, so provide this debugging helper to do it
122 #if wxDEBUG_LEVEL >= 2
124 struct wxStrCacheDumper
126 static void ShowAll()
128 puts("*** wxString cache dump:");
129 for ( unsigned n
= 0; n
< wxString::Cache::SIZE
; n
++ )
131 const wxString::Cache::Element
&
132 c
= wxString::GetCacheBegin()[n
];
134 printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
136 n
== wxString::LastUsedCacheElement() ? " [*]" : "",
138 (unsigned long)c
.pos
,
139 (unsigned long)c
.impl
,
145 void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
147 #endif // wxDEBUG_LEVEL >= 2
149 #ifdef wxPROFILE_STRING_CACHE
151 wxString::CacheStats
wxString::ms_cacheStats
;
153 struct wxStrCacheStatsDumper
155 ~wxStrCacheStatsDumper()
157 const wxString::CacheStats
& stats
= wxString::ms_cacheStats
;
161 puts("*** wxString cache statistics:");
162 printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
164 printf("\tHits %u (of which %u not used) or %.2f%%\n",
167 100.*float(stats
.poshits
- stats
.mishits
)/stats
.postot
);
168 printf("\tAverage position requested: %.2f\n",
169 float(stats
.sumpos
) / stats
.postot
);
170 printf("\tAverage offset after cached hint: %.2f\n",
171 float(stats
.sumofs
) / stats
.postot
);
176 printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
177 stats
.lentot
, 100.*float(stats
.lenhits
)/stats
.lentot
);
182 static wxStrCacheStatsDumper s_showCacheStats
;
184 #endif // wxPROFILE_STRING_CACHE
186 #endif // wxUSE_STRING_POS_CACHE
188 // ----------------------------------------------------------------------------
190 // ----------------------------------------------------------------------------
192 #if wxUSE_STD_IOSTREAM
196 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxCStrData
& str
)
198 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
199 const wxScopedCharBuffer
buf(str
.AsCharBuf());
201 os
.clear(wxSTD
ios_base::failbit
);
207 return os
<< str
.AsInternal();
211 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxString
& str
)
213 return os
<< str
.c_str();
216 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxScopedCharBuffer
& str
)
218 return os
<< str
.data();
222 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxScopedWCharBuffer
& str
)
224 return os
<< str
.data();
228 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
230 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxString
& str
)
232 return wos
<< str
.wc_str();
235 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxCStrData
& str
)
237 return wos
<< str
.AsWChar();
240 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxScopedWCharBuffer
& str
)
242 return wos
<< str
.data();
245 #endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
247 #endif // wxUSE_STD_IOSTREAM
249 // ===========================================================================
250 // wxString class core
251 // ===========================================================================
253 #if wxUSE_UNICODE_UTF8
255 void wxString::PosLenToImpl(size_t pos
, size_t len
,
256 size_t *implPos
, size_t *implLen
) const
262 else // have valid start position
264 const const_iterator b
= GetIterForNthChar(pos
);
265 *implPos
= wxStringImpl::const_iterator(b
.impl()) - m_impl
.begin();
270 else // have valid length too
272 // we need to handle the case of length specifying a substring
273 // going beyond the end of the string, just as std::string does
274 const const_iterator
e(end());
276 while ( len
&& i
<= e
)
282 *implLen
= i
.impl() - b
.impl();
287 #endif // wxUSE_UNICODE_UTF8
289 // ----------------------------------------------------------------------------
290 // wxCStrData converted strings caching
291 // ----------------------------------------------------------------------------
293 // FIXME-UTF8: temporarily disabled because it doesn't work with global
294 // string objects; re-enable after fixing this bug and benchmarking
295 // performance to see if using a hash is a good idea at all
298 // For backward compatibility reasons, it must be possible to assign the value
299 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
300 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
301 // because the memory would be freed immediately, but it has to be valid as long
302 // as the string is not modified, so that code like this still works:
304 // const wxChar *s = str.c_str();
305 // while ( s ) { ... }
307 // FIXME-UTF8: not thread safe!
308 // FIXME-UTF8: we currently clear the cached conversion only when the string is
309 // destroyed, but we should do it when the string is modified, to
310 // keep memory usage down
311 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
312 // invalidated the cache on every change, we could keep the previous
314 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
315 // to use mb_str() or wc_str() instead of (const [w]char*)c_str()
318 static inline void DeleteStringFromConversionCache(T
& hash
, const wxString
*s
)
320 typename
T::iterator i
= hash
.find(wxConstCast(s
, wxString
));
321 if ( i
!= hash
.end() )
329 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
330 // so we have to use wxString* here and const-cast when used
331 WX_DECLARE_HASH_MAP(wxString
*, char*, wxPointerHash
, wxPointerEqual
,
332 wxStringCharConversionCache
);
333 static wxStringCharConversionCache gs_stringsCharCache
;
335 const char* wxCStrData::AsChar() const
337 // remove the previously cached value, if any (see FIXMEs above):
338 DeleteStringFromConversionCache(gs_stringsCharCache
, m_str
);
340 // convert the string and keep it:
341 const char *s
= gs_stringsCharCache
[wxConstCast(m_str
, wxString
)] =
342 m_str
->mb_str().release();
346 #endif // wxUSE_UNICODE
348 #if !wxUSE_UNICODE_WCHAR
349 WX_DECLARE_HASH_MAP(wxString
*, wchar_t*, wxPointerHash
, wxPointerEqual
,
350 wxStringWCharConversionCache
);
351 static wxStringWCharConversionCache gs_stringsWCharCache
;
353 const wchar_t* wxCStrData::AsWChar() const
355 // remove the previously cached value, if any (see FIXMEs above):
356 DeleteStringFromConversionCache(gs_stringsWCharCache
, m_str
);
358 // convert the string and keep it:
359 const wchar_t *s
= gs_stringsWCharCache
[wxConstCast(m_str
, wxString
)] =
360 m_str
->wc_str().release();
364 #endif // !wxUSE_UNICODE_WCHAR
366 wxString::~wxString()
369 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
370 DeleteStringFromConversionCache(gs_stringsCharCache
, this);
372 #if !wxUSE_UNICODE_WCHAR
373 DeleteStringFromConversionCache(gs_stringsWCharCache
, this);
378 // ===========================================================================
379 // wxString class core
380 // ===========================================================================
382 // ---------------------------------------------------------------------------
383 // construction and conversion
384 // ---------------------------------------------------------------------------
386 #if wxUSE_UNICODE_WCHAR
388 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
389 const wxMBConv
& conv
)
392 if ( !psz
|| nLength
== 0 )
393 return SubstrBufFromMB(wxWCharBuffer(L
""), 0);
395 if ( nLength
== npos
)
399 wxScopedWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
401 return SubstrBufFromMB(wxWCharBuffer(L
""), 0);
403 return SubstrBufFromMB(wcBuf
, wcLen
);
405 #endif // wxUSE_UNICODE_WCHAR
407 #if wxUSE_UNICODE_UTF8
409 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
410 const wxMBConv
& conv
)
413 if ( !psz
|| nLength
== 0 )
414 return SubstrBufFromMB(wxCharBuffer(""), 0);
416 // if psz is already in UTF-8, we don't have to do the roundtrip to
417 // wchar_t* and back:
420 // we need to validate the input because UTF8 iterators assume valid
421 // UTF-8 sequence and psz may be invalid:
422 if ( wxStringOperations::IsValidUtf8String(psz
, nLength
) )
424 // we must pass the real string length to SubstrBufFromMB ctor
425 if ( nLength
== npos
)
426 nLength
= psz
? strlen(psz
) : 0;
427 return SubstrBufFromMB(wxScopedCharBuffer::CreateNonOwned(psz
, nLength
),
430 // else: do the roundtrip through wchar_t*
433 if ( nLength
== npos
)
436 // first convert to wide string:
438 wxScopedWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
440 return SubstrBufFromMB(wxCharBuffer(""), 0);
442 // and then to UTF-8:
443 SubstrBufFromMB
buf(ConvertStr(wcBuf
, wcLen
, wxMBConvStrictUTF8()));
444 // widechar -> UTF-8 conversion isn't supposed to ever fail:
445 wxASSERT_MSG( buf
.data
, wxT("conversion to UTF-8 failed") );
449 #endif // wxUSE_UNICODE_UTF8
451 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
453 wxString::SubstrBufFromWC
wxString::ConvertStr(const wchar_t *pwz
, size_t nLength
,
454 const wxMBConv
& conv
)
457 if ( !pwz
|| nLength
== 0 )
458 return SubstrBufFromWC(wxCharBuffer(""), 0);
460 if ( nLength
== npos
)
464 wxScopedCharBuffer
mbBuf(conv
.cWC2MB(pwz
, nLength
, &mbLen
));
466 return SubstrBufFromWC(wxCharBuffer(""), 0);
468 return SubstrBufFromWC(mbBuf
, mbLen
);
470 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
472 // This std::string::c_str()-like method returns a wide char pointer to string
473 // contents. In wxUSE_UNICODE_WCHAR case it is trivial as it can simply return
474 // a pointer to the internal representation. Otherwise a conversion is required
475 // and it returns a temporary buffer.
477 // However for compatibility with c_str() and to avoid breaking existing code
480 // for ( const wchar_t *p = s.wc_str(); *p; p++ )
483 // we actually need to ensure that the returned buffer is _not_ temporary and
484 // so we use wxString::m_convertedToWChar to store the returned data
485 #if !wxUSE_UNICODE_WCHAR
487 const wchar_t *wxString::AsWChar(const wxMBConv
& conv
) const
489 const char * const strMB
= m_impl
.c_str();
490 const size_t lenMB
= m_impl
.length();
492 // find out the size of the buffer needed
493 const size_t lenWC
= conv
.ToWChar(NULL
, 0, strMB
, lenMB
);
494 if ( lenWC
== wxCONV_FAILED
)
497 // keep the same buffer if the string size didn't change: this is not only
498 // an optimization but also ensures that code which modifies the string
499 // character by character (without changing its length) can continue to use
500 // the pointer returned by a previous wc_str() call even after changing the
503 // TODO-UTF8: we could check for ">" instead of "!=" here as this would
504 // save on buffer reallocations but at the cost of
505 // consuming (even) more memory, we should benchmark this to
506 // determine if it's worth doing
507 if ( !m_convertedToWChar
.m_str
|| lenWC
!= m_convertedToWChar
.m_len
)
509 if ( !const_cast<wxString
*>(this)->m_convertedToWChar
.Extend(lenWC
) )
513 // finally do convert
514 m_convertedToWChar
.m_str
[lenWC
] = L
'\0';
515 if ( conv
.ToWChar(m_convertedToWChar
.m_str
, lenWC
,
516 strMB
, lenMB
) == wxCONV_FAILED
)
519 return m_convertedToWChar
.m_str
;
522 #endif // !wxUSE_UNICODE_WCHAR
525 // Same thing for mb_str() which returns a normal char pointer to string
526 // contents: this always requires converting it to the specified encoding in
527 // non-ANSI build except if we need to convert to UTF-8 and this is what we
528 // already use internally.
531 const char *wxString::AsChar(const wxMBConv
& conv
) const
533 #if wxUSE_UNICODE_UTF8
535 return m_impl
.c_str();
537 const wchar_t * const strWC
= AsWChar(wxMBConvStrictUTF8());
538 const size_t lenWC
= m_convertedToWChar
.m_len
;
539 #else // wxUSE_UNICODE_WCHAR
540 const wchar_t * const strWC
= m_impl
.c_str();
541 const size_t lenWC
= m_impl
.length();
542 #endif // wxUSE_UNICODE_UTF8/wxUSE_UNICODE_WCHAR
544 const size_t lenMB
= conv
.FromWChar(NULL
, 0, strWC
, lenWC
);
545 if ( lenMB
== wxCONV_FAILED
)
548 if ( !m_convertedToChar
.m_str
|| lenMB
!= m_convertedToChar
.m_len
)
550 if ( !const_cast<wxString
*>(this)->m_convertedToChar
.Extend(lenMB
) )
554 m_convertedToChar
.m_str
[lenMB
] = '\0';
555 if ( conv
.FromWChar(m_convertedToChar
.m_str
, lenMB
,
556 strWC
, lenWC
) == wxCONV_FAILED
)
559 return m_convertedToChar
.m_str
;
562 #endif // wxUSE_UNICODE
564 // shrink to minimal size (releasing extra memory)
565 bool wxString::Shrink()
567 wxString
tmp(begin(), end());
569 return tmp
.length() == length();
572 // deprecated compatibility code:
573 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
574 wxStringCharType
*wxString::GetWriteBuf(size_t nLen
)
576 return DoGetWriteBuf(nLen
);
579 void wxString::UngetWriteBuf()
584 void wxString::UngetWriteBuf(size_t nLen
)
586 DoUngetWriteBuf(nLen
);
588 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
591 // ---------------------------------------------------------------------------
593 // ---------------------------------------------------------------------------
595 // all functions are inline in string.h
597 // ---------------------------------------------------------------------------
598 // concatenation operators
599 // ---------------------------------------------------------------------------
602 * concatenation functions come in 5 flavours:
604 * char + string and string + char
605 * C str + string and string + C str
608 wxString
operator+(const wxString
& str1
, const wxString
& str2
)
610 #if !wxUSE_STL_BASED_WXSTRING
611 wxASSERT( str1
.IsValid() );
612 wxASSERT( str2
.IsValid() );
621 wxString
operator+(const wxString
& str
, wxUniChar ch
)
623 #if !wxUSE_STL_BASED_WXSTRING
624 wxASSERT( str
.IsValid() );
633 wxString
operator+(wxUniChar ch
, const wxString
& str
)
635 #if !wxUSE_STL_BASED_WXSTRING
636 wxASSERT( str
.IsValid() );
645 wxString
operator+(const wxString
& str
, const char *psz
)
647 #if !wxUSE_STL_BASED_WXSTRING
648 wxASSERT( str
.IsValid() );
652 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
653 wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
661 wxString
operator+(const wxString
& str
, const wchar_t *pwz
)
663 #if !wxUSE_STL_BASED_WXSTRING
664 wxASSERT( str
.IsValid() );
668 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
669 wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
677 wxString
operator+(const char *psz
, const wxString
& str
)
679 #if !wxUSE_STL_BASED_WXSTRING
680 wxASSERT( str
.IsValid() );
684 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
685 wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
693 wxString
operator+(const wchar_t *pwz
, const wxString
& str
)
695 #if !wxUSE_STL_BASED_WXSTRING
696 wxASSERT( str
.IsValid() );
700 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
701 wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
709 // ---------------------------------------------------------------------------
711 // ---------------------------------------------------------------------------
713 bool wxString::IsSameAs(wxUniChar c
, bool compareWithCase
) const
715 return (length() == 1) && (compareWithCase
? GetChar(0u) == c
716 : wxToupper(GetChar(0u)) == wxToupper(c
));
719 #ifdef HAVE_STD_STRING_COMPARE
721 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
722 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
723 // sort strings in characters code point order by sorting the byte sequence
724 // in byte values order (i.e. what strcmp() and memcmp() do).
726 int wxString::compare(const wxString
& str
) const
728 return m_impl
.compare(str
.m_impl
);
731 int wxString::compare(size_t nStart
, size_t nLen
,
732 const wxString
& str
) const
735 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
736 return m_impl
.compare(pos
, len
, str
.m_impl
);
739 int wxString::compare(size_t nStart
, size_t nLen
,
741 size_t nStart2
, size_t nLen2
) const
744 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
747 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
749 return m_impl
.compare(pos
, len
, str
.m_impl
, pos2
, len2
);
752 int wxString::compare(const char* sz
) const
754 return m_impl
.compare(ImplStr(sz
));
757 int wxString::compare(const wchar_t* sz
) const
759 return m_impl
.compare(ImplStr(sz
));
762 int wxString::compare(size_t nStart
, size_t nLen
,
763 const char* sz
, size_t nCount
) const
766 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
768 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
770 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
773 int wxString::compare(size_t nStart
, size_t nLen
,
774 const wchar_t* sz
, size_t nCount
) const
777 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
779 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
781 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
784 #else // !HAVE_STD_STRING_COMPARE
786 static inline int wxDoCmp(const wxStringCharType
* s1
, size_t l1
,
787 const wxStringCharType
* s2
, size_t l2
)
790 return wxStringMemcmp(s1
, s2
, l1
);
793 int ret
= wxStringMemcmp(s1
, s2
, l1
);
794 return ret
== 0 ? -1 : ret
;
798 int ret
= wxStringMemcmp(s1
, s2
, l2
);
799 return ret
== 0 ? +1 : ret
;
803 int wxString::compare(const wxString
& str
) const
805 return ::wxDoCmp(m_impl
.data(), m_impl
.length(),
806 str
.m_impl
.data(), str
.m_impl
.length());
809 int wxString::compare(size_t nStart
, size_t nLen
,
810 const wxString
& str
) const
812 wxASSERT(nStart
<= length());
813 size_type strLen
= length() - nStart
;
814 nLen
= strLen
< nLen
? strLen
: nLen
;
817 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
819 return ::wxDoCmp(m_impl
.data() + pos
, len
,
820 str
.m_impl
.data(), str
.m_impl
.length());
823 int wxString::compare(size_t nStart
, size_t nLen
,
825 size_t nStart2
, size_t nLen2
) const
827 wxASSERT(nStart
<= length());
828 wxASSERT(nStart2
<= str
.length());
829 size_type strLen
= length() - nStart
,
830 strLen2
= str
.length() - nStart2
;
831 nLen
= strLen
< nLen
? strLen
: nLen
;
832 nLen2
= strLen2
< nLen2
? strLen2
: nLen2
;
835 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
837 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
839 return ::wxDoCmp(m_impl
.data() + pos
, len
,
840 str
.m_impl
.data() + pos2
, len2
);
843 int wxString::compare(const char* sz
) const
845 SubstrBufFromMB
str(ImplStr(sz
, npos
));
846 if ( str
.len
== npos
)
847 str
.len
= wxStringStrlen(str
.data
);
848 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
851 int wxString::compare(const wchar_t* sz
) const
853 SubstrBufFromWC
str(ImplStr(sz
, npos
));
854 if ( str
.len
== npos
)
855 str
.len
= wxStringStrlen(str
.data
);
856 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
859 int wxString::compare(size_t nStart
, size_t nLen
,
860 const char* sz
, size_t nCount
) const
862 wxASSERT(nStart
<= length());
863 size_type strLen
= length() - nStart
;
864 nLen
= strLen
< nLen
? strLen
: nLen
;
867 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
869 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
870 if ( str
.len
== npos
)
871 str
.len
= wxStringStrlen(str
.data
);
873 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
876 int wxString::compare(size_t nStart
, size_t nLen
,
877 const wchar_t* sz
, size_t nCount
) const
879 wxASSERT(nStart
<= length());
880 size_type strLen
= length() - nStart
;
881 nLen
= strLen
< nLen
? strLen
: nLen
;
884 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
886 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
887 if ( str
.len
== npos
)
888 str
.len
= wxStringStrlen(str
.data
);
890 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
893 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
896 // ---------------------------------------------------------------------------
897 // find_{first,last}_[not]_of functions
898 // ---------------------------------------------------------------------------
900 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
902 // NB: All these functions are implemented with the argument being wxChar*,
903 // i.e. widechar string in any Unicode build, even though native string
904 // representation is char* in the UTF-8 build. This is because we couldn't
905 // use memchr() to determine if a character is in a set encoded as UTF-8.
907 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
) const
909 return find_first_of(sz
, nStart
, wxStrlen(sz
));
912 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
) const
914 return find_first_not_of(sz
, nStart
, wxStrlen(sz
));
917 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
919 wxASSERT_MSG( nStart
<= length(), wxT("invalid index") );
922 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
924 if ( wxTmemchr(sz
, *i
, n
) )
931 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
933 wxASSERT_MSG( nStart
<= length(), wxT("invalid index") );
936 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
938 if ( !wxTmemchr(sz
, *i
, n
) )
946 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
) const
948 return find_last_of(sz
, nStart
, wxStrlen(sz
));
951 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
) const
953 return find_last_not_of(sz
, nStart
, wxStrlen(sz
));
956 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
958 size_t len
= length();
960 if ( nStart
== npos
)
966 wxASSERT_MSG( nStart
<= len
, wxT("invalid index") );
970 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
971 i
!= rend(); --idx
, ++i
)
973 if ( wxTmemchr(sz
, *i
, n
) )
980 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
982 size_t len
= length();
984 if ( nStart
== npos
)
990 wxASSERT_MSG( nStart
<= len
, wxT("invalid index") );
994 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
995 i
!= rend(); --idx
, ++i
)
997 if ( !wxTmemchr(sz
, *i
, n
) )
1004 size_t wxString::find_first_not_of(wxUniChar ch
, size_t nStart
) const
1006 wxASSERT_MSG( nStart
<= length(), wxT("invalid index") );
1008 size_t idx
= nStart
;
1009 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
1018 size_t wxString::find_last_not_of(wxUniChar ch
, size_t nStart
) const
1020 size_t len
= length();
1022 if ( nStart
== npos
)
1028 wxASSERT_MSG( nStart
<= len
, wxT("invalid index") );
1031 size_t idx
= nStart
;
1032 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
1033 i
!= rend(); --idx
, ++i
)
1042 // the functions above were implemented for wchar_t* arguments in Unicode
1043 // build and char* in ANSI build; below are implementations for the other
1046 #define wxOtherCharType char
1047 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
1049 #define wxOtherCharType wchar_t
1050 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
1053 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
) const
1054 { return find_first_of(STRCONV(sz
), nStart
); }
1056 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
,
1058 { return find_first_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1059 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
) const
1060 { return find_last_of(STRCONV(sz
), nStart
); }
1061 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
,
1063 { return find_last_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1064 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
1065 { return find_first_not_of(STRCONV(sz
), nStart
); }
1066 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
,
1068 { return find_first_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1069 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
1070 { return find_last_not_of(STRCONV(sz
), nStart
); }
1071 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
,
1073 { return find_last_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1075 #undef wxOtherCharType
1078 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1080 // ===========================================================================
1081 // other common string functions
1082 // ===========================================================================
1084 int wxString::CmpNoCase(const wxString
& s
) const
1086 #if !wxUSE_UNICODE_UTF8
1087 // We compare NUL-delimited chunks of the strings inside the loop. We will
1088 // do as many iterations as there are embedded NULs in the string, i.e.
1089 // usually we will run it just once.
1091 typedef const wxStringImpl::value_type
*pchar_type
;
1092 const pchar_type thisBegin
= m_impl
.c_str();
1093 const pchar_type thatBegin
= s
.m_impl
.c_str();
1095 const pchar_type thisEnd
= thisBegin
+ m_impl
.length();
1096 const pchar_type thatEnd
= thatBegin
+ s
.m_impl
.length();
1098 pchar_type thisCur
= thisBegin
;
1099 pchar_type thatCur
= thatBegin
;
1104 // Compare until the next NUL, if the strings differ this is the final
1106 rc
= wxStricmp(thisCur
, thatCur
);
1110 const size_t lenChunk
= wxStrlen(thisCur
);
1111 thisCur
+= lenChunk
;
1112 thatCur
+= lenChunk
;
1114 // Skip all the NULs as wxStricmp() doesn't handle them.
1115 for ( ; !*thisCur
; thisCur
++, thatCur
++ )
1117 // Check if we exhausted either of the strings.
1118 if ( thisCur
== thisEnd
)
1120 // This one is exhausted, is the other one too?
1121 return thatCur
== thatEnd
? 0 : -1;
1124 if ( thatCur
== thatEnd
)
1126 // Because of the test above we know that this one is not
1127 // exhausted yet so it's greater than the other one that is.
1133 // Anything non-NUL is greater than NUL.
1140 #else // wxUSE_UNICODE_UTF8
1141 // CRT functions can't be used for case-insensitive comparison of UTF-8
1142 // strings so do it in the naive, simple and inefficient way.
1144 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
1145 const_iterator i1
= begin();
1146 const_iterator end1
= end();
1147 const_iterator i2
= s
.begin();
1148 const_iterator end2
= s
.end();
1150 for ( ; i1
!= end1
&& i2
!= end2
; ++i1
, ++i2
)
1152 wxUniChar lower1
= (wxChar
)wxTolower(*i1
);
1153 wxUniChar lower2
= (wxChar
)wxTolower(*i2
);
1154 if ( lower1
!= lower2
)
1155 return lower1
< lower2
? -1 : 1;
1158 size_t len1
= length();
1159 size_t len2
= s
.length();
1163 else if ( len1
> len2
)
1166 #endif // !wxUSE_UNICODE_UTF8/wxUSE_UNICODE_UTF8
1173 #ifndef __SCHAR_MAX__
1174 #define __SCHAR_MAX__ 127
1178 wxString
wxString::FromAscii(const char *ascii
, size_t len
)
1180 if (!ascii
|| len
== 0)
1181 return wxEmptyString
;
1186 wxStringInternalBuffer
buf(res
, len
);
1187 wxStringCharType
*dest
= buf
;
1189 for ( ; len
> 0; --len
)
1191 unsigned char c
= (unsigned char)*ascii
++;
1192 wxASSERT_MSG( c
< 0x80,
1193 wxT("Non-ASCII value passed to FromAscii().") );
1195 *dest
++ = (wchar_t)c
;
1202 wxString
wxString::FromAscii(const char *ascii
)
1204 return FromAscii(ascii
, wxStrlen(ascii
));
1207 wxString
wxString::FromAscii(char ascii
)
1209 // What do we do with '\0' ?
1211 unsigned char c
= (unsigned char)ascii
;
1213 wxASSERT_MSG( c
< 0x80, wxT("Non-ASCII value passed to FromAscii().") );
1215 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1216 return wxString(wxUniChar((wchar_t)c
));
1219 const wxScopedCharBuffer
wxString::ToAscii() const
1221 // this will allocate enough space for the terminating NUL too
1222 wxCharBuffer
buffer(length());
1223 char *dest
= buffer
.data();
1225 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1228 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1229 *dest
++ = c
.IsAscii() ? (char)c
: '_';
1231 // the output string can't have embedded NULs anyhow, so we can safely
1232 // stop at first of them even if we do have any
1240 #endif // wxUSE_UNICODE
1242 // extract string of length nCount starting at nFirst
1243 wxString
wxString::Mid(size_t nFirst
, size_t nCount
) const
1245 size_t nLen
= length();
1247 // default value of nCount is npos and means "till the end"
1248 if ( nCount
== npos
)
1250 nCount
= nLen
- nFirst
;
1253 // out-of-bounds requests return sensible things
1254 if ( nFirst
+ nCount
> nLen
)
1256 nCount
= nLen
- nFirst
;
1259 if ( nFirst
> nLen
)
1261 // AllocCopy() will return empty string
1262 return wxEmptyString
;
1265 wxString
dest(*this, nFirst
, nCount
);
1266 if ( dest
.length() != nCount
)
1268 wxFAIL_MSG( wxT("out of memory in wxString::Mid") );
1274 // check that the string starts with prefix and return the rest of the string
1275 // in the provided pointer if it is not NULL, otherwise return false
1276 bool wxString::StartsWith(const wxString
& prefix
, wxString
*rest
) const
1278 if ( compare(0, prefix
.length(), prefix
) != 0 )
1283 // put the rest of the string into provided pointer
1284 rest
->assign(*this, prefix
.length(), npos
);
1291 // check that the string ends with suffix and return the rest of it in the
1292 // provided pointer if it is not NULL, otherwise return false
1293 bool wxString::EndsWith(const wxString
& suffix
, wxString
*rest
) const
1295 int start
= length() - suffix
.length();
1297 if ( start
< 0 || compare(start
, npos
, suffix
) != 0 )
1302 // put the rest of the string into provided pointer
1303 rest
->assign(*this, 0, start
);
1310 // extract nCount last (rightmost) characters
1311 wxString
wxString::Right(size_t nCount
) const
1313 if ( nCount
> length() )
1316 wxString
dest(*this, length() - nCount
, nCount
);
1317 if ( dest
.length() != nCount
) {
1318 wxFAIL_MSG( wxT("out of memory in wxString::Right") );
1323 // get all characters after the last occurrence of ch
1324 // (returns the whole string if ch not found)
1325 wxString
wxString::AfterLast(wxUniChar ch
) const
1328 int iPos
= Find(ch
, true);
1329 if ( iPos
== wxNOT_FOUND
)
1332 str
.assign(*this, iPos
+ 1, npos
);
1337 // extract nCount first (leftmost) characters
1338 wxString
wxString::Left(size_t nCount
) const
1340 if ( nCount
> length() )
1343 wxString
dest(*this, 0, nCount
);
1344 if ( dest
.length() != nCount
) {
1345 wxFAIL_MSG( wxT("out of memory in wxString::Left") );
1350 // get all characters before the first occurrence of ch
1351 // (returns the whole string if ch not found)
1352 wxString
wxString::BeforeFirst(wxUniChar ch
, wxString
*rest
) const
1354 int iPos
= Find(ch
);
1355 if ( iPos
== wxNOT_FOUND
)
1364 rest
->assign(*this, iPos
+ 1, npos
);
1367 return wxString(*this, 0, iPos
);
1370 /// get all characters before the last occurrence of ch
1371 /// (returns empty string if ch not found)
1372 wxString
wxString::BeforeLast(wxUniChar ch
, wxString
*rest
) const
1375 int iPos
= Find(ch
, true);
1376 if ( iPos
!= wxNOT_FOUND
)
1379 str
.assign(*this, 0, iPos
);
1382 rest
->assign(*this, iPos
+ 1, npos
);
1393 /// get all characters after the first occurrence of ch
1394 /// (returns empty string if ch not found)
1395 wxString
wxString::AfterFirst(wxUniChar ch
) const
1398 int iPos
= Find(ch
);
1399 if ( iPos
!= wxNOT_FOUND
)
1400 str
.assign(*this, iPos
+ 1, npos
);
1405 // replace first (or all) occurrences of some substring with another one
1406 size_t wxString::Replace(const wxString
& strOld
,
1407 const wxString
& strNew
, bool bReplaceAll
)
1409 // if we tried to replace an empty string we'd enter an infinite loop below
1410 wxCHECK_MSG( !strOld
.empty(), 0,
1411 wxT("wxString::Replace(): invalid parameter") );
1413 wxSTRING_INVALIDATE_CACHE();
1415 size_t uiCount
= 0; // count of replacements made
1417 // optimize the special common case: replacement of one character by
1418 // another one (in UTF-8 case we can only do this for ASCII characters)
1420 // benchmarks show that this special version is around 3 times faster
1421 // (depending on the proportion of matching characters and UTF-8/wchar_t
1423 if ( strOld
.m_impl
.length() == 1 && strNew
.m_impl
.length() == 1 )
1425 const wxStringCharType chOld
= strOld
.m_impl
[0],
1426 chNew
= strNew
.m_impl
[0];
1428 // this loop is the simplified version of the one below
1429 for ( size_t pos
= 0; ; )
1431 pos
= m_impl
.find(chOld
, pos
);
1435 m_impl
[pos
++] = chNew
;
1443 else if ( !bReplaceAll
)
1445 size_t pos
= m_impl
.find(strOld
, 0);
1448 m_impl
.replace(pos
, strOld
.m_impl
.length(), strNew
.m_impl
);
1452 else // replace all occurrences
1454 const size_t uiOldLen
= strOld
.m_impl
.length();
1455 const size_t uiNewLen
= strNew
.m_impl
.length();
1457 // first scan the string to find all positions at which the replacement
1459 wxVector
<size_t> replacePositions
;
1462 for ( pos
= m_impl
.find(strOld
.m_impl
, 0);
1464 pos
= m_impl
.find(strOld
.m_impl
, pos
+ uiOldLen
))
1466 replacePositions
.push_back(pos
);
1473 // allocate enough memory for the whole new string
1475 tmp
.m_impl
.reserve(m_impl
.length() + uiCount
*(uiNewLen
- uiOldLen
));
1477 // copy this string to tmp doing replacements on the fly
1479 for ( pos
= 0; replNum
< uiCount
; replNum
++ )
1481 const size_t nextReplPos
= replacePositions
[replNum
];
1483 if ( pos
!= nextReplPos
)
1485 tmp
.m_impl
.append(m_impl
, pos
, nextReplPos
- pos
);
1488 tmp
.m_impl
.append(strNew
.m_impl
);
1489 pos
= nextReplPos
+ uiOldLen
;
1492 if ( pos
!= m_impl
.length() )
1494 // append the rest of the string unchanged
1495 tmp
.m_impl
.append(m_impl
, pos
, m_impl
.length() - pos
);
1504 bool wxString::IsAscii() const
1506 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1508 if ( !(*i
).IsAscii() )
1515 bool wxString::IsWord() const
1517 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1519 if ( !wxIsalpha(*i
) )
1526 bool wxString::IsNumber() const
1531 const_iterator i
= begin();
1533 if ( *i
== wxT('-') || *i
== wxT('+') )
1536 for ( ; i
!= end(); ++i
)
1538 if ( !wxIsdigit(*i
) )
1545 wxString
wxString::Strip(stripType w
) const
1548 if ( w
& leading
) s
.Trim(false);
1549 if ( w
& trailing
) s
.Trim(true);
// ---------------------------------------------------------------------------
// case conversion
// ---------------------------------------------------------------------------
1557 wxString
& wxString::MakeUpper()
1559 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1560 *it
= (wxChar
)wxToupper(*it
);
1565 wxString
& wxString::MakeLower()
1567 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1568 *it
= (wxChar
)wxTolower(*it
);
1573 wxString
& wxString::MakeCapitalized()
1575 const iterator en
= end();
1576 iterator it
= begin();
1579 *it
= (wxChar
)wxToupper(*it
);
1580 for ( ++it
; it
!= en
; ++it
)
1581 *it
= (wxChar
)wxTolower(*it
);
1587 // ---------------------------------------------------------------------------
1588 // trimming and padding
1589 // ---------------------------------------------------------------------------
1591 // some compilers (VC++ 6.0 not to name them) return true for a call to
1592 // isspace('\xEA') in the C locale which seems to be broken to me, but we have
1593 // to live with this by checking that the character is a 7 bit one - even if
1594 // this may fail to detect some spaces (I don't know if Unicode doesn't have
1595 // space-like symbols somewhere except in the first 128 chars), it is arguably
1596 // still better than trimming away accented letters
1597 inline int wxSafeIsspace(wxChar ch
) { return (ch
< 127) && wxIsspace(ch
); }
1599 // trims spaces (in the sense of isspace) from left or right side
1600 wxString
& wxString::Trim(bool bFromRight
)
1602 // first check if we're going to modify the string at all
1605 (bFromRight
&& wxSafeIsspace(GetChar(length() - 1))) ||
1606 (!bFromRight
&& wxSafeIsspace(GetChar(0u)))
1612 // find last non-space character
1613 reverse_iterator psz
= rbegin();
1614 while ( (psz
!= rend()) && wxSafeIsspace(*psz
) )
1617 // truncate at trailing space start
1618 erase(psz
.base(), end());
1622 // find first non-space character
1623 iterator psz
= begin();
1624 while ( (psz
!= end()) && wxSafeIsspace(*psz
) )
1627 // fix up data and length
1628 erase(begin(), psz
);
1635 // adds nCount characters chPad to the string from either side
1636 wxString
& wxString::Pad(size_t nCount
, wxUniChar chPad
, bool bFromRight
)
1638 wxString
s(chPad
, nCount
);
1651 // truncate the string
1652 wxString
& wxString::Truncate(size_t uiLen
)
1654 if ( uiLen
< length() )
1656 erase(begin() + uiLen
, end());
1658 //else: nothing to do, string is already short enough
1663 // ---------------------------------------------------------------------------
1664 // finding (return wxNOT_FOUND if not found and index otherwise)
1665 // ---------------------------------------------------------------------------
1668 int wxString::Find(wxUniChar ch
, bool bFromEnd
) const
1670 size_type idx
= bFromEnd
? find_last_of(ch
) : find_first_of(ch
);
1672 return (idx
== npos
) ? wxNOT_FOUND
: (int)idx
;
1675 // ----------------------------------------------------------------------------
1676 // conversion to numbers
1677 // ----------------------------------------------------------------------------
// The implementation of all the functions below is exactly the same so factor
// it out. Note that number extraction works correctly on UTF-8 strings, so
// we can use wxStringCharType and wx_str() for maximum efficiency.

// errno is not used under Windows CE, so wrap everything touching it in this
// macro which expands to nothing there
#ifndef __WXWINCE__
    #define DO_IF_NOT_WINCE(x) x
#else
    #define DO_IF_NOT_WINCE(x)
#endif

// Common prologue: checks the output pointer "pVal" of the enclosing function
// and declares "start" and "end" which must be passed to the conversion
// function storing its result in a local variable called "val".
#define WX_STRING_TO_X_TYPE_START                                           \
    wxCHECK_MSG( pVal, false, wxT("NULL output pointer") );                  \
    DO_IF_NOT_WINCE( errno = 0; )                                           \
    const wxStringCharType *start = wx_str();                               \
    wxStringCharType *end;

// notice that we return false without modifying the output parameter at all if
// nothing could be parsed but we do modify it and return false then if we did
// parse something successfully but not the entire string
#define WX_STRING_TO_X_TYPE_END                                             \
    if ( end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )                 \
        return false;                                                       \
    *pVal = val;                                                            \
    return !*end;
1704 bool wxString::ToLong(long *pVal
, int base
) const
1706 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), wxT("invalid base") );
1708 WX_STRING_TO_X_TYPE_START
1709 long val
= wxStrtol(start
, &end
, base
);
1710 WX_STRING_TO_X_TYPE_END
1713 bool wxString::ToULong(unsigned long *pVal
, int base
) const
1715 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), wxT("invalid base") );
1717 WX_STRING_TO_X_TYPE_START
1718 unsigned long val
= wxStrtoul(start
, &end
, base
);
1719 WX_STRING_TO_X_TYPE_END
1722 bool wxString::ToLongLong(wxLongLong_t
*pVal
, int base
) const
1724 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), wxT("invalid base") );
1726 WX_STRING_TO_X_TYPE_START
1727 wxLongLong_t val
= wxStrtoll(start
, &end
, base
);
1728 WX_STRING_TO_X_TYPE_END
1731 bool wxString::ToULongLong(wxULongLong_t
*pVal
, int base
) const
1733 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), wxT("invalid base") );
1735 WX_STRING_TO_X_TYPE_START
1736 wxULongLong_t val
= wxStrtoull(start
, &end
, base
);
1737 WX_STRING_TO_X_TYPE_END
1740 bool wxString::ToDouble(double *pVal
) const
1742 WX_STRING_TO_X_TYPE_START
1743 double val
= wxStrtod(start
, &end
);
1744 WX_STRING_TO_X_TYPE_END
1749 bool wxString::ToCLong(long *pVal
, int base
) const
1751 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), wxT("invalid base") );
1753 WX_STRING_TO_X_TYPE_START
1754 #if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT)
1755 long val
= wxStrtol_lA(start
, &end
, base
, wxCLocale
);
1757 long val
= wxStrtol_l(start
, &end
, base
, wxCLocale
);
1759 WX_STRING_TO_X_TYPE_END
1762 bool wxString::ToCULong(unsigned long *pVal
, int base
) const
1764 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), wxT("invalid base") );
1766 WX_STRING_TO_X_TYPE_START
1767 #if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT)
1768 unsigned long val
= wxStrtoul_lA(start
, &end
, base
, wxCLocale
);
1770 unsigned long val
= wxStrtoul_l(start
, &end
, base
, wxCLocale
);
1772 WX_STRING_TO_X_TYPE_END
1775 bool wxString::ToCDouble(double *pVal
) const
1777 WX_STRING_TO_X_TYPE_START
1778 #if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT)
1779 double val
= wxStrtod_lA(start
, &end
, wxCLocale
);
1781 double val
= wxStrtod_l(start
, &end
, wxCLocale
);
1783 WX_STRING_TO_X_TYPE_END
1786 #else // wxUSE_XLOCALE
1788 // Provide implementation of these functions even when wxUSE_XLOCALE is
1789 // disabled, we still need them in wxWidgets internal code.
1791 // For integers we just assume the current locale uses the same number
1792 // representation as the C one as there is nothing else we can do.
1793 bool wxString::ToCLong(long *pVal
, int base
) const
1795 return ToLong(pVal
, base
);
1798 bool wxString::ToCULong(unsigned long *pVal
, int base
) const
1800 return ToULong(pVal
, base
);
1803 // For floating point numbers we have to handle the problem of the decimal
1804 // point which is different in different locales.
1805 bool wxString::ToCDouble(double *pVal
) const
1807 // Create a copy of this string using the decimal point instead of whatever
1808 // separator the current locale uses.
1810 wxString sep
= wxLocale::GetInfo(wxLOCALE_DECIMAL_POINT
,
1811 wxLOCALE_CAT_NUMBER
);
1814 // We can avoid an unnecessary string copy in this case.
1815 return ToDouble(pVal
);
1817 #else // !wxUSE_INTL
1818 // We don't know what the current separator is so it might even be a point
1819 // already, try to parse the string as a double:
1820 if ( ToDouble(pVal
) )
1822 // It must have been the point, nothing else to do.
1826 // Try to guess the separator, using the most common alternative value.
1828 #endif // wxUSE_INTL/!wxUSE_INTL
1829 wxString
cstr(*this);
1830 cstr
.Replace(".", sep
);
1832 return cstr
.ToDouble(pVal
);
1835 #endif // wxUSE_XLOCALE/!wxUSE_XLOCALE
1837 // ----------------------------------------------------------------------------
1838 // number to string conversion
1839 // ----------------------------------------------------------------------------
1842 wxString
wxString::FromCDouble(double val
)
1844 #if wxUSE_STD_IOSTREAM && wxUSE_STD_STRING
1845 // We assume that we can use the ostream and not wstream for numbers.
1846 wxSTD ostringstream os
;
1849 #else // wxUSE_STD_IOSTREAM
1850 // Can't use iostream locale support, fall back to the manual method
1852 wxString s
= FromDouble(val
);
1854 wxString sep
= wxLocale::GetInfo(wxLOCALE_DECIMAL_POINT
,
1855 wxLOCALE_CAT_NUMBER
);
1856 #else // !wxUSE_INTL
1857 // As above, this is the most common alternative value. Notice that here it
1858 // doesn't matter if we guess wrongly and the current separator is already
1859 // ".": we'll just waste a call to Replace() in this case.
1861 #endif // wxUSE_INTL/!wxUSE_INTL
1863 s
.Replace(sep
, ".");
1865 #endif // wxUSE_STD_IOSTREAM/!wxUSE_STD_IOSTREAM
// ---------------------------------------------------------------------------
// formatted output
// ---------------------------------------------------------------------------
1872 #if !wxUSE_UTF8_LOCALE_ONLY
1874 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1875 wxString
wxStringPrintfMixinBase::DoFormatWchar(const wxChar
*format
, ...)
1877 wxString
wxString::DoFormatWchar(const wxChar
*format
, ...)
1881 va_start(argptr
, format
);
1884 s
.PrintfV(format
, argptr
);
1890 #endif // !wxUSE_UTF8_LOCALE_ONLY
#if wxUSE_UNICODE_UTF8

// Return a new string containing the result of formatting the variable
// arguments according to the narrow character format string.
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}

#endif // wxUSE_UNICODE_UTF8
1909 wxString
wxString::FormatV(const wxString
& format
, va_list argptr
)
1912 s
.PrintfV(format
, argptr
);
1916 #if !wxUSE_UTF8_LOCALE_ONLY
1917 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1918 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar
*format
, ...)
1920 int wxString::DoPrintfWchar(const wxChar
*format
, ...)
1924 va_start(argptr
, format
);
1926 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1927 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1928 // because it's the only cast that works safely for downcasting when
1929 // multiple inheritance is used:
1930 wxString
*str
= static_cast<wxString
*>(this);
1932 wxString
*str
= this;
1935 int iLen
= str
->PrintfV(format
, argptr
);
1941 #endif // !wxUSE_UTF8_LOCALE_ONLY
#if wxUSE_UNICODE_UTF8

// Replace the contents of the string with the result of formatting the
// variable arguments according to the narrow character format string and
// return the value returned by PrintfV().
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);
    const int rc = PrintfV(format, args);
    va_end(args);

    return rc;
}

#endif // wxUSE_UNICODE_UTF8
/*
    Uses wxVsnprintf and places the result into this string.

    In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
    it is vswprintf.  Due to a discrepancy between vsnprintf and vswprintf in
    the ISO C99 (and thus SUSv3) standard the return value for the case of
    an undersized buffer is inconsistent.  For conforming vsnprintf
    implementations the function must return the number of characters that
    would have been printed had the buffer been large enough.  For conforming
    vswprintf implementations the function must return a negative number
    and set errno.

    What vswprintf sets errno to is undefined but Darwin seems to set it to
    EOVERFLOW.  The only expected errno are EILSEQ and EINVAL.  Both of
    those are defined in the standard and backed up by several conformance
    statements.  Note that ENOMEM mentioned in the manual page does not
    apply to swprintf, only wprintf and fwprintf.

    Official manual page:
    http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html

    Some conformance statements (AIX, Solaris):
    http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
    http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10

    Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
    EILSEQ and EINVAL are specifically defined to mean the error is other than
    an undersized buffer and no other errno are defined we treat those two
    as meaning hard errors and everything else gets the old behavior which
    is to keep looping and increasing buffer size until the function succeeds.

    In practice it's impossible to determine before compilation which behavior
    may be used.  The vswprintf function may have vsnprintf-like behavior or
    vice-versa.  Behavior detected on one release can theoretically change
    with an updated release.  Not to mention that configure testing for it
    would require the test to be run on the host system, not the build system
    which makes cross compilation difficult.  Therefore, we make no assumptions
    about behavior and try our best to handle every known case, including the
    case where wxVsnprintf returns a negative number and fails to set errno.

    There is yet one more non-standard implementation and that is our own.
    Fortunately, that can be detected at compile-time.

    On top of all that, ISO C99 explicitly defines snprintf to write a null
    character to the last position of the specified buffer.  That would be
    at the given buffer size minus 1.  It is supposed to do this even if it
    turns out that the buffer is sized too small.

    Darwin (tested on 10.5) follows the C99 behavior exactly.

    Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
    errno even when it fails.  However, it only seems to ever fail due
    to an undersized buffer.
*/
2011 #if wxUSE_UNICODE_UTF8
2012 template<typename BufferType
>
2014 // we only need one version in non-UTF8 builds and at least two Windows
2015 // compilers have problems with this function template, so use just one
2016 // normal function here
2018 static int DoStringPrintfV(wxString
& str
,
2019 const wxString
& format
, va_list argptr
)
2025 #if wxUSE_UNICODE_UTF8
2026 BufferType
tmp(str
, size
+ 1);
2027 typename
BufferType::CharType
*buf
= tmp
;
2029 wxStringBuffer
tmp(str
, size
+ 1);
2039 // wxVsnprintf() may modify the original arg pointer, so pass it
2042 wxVaCopy(argptrcopy
, argptr
);
2045 // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
2048 int len
= wxVsnprintf(buf
, size
, format
, argptrcopy
);
2051 // some implementations of vsnprintf() don't NUL terminate
2052 // the string if there is not enough space for it so
2053 // always do it manually
2054 // FIXME: This really seems to be the wrong and would be an off-by-one
2055 // bug except the code above allocates an extra character.
2056 buf
[size
] = wxT('\0');
2058 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
2059 // total number of characters which would have been written if the
2060 // buffer were large enough (newer standards such as Unix98)
2063 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
2064 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
2065 // is true if *both* of them use our own implementation,
2066 // otherwise we can't be sure
2067 #if wxUSE_WXVSNPRINTF
2068 // we know that our own implementation of wxVsnprintf() returns -1
2069 // only for a format error - thus there's something wrong with
2070 // the user's format string
2073 #else // possibly using system version
2074 // assume it only returns error if there is not enough space, but
2075 // as we don't know how much we need, double the current size of
2078 if( (errno
== EILSEQ
) || (errno
== EINVAL
) )
2079 // If errno was set to one of the two well-known hard errors
2080 // then fail immediately to avoid an infinite loop.
2083 #endif // __WXWINCE__
2084 // still not enough, as we don't know how much we need, double the
2085 // current size of the buffer
2087 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
2089 else if ( len
>= size
)
2091 #if wxUSE_WXVSNPRINTF
2092 // we know that our own implementation of wxVsnprintf() returns
2093 // size+1 when there's not enough space but that's not the size
2094 // of the required buffer!
2095 size
*= 2; // so we just double the current size of the buffer
2097 // some vsnprintf() implementations NUL-terminate the buffer and
2098 // some don't in len == size case, to be safe always add 1
2099 // FIXME: I don't quite understand this comment. The vsnprintf
2100 // function is specifically defined to return the number of
2101 // characters printed not including the null terminator.
2102 // So OF COURSE you need to add 1 to get the right buffer size.
2103 // The following line is definitely correct, no question.
2107 else // ok, there was enough space
2113 // we could have overshot
2116 return str
.length();
2119 int wxString::PrintfV(const wxString
& format
, va_list argptr
)
2121 #if wxUSE_UNICODE_UTF8
2122 #if wxUSE_STL_BASED_WXSTRING
2123 typedef wxStringTypeBuffer
<char> Utf8Buffer
;
2125 typedef wxStringInternalBuffer Utf8Buffer
;
2129 #if wxUSE_UTF8_LOCALE_ONLY
2130 return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argptr
);
2132 #if wxUSE_UNICODE_UTF8
2133 if ( wxLocaleIsUtf8
)
2134 return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argptr
);
2137 return DoStringPrintfV
<wxStringBuffer
>(*this, format
, argptr
);
2139 return DoStringPrintfV(*this, format
, argptr
);
2140 #endif // UTF8/WCHAR
2144 // ----------------------------------------------------------------------------
2145 // misc other operations
2146 // ----------------------------------------------------------------------------
2148 // returns true if the string matches the pattern which may contain '*' and
2149 // '?' metacharacters (as usual, '?' matches any character and '*' any number
2151 bool wxString::Matches(const wxString
& mask
) const
2153 // I disable this code as it doesn't seem to be faster (in fact, it seems
2154 // to be much slower) than the old, hand-written code below and using it
2155 // here requires always linking with libregex even if the user code doesn't
2157 #if 0 // wxUSE_REGEX
2158 // first translate the shell-like mask into a regex
2160 pattern
.reserve(wxStrlen(pszMask
));
2162 pattern
+= wxT('^');
2168 pattern
+= wxT('.');
2172 pattern
+= wxT(".*");
2183 // these characters are special in a RE, quote them
2184 // (however note that we don't quote '[' and ']' to allow
2185 // using them for Unix shell like matching)
2186 pattern
+= wxT('\\');
2190 pattern
+= *pszMask
;
2195 pattern
+= wxT('$');
2198 return wxRegEx(pattern
, wxRE_NOSUB
| wxRE_EXTENDED
).Matches(c_str());
2199 #else // !wxUSE_REGEX
2200 // TODO: this is, of course, awfully inefficient...
2202 // FIXME-UTF8: implement using iterators, remove #if
2203 #if wxUSE_UNICODE_UTF8
2204 const wxScopedWCharBuffer maskBuf
= mask
.wc_str();
2205 const wxScopedWCharBuffer txtBuf
= wc_str();
2206 const wxChar
*pszMask
= maskBuf
.data();
2207 const wxChar
*pszTxt
= txtBuf
.data();
2209 const wxChar
*pszMask
= mask
.wx_str();
2210 // the char currently being checked
2211 const wxChar
*pszTxt
= wx_str();
2214 // the last location where '*' matched
2215 const wxChar
*pszLastStarInText
= NULL
;
2216 const wxChar
*pszLastStarInMask
= NULL
;
2219 for ( ; *pszMask
!= wxT('\0'); pszMask
++, pszTxt
++ ) {
2220 switch ( *pszMask
) {
2222 if ( *pszTxt
== wxT('\0') )
2225 // pszTxt and pszMask will be incremented in the loop statement
2231 // remember where we started to be able to backtrack later
2232 pszLastStarInText
= pszTxt
;
2233 pszLastStarInMask
= pszMask
;
2235 // ignore special chars immediately following this one
2236 // (should this be an error?)
2237 while ( *pszMask
== wxT('*') || *pszMask
== wxT('?') )
2240 // if there is nothing more, match
2241 if ( *pszMask
== wxT('\0') )
2244 // are there any other metacharacters in the mask?
2246 const wxChar
*pEndMask
= wxStrpbrk(pszMask
, wxT("*?"));
2248 if ( pEndMask
!= NULL
) {
2249 // we have to match the string between two metachars
2250 uiLenMask
= pEndMask
- pszMask
;
2253 // we have to match the remainder of the string
2254 uiLenMask
= wxStrlen(pszMask
);
2257 wxString
strToMatch(pszMask
, uiLenMask
);
2258 const wxChar
* pMatch
= wxStrstr(pszTxt
, strToMatch
);
2259 if ( pMatch
== NULL
)
2262 // -1 to compensate "++" in the loop
2263 pszTxt
= pMatch
+ uiLenMask
- 1;
2264 pszMask
+= uiLenMask
- 1;
2269 if ( *pszMask
!= *pszTxt
)
2275 // match only if nothing left
2276 if ( *pszTxt
== wxT('\0') )
2279 // if we failed to match, backtrack if we can
2280 if ( pszLastStarInText
) {
2281 pszTxt
= pszLastStarInText
+ 1;
2282 pszMask
= pszLastStarInMask
;
2284 pszLastStarInText
= NULL
;
2286 // don't bother resetting pszLastStarInMask, it's unnecessary
2292 #endif // wxUSE_REGEX/!wxUSE_REGEX
2295 // Count the number of chars
2296 int wxString::Freq(wxUniChar ch
) const
2299 for ( const_iterator i
= begin(); i
!= end(); ++i
)