1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
13 // ===========================================================================
14 // headers, declarations, constants
15 // ===========================================================================
17 // For compilers that support precompilation, includes "wx.h".
18 #include "wx/wxprec.h"
25 #include "wx/string.h"
26 #include "wx/wxcrtvararg.h"
39 #include "wx/hashmap.h"
40 #include "wx/vector.h"
41 #include "wx/xlocale.h"
44 #include "wx/msw/wrapwin.h"
47 // string handling functions used by wxString:
48 #if wxUSE_UNICODE_UTF8
49 #define wxStringMemcpy memcpy
50 #define wxStringMemcmp memcmp
51 #define wxStringMemchr memchr
52 #define wxStringStrlen strlen
54 #define wxStringMemcpy wxTmemcpy
55 #define wxStringMemcmp wxTmemcmp
56 #define wxStringMemchr wxTmemchr
57 #define wxStringStrlen wxStrlen
60 // ----------------------------------------------------------------------------
62 // ----------------------------------------------------------------------------
67 static UntypedBufferData
s_untypedNullData(NULL
, 0);
69 UntypedBufferData
* const untypedNullDataPtr
= &s_untypedNullData
;
71 } // namespace wxPrivate
73 // ---------------------------------------------------------------------------
74 // static class variables definition
75 // ---------------------------------------------------------------------------
77 //According to STL _must_ be a -1 size_t
78 const size_t wxString::npos
= (size_t) -1;
80 #if wxUSE_STRING_POS_CACHE
82 #ifdef wxHAS_COMPILER_TLS
84 wxTLS_TYPE(wxString::Cache
) wxString::ms_cache
;
86 #else // !wxHAS_COMPILER_TLS
88 struct wxStrCacheInitializer
90 wxStrCacheInitializer()
92 // calling this function triggers s_cache initialization in it, and
93 // from now on it becomes safe to call from multiple threads
99 wxString::Cache& wxString::GetCache()
101 static wxTLS_TYPE(Cache) s_cache;
103 return wxTLS_VALUE(s_cache);
107 static wxStrCacheInitializer gs_stringCacheInit
;
109 #endif // wxHAS_COMPILER_TLS/!wxHAS_COMPILER_TLS
111 // gdb seems to be unable to display thread-local variables correctly, at least
112 // not my 6.4.98 version under amd64, so provide this debugging helper to do it
113 #if wxDEBUG_LEVEL >= 2
115 struct wxStrCacheDumper
117 static void ShowAll()
119 puts("*** wxString cache dump:");
120 for ( unsigned n
= 0; n
< wxString::Cache::SIZE
; n
++ )
122 const wxString::Cache::Element
&
123 c
= wxString::GetCacheBegin()[n
];
125 printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
127 n
== wxString::LastUsedCacheElement() ? " [*]" : "",
129 (unsigned long)c
.pos
,
130 (unsigned long)c
.impl
,
136 void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
138 #endif // wxDEBUG_LEVEL >= 2
140 #ifdef wxPROFILE_STRING_CACHE
142 wxString::CacheStats
wxString::ms_cacheStats
;
144 struct wxStrCacheStatsDumper
146 ~wxStrCacheStatsDumper()
148 const wxString::CacheStats
& stats
= wxString::ms_cacheStats
;
152 puts("*** wxString cache statistics:");
153 printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
155 printf("\tHits %u (of which %u not used) or %.2f%%\n",
158 100.*float(stats
.poshits
- stats
.mishits
)/stats
.postot
);
159 printf("\tAverage position requested: %.2f\n",
160 float(stats
.sumpos
) / stats
.postot
);
161 printf("\tAverage offset after cached hint: %.2f\n",
162 float(stats
.sumofs
) / stats
.postot
);
167 printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
168 stats
.lentot
, 100.*float(stats
.lenhits
)/stats
.lentot
);
173 static wxStrCacheStatsDumper s_showCacheStats
;
175 #endif // wxPROFILE_STRING_CACHE
177 #endif // wxUSE_STRING_POS_CACHE
179 // ----------------------------------------------------------------------------
181 // ----------------------------------------------------------------------------
183 #if wxUSE_STD_IOSTREAM
187 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxCStrData
& str
)
189 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
190 const wxScopedCharBuffer
buf(str
.AsCharBuf());
192 os
.clear(wxSTD
ios_base::failbit
);
198 return os
<< str
.AsInternal();
202 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxString
& str
)
204 return os
<< str
.c_str();
207 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxScopedCharBuffer
& str
)
209 return os
<< str
.data();
213 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxScopedWCharBuffer
& str
)
215 return os
<< str
.data();
219 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
221 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxString
& str
)
223 return wos
<< str
.wc_str();
226 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxCStrData
& str
)
228 return wos
<< str
.AsWChar();
231 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxScopedWCharBuffer
& str
)
233 return wos
<< str
.data();
236 #endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
238 #endif // wxUSE_STD_IOSTREAM
240 // ===========================================================================
241 // wxString class core
242 // ===========================================================================
244 #if wxUSE_UNICODE_UTF8
246 void wxString::PosLenToImpl(size_t pos
, size_t len
,
247 size_t *implPos
, size_t *implLen
) const
253 else // have valid start position
255 const const_iterator b
= GetIterForNthChar(pos
);
256 *implPos
= wxStringImpl::const_iterator(b
.impl()) - m_impl
.begin();
261 else // have valid length too
263 // we need to handle the case of length specifying a substring
264 // going beyond the end of the string, just as std::string does
265 const const_iterator
e(end());
267 while ( len
&& i
<= e
)
273 *implLen
= i
.impl() - b
.impl();
278 #endif // wxUSE_UNICODE_UTF8
280 // ----------------------------------------------------------------------------
281 // wxCStrData converted strings caching
282 // ----------------------------------------------------------------------------
284 // FIXME-UTF8: temporarily disabled because it doesn't work with global
285 // string objects; re-enable after fixing this bug and benchmarking
286 // performance to see if using a hash is a good idea at all
289 // For backward compatibility reasons, it must be possible to assign the value
290 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
291 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
292 // because the memory would be freed immediately, but it has to be valid as long
293 // as the string is not modified, so that code like this still works:
295 // const wxChar *s = str.c_str();
296 // while ( s ) { ... }
298 // FIXME-UTF8: not thread safe!
299 // FIXME-UTF8: we currently clear the cached conversion only when the string is
300 // destroyed, but we should do it when the string is modified, to
301 // keep memory usage down
302 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
303 // invalidated the cache on every change, we could keep the previous
305 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
306 // to use mb_str() or wc_str() instead of (const [w]char*)c_str()
309 static inline void DeleteStringFromConversionCache(T
& hash
, const wxString
*s
)
311 typename
T::iterator i
= hash
.find(wxConstCast(s
, wxString
));
312 if ( i
!= hash
.end() )
320 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
321 // so we have to use wxString* here and const-cast when used
322 WX_DECLARE_HASH_MAP(wxString
*, char*, wxPointerHash
, wxPointerEqual
,
323 wxStringCharConversionCache
);
324 static wxStringCharConversionCache gs_stringsCharCache
;
326 const char* wxCStrData::AsChar() const
328 // remove the previously cached value, if any (see FIXMEs above):
329 DeleteStringFromConversionCache(gs_stringsCharCache
, m_str
);
331 // convert the string and keep it:
332 const char *s
= gs_stringsCharCache
[wxConstCast(m_str
, wxString
)] =
333 m_str
->mb_str().release();
337 #endif // wxUSE_UNICODE
339 #if !wxUSE_UNICODE_WCHAR
340 WX_DECLARE_HASH_MAP(wxString
*, wchar_t*, wxPointerHash
, wxPointerEqual
,
341 wxStringWCharConversionCache
);
342 static wxStringWCharConversionCache gs_stringsWCharCache
;
344 const wchar_t* wxCStrData::AsWChar() const
346 // remove the previously cached value, if any (see FIXMEs above):
347 DeleteStringFromConversionCache(gs_stringsWCharCache
, m_str
);
349 // convert the string and keep it:
350 const wchar_t *s
= gs_stringsWCharCache
[wxConstCast(m_str
, wxString
)] =
351 m_str
->wc_str().release();
355 #endif // !wxUSE_UNICODE_WCHAR
357 wxString::~wxString()
360 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
361 DeleteStringFromConversionCache(gs_stringsCharCache
, this);
363 #if !wxUSE_UNICODE_WCHAR
364 DeleteStringFromConversionCache(gs_stringsWCharCache
, this);
369 // ===========================================================================
370 // wxString class core
371 // ===========================================================================
373 // ---------------------------------------------------------------------------
374 // construction and conversion
375 // ---------------------------------------------------------------------------
377 #if wxUSE_UNICODE_WCHAR
379 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
380 const wxMBConv
& conv
)
383 if ( !psz
|| nLength
== 0 )
384 return SubstrBufFromMB(wxWCharBuffer(L
""), 0);
386 if ( nLength
== npos
)
390 wxScopedWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
392 return SubstrBufFromMB(wxWCharBuffer(L
""), 0);
394 return SubstrBufFromMB(wcBuf
, wcLen
);
396 #endif // wxUSE_UNICODE_WCHAR
398 #if wxUSE_UNICODE_UTF8
400 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
401 const wxMBConv
& conv
)
404 if ( !psz
|| nLength
== 0 )
405 return SubstrBufFromMB(wxCharBuffer(""), 0);
407 // if psz is already in UTF-8, we don't have to do the roundtrip to
408 // wchar_t* and back:
411 // we need to validate the input because UTF8 iterators assume valid
412 // UTF-8 sequence and psz may be invalid:
413 if ( wxStringOperations::IsValidUtf8String(psz
, nLength
) )
415 // we must pass the real string length to SubstrBufFromMB ctor
416 if ( nLength
== npos
)
417 nLength
= psz
? strlen(psz
) : 0;
418 return SubstrBufFromMB(wxScopedCharBuffer::CreateNonOwned(psz
, nLength
),
421 // else: do the roundtrip through wchar_t*
424 if ( nLength
== npos
)
427 // first convert to wide string:
429 wxScopedWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
431 return SubstrBufFromMB(wxCharBuffer(""), 0);
433 // and then to UTF-8:
434 SubstrBufFromMB
buf(ConvertStr(wcBuf
, wcLen
, wxMBConvStrictUTF8()));
435 // widechar -> UTF-8 conversion isn't supposed to ever fail:
436 wxASSERT_MSG( buf
.data
, wxT("conversion to UTF-8 failed") );
440 #endif // wxUSE_UNICODE_UTF8
442 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
444 wxString::SubstrBufFromWC
wxString::ConvertStr(const wchar_t *pwz
, size_t nLength
,
445 const wxMBConv
& conv
)
448 if ( !pwz
|| nLength
== 0 )
449 return SubstrBufFromWC(wxCharBuffer(""), 0);
451 if ( nLength
== npos
)
455 wxScopedCharBuffer
mbBuf(conv
.cWC2MB(pwz
, nLength
, &mbLen
));
457 return SubstrBufFromWC(wxCharBuffer(""), 0);
459 return SubstrBufFromWC(mbBuf
, mbLen
);
461 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
463 // This std::string::c_str()-like method returns a wide char pointer to string
464 // contents. In wxUSE_UNICODE_WCHAR case it is trivial as it can simply return
465 // a pointer to the internal representation. Otherwise a conversion is required
466 // and it returns a temporary buffer.
468 // However for compatibility with c_str() and to avoid breaking existing code
471 // for ( const wchar_t *p = s.wc_str(); *p; p++ )
474 // we actually need to ensure that the returned buffer is _not_ temporary and
475 // so we use wxString::m_convertedToWChar to store the returned data
476 #if !wxUSE_UNICODE_WCHAR
478 const wchar_t *wxString::AsWChar(const wxMBConv
& conv
) const
480 const char * const strMB
= m_impl
.c_str();
481 const size_t lenMB
= m_impl
.length();
483 // find out the size of the buffer needed
484 const size_t lenWC
= conv
.ToWChar(NULL
, 0, strMB
, lenMB
);
485 if ( lenWC
== wxCONV_FAILED
)
488 // keep the same buffer if the string size didn't change: this is not only
489 // an optimization but also ensures that code which modifies the string
490 // character by character (without changing its length) can continue to use
491 // the pointer returned by a previous wc_str() call even after changing the
494 // TODO-UTF8: we could check for ">" instead of "!=" here as this would
495 // allow to save on buffer reallocations but at the cost of
496 // consuming (even) more memory, we should benchmark this to
497 // determine if it's worth doing
498 if ( !m_convertedToWChar
.m_str
|| lenWC
!= m_convertedToWChar
.m_len
)
500 if ( !const_cast<wxString
*>(this)->m_convertedToWChar
.Extend(lenWC
) )
504 // finally do convert
505 m_convertedToWChar
.m_str
[lenWC
] = L
'\0';
506 if ( conv
.ToWChar(m_convertedToWChar
.m_str
, lenWC
,
507 strMB
, lenMB
) == wxCONV_FAILED
)
510 return m_convertedToWChar
.m_str
;
513 #endif // !wxUSE_UNICODE_WCHAR
516 // Same thing for mb_str() which returns a normal char pointer to string
517 // contents: this always requires converting it to the specified encoding in
518 // non-ANSI build except if we need to convert to UTF-8 and this is what we
519 // already use internally.
522 const char *wxString::AsChar(const wxMBConv
& conv
) const
524 #if wxUSE_UNICODE_UTF8
526 return m_impl
.c_str();
528 const wchar_t * const strWC
= AsWChar(wxMBConvStrictUTF8());
529 const size_t lenWC
= m_convertedToWChar
.m_len
;
530 #else // wxUSE_UNICODE_WCHAR
531 const wchar_t * const strWC
= m_impl
.c_str();
532 const size_t lenWC
= m_impl
.length();
533 #endif // wxUSE_UNICODE_UTF8/wxUSE_UNICODE_WCHAR
535 const size_t lenMB
= conv
.FromWChar(NULL
, 0, strWC
, lenWC
);
536 if ( lenMB
== wxCONV_FAILED
)
539 if ( !m_convertedToChar
.m_str
|| lenMB
!= m_convertedToChar
.m_len
)
541 if ( !const_cast<wxString
*>(this)->m_convertedToChar
.Extend(lenMB
) )
545 m_convertedToChar
.m_str
[lenMB
] = '\0';
546 if ( conv
.FromWChar(m_convertedToChar
.m_str
, lenMB
,
547 strWC
, lenWC
) == wxCONV_FAILED
)
550 return m_convertedToChar
.m_str
;
553 #endif // wxUSE_UNICODE
555 // shrink to minimal size (releasing extra memory)
556 bool wxString::Shrink()
558 wxString
tmp(begin(), end());
560 return tmp
.length() == length();
563 // deprecated compatibility code:
564 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
565 wxStringCharType
*wxString::GetWriteBuf(size_t nLen
)
567 return DoGetWriteBuf(nLen
);
570 void wxString::UngetWriteBuf()
575 void wxString::UngetWriteBuf(size_t nLen
)
577 DoUngetWriteBuf(nLen
);
579 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
582 // ---------------------------------------------------------------------------
584 // ---------------------------------------------------------------------------
586 // all functions are inline in string.h
588 // ---------------------------------------------------------------------------
589 // concatenation operators
590 // ---------------------------------------------------------------------------
593 * concatenation functions come in 5 flavours:
595 * char + string and string + char
596 * C str + string and string + C str
599 wxString
operator+(const wxString
& str1
, const wxString
& str2
)
601 #if !wxUSE_STL_BASED_WXSTRING
602 wxASSERT( str1
.IsValid() );
603 wxASSERT( str2
.IsValid() );
612 wxString
operator+(const wxString
& str
, wxUniChar ch
)
614 #if !wxUSE_STL_BASED_WXSTRING
615 wxASSERT( str
.IsValid() );
624 wxString
operator+(wxUniChar ch
, const wxString
& str
)
626 #if !wxUSE_STL_BASED_WXSTRING
627 wxASSERT( str
.IsValid() );
636 wxString
operator+(const wxString
& str
, const char *psz
)
638 #if !wxUSE_STL_BASED_WXSTRING
639 wxASSERT( str
.IsValid() );
643 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
644 wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
652 wxString
operator+(const wxString
& str
, const wchar_t *pwz
)
654 #if !wxUSE_STL_BASED_WXSTRING
655 wxASSERT( str
.IsValid() );
659 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
660 wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
668 wxString
operator+(const char *psz
, const wxString
& str
)
670 #if !wxUSE_STL_BASED_WXSTRING
671 wxASSERT( str
.IsValid() );
675 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
676 wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
684 wxString
operator+(const wchar_t *pwz
, const wxString
& str
)
686 #if !wxUSE_STL_BASED_WXSTRING
687 wxASSERT( str
.IsValid() );
691 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
692 wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
700 // ---------------------------------------------------------------------------
702 // ---------------------------------------------------------------------------
704 bool wxString::IsSameAs(wxUniChar c
, bool compareWithCase
) const
706 return (length() == 1) && (compareWithCase
? GetChar(0u) == c
707 : wxToupper(GetChar(0u)) == wxToupper(c
));
710 #ifdef HAVE_STD_STRING_COMPARE
712 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
713 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
714 // sort strings in characters code point order by sorting the byte sequence
715 // in byte values order (i.e. what strcmp() and memcmp() do).
717 int wxString::compare(const wxString
& str
) const
719 return m_impl
.compare(str
.m_impl
);
722 int wxString::compare(size_t nStart
, size_t nLen
,
723 const wxString
& str
) const
726 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
727 return m_impl
.compare(pos
, len
, str
.m_impl
);
730 int wxString::compare(size_t nStart
, size_t nLen
,
732 size_t nStart2
, size_t nLen2
) const
735 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
738 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
740 return m_impl
.compare(pos
, len
, str
.m_impl
, pos2
, len2
);
743 int wxString::compare(const char* sz
) const
745 return m_impl
.compare(ImplStr(sz
));
748 int wxString::compare(const wchar_t* sz
) const
750 return m_impl
.compare(ImplStr(sz
));
753 int wxString::compare(size_t nStart
, size_t nLen
,
754 const char* sz
, size_t nCount
) const
757 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
759 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
761 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
764 int wxString::compare(size_t nStart
, size_t nLen
,
765 const wchar_t* sz
, size_t nCount
) const
768 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
770 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
772 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
775 #else // !HAVE_STD_STRING_COMPARE
777 static inline int wxDoCmp(const wxStringCharType
* s1
, size_t l1
,
778 const wxStringCharType
* s2
, size_t l2
)
781 return wxStringMemcmp(s1
, s2
, l1
);
784 int ret
= wxStringMemcmp(s1
, s2
, l1
);
785 return ret
== 0 ? -1 : ret
;
789 int ret
= wxStringMemcmp(s1
, s2
, l2
);
790 return ret
== 0 ? +1 : ret
;
794 int wxString::compare(const wxString
& str
) const
796 return ::wxDoCmp(m_impl
.data(), m_impl
.length(),
797 str
.m_impl
.data(), str
.m_impl
.length());
800 int wxString::compare(size_t nStart
, size_t nLen
,
801 const wxString
& str
) const
803 wxASSERT(nStart
<= length());
804 size_type strLen
= length() - nStart
;
805 nLen
= strLen
< nLen
? strLen
: nLen
;
808 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
810 return ::wxDoCmp(m_impl
.data() + pos
, len
,
811 str
.m_impl
.data(), str
.m_impl
.length());
814 int wxString::compare(size_t nStart
, size_t nLen
,
816 size_t nStart2
, size_t nLen2
) const
818 wxASSERT(nStart
<= length());
819 wxASSERT(nStart2
<= str
.length());
820 size_type strLen
= length() - nStart
,
821 strLen2
= str
.length() - nStart2
;
822 nLen
= strLen
< nLen
? strLen
: nLen
;
823 nLen2
= strLen2
< nLen2
? strLen2
: nLen2
;
826 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
828 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
830 return ::wxDoCmp(m_impl
.data() + pos
, len
,
831 str
.m_impl
.data() + pos2
, len2
);
834 int wxString::compare(const char* sz
) const
836 SubstrBufFromMB
str(ImplStr(sz
, npos
));
837 if ( str
.len
== npos
)
838 str
.len
= wxStringStrlen(str
.data
);
839 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
842 int wxString::compare(const wchar_t* sz
) const
844 SubstrBufFromWC
str(ImplStr(sz
, npos
));
845 if ( str
.len
== npos
)
846 str
.len
= wxStringStrlen(str
.data
);
847 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
850 int wxString::compare(size_t nStart
, size_t nLen
,
851 const char* sz
, size_t nCount
) const
853 wxASSERT(nStart
<= length());
854 size_type strLen
= length() - nStart
;
855 nLen
= strLen
< nLen
? strLen
: nLen
;
858 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
860 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
861 if ( str
.len
== npos
)
862 str
.len
= wxStringStrlen(str
.data
);
864 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
867 int wxString::compare(size_t nStart
, size_t nLen
,
868 const wchar_t* sz
, size_t nCount
) const
870 wxASSERT(nStart
<= length());
871 size_type strLen
= length() - nStart
;
872 nLen
= strLen
< nLen
? strLen
: nLen
;
875 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
877 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
878 if ( str
.len
== npos
)
879 str
.len
= wxStringStrlen(str
.data
);
881 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
884 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
887 // ---------------------------------------------------------------------------
888 // find_{first,last}_[not]_of functions
889 // ---------------------------------------------------------------------------
891 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
893 // NB: All these functions are implemented with the argument being wxChar*,
894 // i.e. widechar string in any Unicode build, even though native string
895 // representation is char* in the UTF-8 build. This is because we couldn't
896 // use memchr() to determine if a character is in a set encoded as UTF-8.
898 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
) const
900 return find_first_of(sz
, nStart
, wxStrlen(sz
));
903 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
) const
905 return find_first_not_of(sz
, nStart
, wxStrlen(sz
));
908 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
910 wxASSERT_MSG( nStart
<= length(), wxT("invalid index") );
913 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
915 if ( wxTmemchr(sz
, *i
, n
) )
922 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
924 wxASSERT_MSG( nStart
<= length(), wxT("invalid index") );
927 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
929 if ( !wxTmemchr(sz
, *i
, n
) )
937 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
) const
939 return find_last_of(sz
, nStart
, wxStrlen(sz
));
942 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
) const
944 return find_last_not_of(sz
, nStart
, wxStrlen(sz
));
947 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
949 size_t len
= length();
951 if ( nStart
== npos
)
957 wxASSERT_MSG( nStart
<= len
, wxT("invalid index") );
961 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
962 i
!= rend(); --idx
, ++i
)
964 if ( wxTmemchr(sz
, *i
, n
) )
971 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
973 size_t len
= length();
975 if ( nStart
== npos
)
981 wxASSERT_MSG( nStart
<= len
, wxT("invalid index") );
985 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
986 i
!= rend(); --idx
, ++i
)
988 if ( !wxTmemchr(sz
, *i
, n
) )
995 size_t wxString::find_first_not_of(wxUniChar ch
, size_t nStart
) const
997 wxASSERT_MSG( nStart
<= length(), wxT("invalid index") );
1000 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
1009 size_t wxString::find_last_not_of(wxUniChar ch
, size_t nStart
) const
1011 size_t len
= length();
1013 if ( nStart
== npos
)
1019 wxASSERT_MSG( nStart
<= len
, wxT("invalid index") );
1022 size_t idx
= nStart
;
1023 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
1024 i
!= rend(); --idx
, ++i
)
1033 // the functions above were implemented for wchar_t* arguments in Unicode
1034 // build and char* in ANSI build; below are implementations for the other
1037 #define wxOtherCharType char
1038 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
1040 #define wxOtherCharType wchar_t
1041 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
1044 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
) const
1045 { return find_first_of(STRCONV(sz
), nStart
); }
1047 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
,
1049 { return find_first_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1050 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
) const
1051 { return find_last_of(STRCONV(sz
), nStart
); }
1052 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
,
1054 { return find_last_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1055 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
1056 { return find_first_not_of(STRCONV(sz
), nStart
); }
1057 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
,
1059 { return find_first_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1060 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
1061 { return find_last_not_of(STRCONV(sz
), nStart
); }
1062 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
,
1064 { return find_last_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1066 #undef wxOtherCharType
1069 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1071 // ===========================================================================
1072 // other common string functions
1073 // ===========================================================================
1075 int wxString::CmpNoCase(const wxString
& s
) const
1077 #if defined(__WXMSW__) && !wxUSE_UNICODE_UTF8
1078 // prefer to use CompareString() if available as it's more efficient than
1079 // doing it manually or even using wxStricmp() (see #10375)
1080 switch ( ::CompareString(LOCALE_USER_DEFAULT
, NORM_IGNORECASE
,
1081 m_impl
.c_str(), m_impl
.length(),
1082 s
.m_impl
.c_str(), s
.m_impl
.length()) )
1084 case CSTR_LESS_THAN
:
1090 case CSTR_GREATER_THAN
:
1094 wxFAIL_MSG( "unexpected CompareString() return value" );
1098 wxLogLastError("CompareString");
1099 // use generic code below
1101 #endif // __WXMSW__ && !wxUSE_UNICODE_UTF8
1103 // do the comparison manually: notice that we can't use wxStricmp() as it
1104 // doesn't handle embedded NULs
1106 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
1107 const_iterator i1
= begin();
1108 const_iterator end1
= end();
1109 const_iterator i2
= s
.begin();
1110 const_iterator end2
= s
.end();
1112 for ( ; i1
!= end1
&& i2
!= end2
; ++i1
, ++i2
)
1114 wxUniChar lower1
= (wxChar
)wxTolower(*i1
);
1115 wxUniChar lower2
= (wxChar
)wxTolower(*i2
);
1116 if ( lower1
!= lower2
)
1117 return lower1
< lower2
? -1 : 1;
1120 size_t len1
= length();
1121 size_t len2
= s
.length();
1125 else if ( len1
> len2
)
1134 #ifndef __SCHAR_MAX__
1135 #define __SCHAR_MAX__ 127
1139 wxString
wxString::FromAscii(const char *ascii
, size_t len
)
1141 if (!ascii
|| len
== 0)
1142 return wxEmptyString
;
1147 wxStringInternalBuffer
buf(res
, len
);
1148 wxStringCharType
*dest
= buf
;
1150 for ( ; len
> 0; --len
)
1152 unsigned char c
= (unsigned char)*ascii
++;
1153 wxASSERT_MSG( c
< 0x80,
1154 wxT("Non-ASCII value passed to FromAscii().") );
1156 *dest
++ = (wchar_t)c
;
1163 wxString
wxString::FromAscii(const char *ascii
)
1165 return FromAscii(ascii
, wxStrlen(ascii
));
1168 wxString
wxString::FromAscii(char ascii
)
1170 // What do we do with '\0' ?
1172 unsigned char c
= (unsigned char)ascii
;
1174 wxASSERT_MSG( c
< 0x80, wxT("Non-ASCII value passed to FromAscii().") );
1176 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1177 return wxString(wxUniChar((wchar_t)c
));
1180 const wxScopedCharBuffer
wxString::ToAscii() const
1182 // this will allocate enough space for the terminating NUL too
1183 wxCharBuffer
buffer(length());
1184 char *dest
= buffer
.data();
1186 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1189 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1190 *dest
++ = c
.IsAscii() ? (char)c
: '_';
1192 // the output string can't have embedded NULs anyhow, so we can safely
1193 // stop at first of them even if we do have any
1201 #endif // wxUSE_UNICODE
1203 // extract string of length nCount starting at nFirst
1204 wxString
wxString::Mid(size_t nFirst
, size_t nCount
) const
1206 size_t nLen
= length();
1208 // default value of nCount is npos and means "till the end"
1209 if ( nCount
== npos
)
1211 nCount
= nLen
- nFirst
;
1214 // out-of-bounds requests return sensible things
1215 if ( nFirst
+ nCount
> nLen
)
1217 nCount
= nLen
- nFirst
;
1220 if ( nFirst
> nLen
)
1222 // AllocCopy() will return empty string
1223 return wxEmptyString
;
1226 wxString
dest(*this, nFirst
, nCount
);
1227 if ( dest
.length() != nCount
)
1229 wxFAIL_MSG( wxT("out of memory in wxString::Mid") );
1235 // check that the string starts with prefix and return the rest of the string
1236 // in the provided pointer if it is not NULL, otherwise return false
1237 bool wxString::StartsWith(const wxString
& prefix
, wxString
*rest
) const
1239 if ( compare(0, prefix
.length(), prefix
) != 0 )
1244 // put the rest of the string into provided pointer
1245 rest
->assign(*this, prefix
.length(), npos
);
1252 // check that the string ends with suffix and return the rest of it in the
1253 // provided pointer if it is not NULL, otherwise return false
1254 bool wxString::EndsWith(const wxString
& suffix
, wxString
*rest
) const
1256 int start
= length() - suffix
.length();
1258 if ( start
< 0 || compare(start
, npos
, suffix
) != 0 )
1263 // put the rest of the string into provided pointer
1264 rest
->assign(*this, 0, start
);
1271 // extract nCount last (rightmost) characters
1272 wxString
wxString::Right(size_t nCount
) const
1274 if ( nCount
> length() )
1277 wxString
dest(*this, length() - nCount
, nCount
);
1278 if ( dest
.length() != nCount
) {
1279 wxFAIL_MSG( wxT("out of memory in wxString::Right") );
1284 // get all characters after the last occurrence of ch
1285 // (returns the whole string if ch not found)
1286 wxString
wxString::AfterLast(wxUniChar ch
) const
1289 int iPos
= Find(ch
, true);
1290 if ( iPos
== wxNOT_FOUND
)
1293 str
.assign(*this, iPos
+ 1, npos
);
1298 // extract nCount first (leftmost) characters
1299 wxString
wxString::Left(size_t nCount
) const
1301 if ( nCount
> length() )
1304 wxString
dest(*this, 0, nCount
);
1305 if ( dest
.length() != nCount
) {
1306 wxFAIL_MSG( wxT("out of memory in wxString::Left") );
1311 // get all characters before the first occurrence of ch
1312 // (returns the whole string if ch not found)
1313 wxString
wxString::BeforeFirst(wxUniChar ch
) const
1315 int iPos
= Find(ch
);
1316 if ( iPos
== wxNOT_FOUND
)
1318 return wxString(*this, 0, iPos
);
1321 /// get all characters before the last occurrence of ch
1322 /// (returns empty string if ch not found)
1323 wxString
wxString::BeforeLast(wxUniChar ch
) const
1326 int iPos
= Find(ch
, true);
1327 if ( iPos
!= wxNOT_FOUND
&& iPos
!= 0 )
1328 str
= wxString(c_str(), iPos
);
1333 /// get all characters after the first occurrence of ch
1334 /// (returns empty string if ch not found)
1335 wxString
wxString::AfterFirst(wxUniChar ch
) const
1338 int iPos
= Find(ch
);
1339 if ( iPos
!= wxNOT_FOUND
)
1340 str
.assign(*this, iPos
+ 1, npos
);
1345 // replace first (or all) occurrences of some substring with another one
1346 size_t wxString::Replace(const wxString
& strOld
,
1347 const wxString
& strNew
, bool bReplaceAll
)
1349 // if we tried to replace an empty string we'd enter an infinite loop below
1350 wxCHECK_MSG( !strOld
.empty(), 0,
1351 wxT("wxString::Replace(): invalid parameter") );
1353 wxSTRING_INVALIDATE_CACHE();
1355 size_t uiCount
= 0; // count of replacements made
1357 // optimize the special common case: replacement of one character by
1358 // another one (in UTF-8 case we can only do this for ASCII characters)
1360 // benchmarks show that this special version is around 3 times faster
1361 // (depending on the proportion of matching characters and UTF-8/wchar_t
1363 if ( strOld
.m_impl
.length() == 1 && strNew
.m_impl
.length() == 1 )
1365 const wxStringCharType chOld
= strOld
.m_impl
[0],
1366 chNew
= strNew
.m_impl
[0];
1368 // this loop is the simplified version of the one below
1369 for ( size_t pos
= 0; ; )
1371 pos
= m_impl
.find(chOld
, pos
);
1375 m_impl
[pos
++] = chNew
;
1383 else if ( !bReplaceAll
)
1385 size_t pos
= m_impl
.find(strOld
, 0);
1388 m_impl
.replace(pos
, strOld
.m_impl
.length(), strNew
.m_impl
);
1392 else // replace all occurrences
1394 const size_t uiOldLen
= strOld
.m_impl
.length();
1395 const size_t uiNewLen
= strNew
.m_impl
.length();
1397 // first scan the string to find all positions at which the replacement
1399 wxVector
<size_t> replacePositions
;
1402 for ( pos
= m_impl
.find(strOld
.m_impl
, 0);
1404 pos
= m_impl
.find(strOld
.m_impl
, pos
+ uiOldLen
))
1406 replacePositions
.push_back(pos
);
1413 // allocate enough memory for the whole new string
1415 tmp
.m_impl
.reserve(m_impl
.length() + uiCount
*(uiNewLen
- uiOldLen
));
1417 // copy this string to tmp doing replacements on the fly
1419 for ( pos
= 0; replNum
< uiCount
; replNum
++ )
1421 const size_t nextReplPos
= replacePositions
[replNum
];
1423 if ( pos
!= nextReplPos
)
1425 tmp
.m_impl
.append(m_impl
, pos
, nextReplPos
- pos
);
1428 tmp
.m_impl
.append(strNew
.m_impl
);
1429 pos
= nextReplPos
+ uiOldLen
;
1432 if ( pos
!= m_impl
.length() )
1434 // append the rest of the string unchanged
1435 tmp
.m_impl
.append(m_impl
, pos
, m_impl
.length() - pos
);
1444 bool wxString::IsAscii() const
1446 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1448 if ( !(*i
).IsAscii() )
1455 bool wxString::IsWord() const
1457 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1459 if ( !wxIsalpha(*i
) )
1466 bool wxString::IsNumber() const
1471 const_iterator i
= begin();
1473 if ( *i
== wxT('-') || *i
== wxT('+') )
1476 for ( ; i
!= end(); ++i
)
1478 if ( !wxIsdigit(*i
) )
1485 wxString
wxString::Strip(stripType w
) const
1488 if ( w
& leading
) s
.Trim(false);
1489 if ( w
& trailing
) s
.Trim(true);
1493 // ---------------------------------------------------------------------------
1495 // ---------------------------------------------------------------------------
1497 wxString
& wxString::MakeUpper()
1499 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1500 *it
= (wxChar
)wxToupper(*it
);
1505 wxString
& wxString::MakeLower()
1507 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1508 *it
= (wxChar
)wxTolower(*it
);
1513 wxString
& wxString::MakeCapitalized()
1515 const iterator en
= end();
1516 iterator it
= begin();
1519 *it
= (wxChar
)wxToupper(*it
);
1520 for ( ++it
; it
!= en
; ++it
)
1521 *it
= (wxChar
)wxTolower(*it
);
1527 // ---------------------------------------------------------------------------
1528 // trimming and padding
1529 // ---------------------------------------------------------------------------
1531 // some compilers (VC++ 6.0 not to name them) return true for a call to
1532 // isspace('\xEA') in the C locale which seems to be broken to me, but we have
1533 // to live with this by checking that the character is a 7 bit one - even if
1534 // this may fail to detect some spaces (I don't know if Unicode doesn't have
1535 // space-like symbols somewhere except in the first 128 chars), it is arguably
1536 // still better than trimming away accented letters
1537 inline int wxSafeIsspace(wxChar ch
) { return (ch
< 127) && wxIsspace(ch
); }
1539 // trims spaces (in the sense of isspace) from left or right side
1540 wxString
& wxString::Trim(bool bFromRight
)
1542 // first check if we're going to modify the string at all
1545 (bFromRight
&& wxSafeIsspace(GetChar(length() - 1))) ||
1546 (!bFromRight
&& wxSafeIsspace(GetChar(0u)))
1552 // find last non-space character
1553 reverse_iterator psz
= rbegin();
1554 while ( (psz
!= rend()) && wxSafeIsspace(*psz
) )
1557 // truncate at trailing space start
1558 erase(psz
.base(), end());
1562 // find first non-space character
1563 iterator psz
= begin();
1564 while ( (psz
!= end()) && wxSafeIsspace(*psz
) )
1567 // fix up data and length
1568 erase(begin(), psz
);
1575 // adds nCount characters chPad to the string from either side
1576 wxString
& wxString::Pad(size_t nCount
, wxUniChar chPad
, bool bFromRight
)
1578 wxString
s(chPad
, nCount
);
1591 // truncate the string
1592 wxString
& wxString::Truncate(size_t uiLen
)
1594 if ( uiLen
< length() )
1596 erase(begin() + uiLen
, end());
1598 //else: nothing to do, string is already short enough
1603 // ---------------------------------------------------------------------------
1604 // finding (return wxNOT_FOUND if not found and index otherwise)
1605 // ---------------------------------------------------------------------------
1608 int wxString::Find(wxUniChar ch
, bool bFromEnd
) const
1610 size_type idx
= bFromEnd
? find_last_of(ch
) : find_first_of(ch
);
1612 return (idx
== npos
) ? wxNOT_FOUND
: (int)idx
;
1615 // ----------------------------------------------------------------------------
1616 // conversion to numbers
1617 // ----------------------------------------------------------------------------
// The implementation of all the functions below is exactly the same, so their
// common prologue and epilogue are factored out into the two macros defined
// here. Note that number extraction works correctly on UTF-8 strings, so we
// can use wxStringCharType and wx_str() for maximum efficiency.

// DO_IF_NOT_WINCE(x) expands to x in all builds except Windows CE ones, where
// it expands to nothing: it is used below to wrap the code using errno
#ifndef __WXWINCE__
    #define DO_IF_NOT_WINCE(x) x
#else
    #define DO_IF_NOT_WINCE(x)
#endif

// Prologue: expects the output pointer pVal to be in scope, returns false if
// it is NULL, resets errno and declares the start and end pointers used by
// the conversion call
#define WX_STRING_TO_X_TYPE_START                                           \
    wxCHECK_MSG( pVal, false, wxT("NULL output pointer") );                 \
    DO_IF_NOT_WINCE( errno = 0; )                                           \
    const wxStringCharType *start = wx_str();                               \
    wxStringCharType *end;

// Epilogue: expects the conversion result to be in the variable val; returns
// true (after storing val in *pVal) only if scan was stopped by the
// terminating NUL and if the string was not empty to start with and no
// under/overflow occurred
#define WX_STRING_TO_X_TYPE_END                                             \
    if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )         \
        return false;                                                       \
    *pVal = val;                                                            \
    return true;
1644 bool wxString::ToLong(long *pVal
, int base
) const
1646 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), wxT("invalid base") );
1648 WX_STRING_TO_X_TYPE_START
1649 long val
= wxStrtol(start
, &end
, base
);
1650 WX_STRING_TO_X_TYPE_END
1653 bool wxString::ToULong(unsigned long *pVal
, int base
) const
1655 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), wxT("invalid base") );
1657 WX_STRING_TO_X_TYPE_START
1658 unsigned long val
= wxStrtoul(start
, &end
, base
);
1659 WX_STRING_TO_X_TYPE_END
1662 bool wxString::ToLongLong(wxLongLong_t
*pVal
, int base
) const
1664 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), wxT("invalid base") );
1666 WX_STRING_TO_X_TYPE_START
1667 wxLongLong_t val
= wxStrtoll(start
, &end
, base
);
1668 WX_STRING_TO_X_TYPE_END
1671 bool wxString::ToULongLong(wxULongLong_t
*pVal
, int base
) const
1673 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), wxT("invalid base") );
1675 WX_STRING_TO_X_TYPE_START
1676 wxULongLong_t val
= wxStrtoull(start
, &end
, base
);
1677 WX_STRING_TO_X_TYPE_END
1680 bool wxString::ToDouble(double *pVal
) const
1682 WX_STRING_TO_X_TYPE_START
1683 double val
= wxStrtod(start
, &end
);
1684 WX_STRING_TO_X_TYPE_END
1689 bool wxString::ToCLong(long *pVal
, int base
) const
1691 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), wxT("invalid base") );
1693 WX_STRING_TO_X_TYPE_START
1694 #if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT)
1695 long val
= wxStrtol_lA(start
, &end
, base
, wxCLocale
);
1697 long val
= wxStrtol_l(start
, &end
, base
, wxCLocale
);
1699 WX_STRING_TO_X_TYPE_END
1702 bool wxString::ToCULong(unsigned long *pVal
, int base
) const
1704 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), wxT("invalid base") );
1706 WX_STRING_TO_X_TYPE_START
1707 #if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT)
1708 unsigned long val
= wxStrtoul_lA(start
, &end
, base
, wxCLocale
);
1710 unsigned long val
= wxStrtoul_l(start
, &end
, base
, wxCLocale
);
1712 WX_STRING_TO_X_TYPE_END
1715 bool wxString::ToCDouble(double *pVal
) const
1717 WX_STRING_TO_X_TYPE_START
1718 #if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT)
1719 double val
= wxStrtod_lA(start
, &end
, wxCLocale
);
1721 double val
= wxStrtod_l(start
, &end
, wxCLocale
);
1723 WX_STRING_TO_X_TYPE_END
1726 #endif // wxUSE_XLOCALE
1728 // ---------------------------------------------------------------------------
1730 // ---------------------------------------------------------------------------
1732 #if !wxUSE_UTF8_LOCALE_ONLY
1734 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1735 wxString
wxStringPrintfMixinBase::DoFormatWchar(const wxChar
*format
, ...)
1737 wxString
wxString::DoFormatWchar(const wxChar
*format
, ...)
1741 va_start(argptr
, format
);
1744 s
.PrintfV(format
, argptr
);
1750 #endif // !wxUSE_UTF8_LOCALE_ONLY
1752 #if wxUSE_UNICODE_UTF8
1754 wxString
wxString::DoFormatUtf8(const char *format
, ...)
1757 va_start(argptr
, format
);
1760 s
.PrintfV(format
, argptr
);
1766 #endif // wxUSE_UNICODE_UTF8
1769 wxString
wxString::FormatV(const wxString
& format
, va_list argptr
)
1772 s
.PrintfV(format
, argptr
);
1776 #if !wxUSE_UTF8_LOCALE_ONLY
1777 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1778 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar
*format
, ...)
1780 int wxString::DoPrintfWchar(const wxChar
*format
, ...)
1784 va_start(argptr
, format
);
1786 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1787 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1788 // because it's the only cast that works safely for downcasting when
1789 // multiple inheritance is used:
1790 wxString
*str
= static_cast<wxString
*>(this);
1792 wxString
*str
= this;
1795 int iLen
= str
->PrintfV(format
, argptr
);
1801 #endif // !wxUSE_UTF8_LOCALE_ONLY
1803 #if wxUSE_UNICODE_UTF8
1804 int wxString::DoPrintfUtf8(const char *format
, ...)
1807 va_start(argptr
, format
);
1809 int iLen
= PrintfV(format
, argptr
);
1815 #endif // wxUSE_UNICODE_UTF8
1818 Uses wxVsnprintf and places the result into this string.
1820 In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1821 it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in
1822 the ISO C99 (and thus SUSv3) standard the return value for the case of
1823 an undersized buffer is inconsistent. For conforming vsnprintf
1824 implementations the function must return the number of characters that
1825 would have been printed had the buffer been large enough. For conforming
1826 vswprintf implementations the function must return a negative number
1829 What vswprintf sets errno to is undefined but Darwin seems to set it to
1830 EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of
1831 those are defined in the standard and backed up by several conformance
1832 statements. Note that ENOMEM mentioned in the manual page does not
1833 apply to swprintf, only wprintf and fwprintf.
1835 Official manual page:
1836 http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1838 Some conformance statements (AIX, Solaris):
1839 http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1840 http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1842 Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1843 EILSEQ and EINVAL are specifically defined to mean the error is other than
1844 an undersized buffer and no other errno are defined we treat those two
1845 as meaning hard errors and everything else gets the old behavior which
1846 is to keep looping and increasing buffer size until the function succeeds.
1848 In practice it's impossible to determine before compilation which behavior
1849 may be used. The vswprintf function may have vsnprintf-like behavior or
1850 vice-versa. Behavior detected on one release can theoretically change
1851 with an updated release. Not to mention that configure testing for it
1852 would require the test to be run on the host system, not the build system
1853 which makes cross compilation difficult. Therefore, we make no assumptions
1854 about behavior and try our best to handle every known case, including the
1855 case where wxVsnprintf returns a negative number and fails to set errno.
1857 There is yet one more non-standard implementation and that is our own.
1858 Fortunately, that can be detected at compile-time.
1860 On top of all that, ISO C99 explicitly defines snprintf to write a null
1861 character to the last position of the specified buffer. That would be at
1862 the given buffer size minus 1. It is supposed to do this even if it
1863 turns out that the buffer is sized too small.
1865 Darwin (tested on 10.5) follows the C99 behavior exactly.
1867 Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1868 errno even when it fails. However, it only seems to ever fail due
1869 to an undersized buffer.
1871 #if wxUSE_UNICODE_UTF8
1872 template<typename BufferType
>
1874 // we only need one version in non-UTF8 builds and at least two Windows
1875 // compilers have problems with this function template, so use just one
1876 // normal function here
1878 static int DoStringPrintfV(wxString
& str
,
1879 const wxString
& format
, va_list argptr
)
1885 #if wxUSE_UNICODE_UTF8
1886 BufferType
tmp(str
, size
+ 1);
1887 typename
BufferType::CharType
*buf
= tmp
;
1889 wxStringBuffer
tmp(str
, size
+ 1);
1897 // in UTF-8 build, leaving uninitialized junk in the buffer
1898 // could result in invalid non-empty UTF-8 string, so just
1899 // reset the string to empty on failure:
1904 // wxVsnprintf() may modify the original arg pointer, so pass it
1907 wxVaCopy(argptrcopy
, argptr
);
1910 // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1913 int len
= wxVsnprintf(buf
, size
, format
, argptrcopy
);
1916 // some implementations of vsnprintf() don't NUL terminate
1917 // the string if there is not enough space for it so
1918 // always do it manually
1919 // FIXME: This really seems to be the wrong and would be an off-by-one
1920 // bug except the code above allocates an extra character.
1921 buf
[size
] = wxT('\0');
1923 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1924 // total number of characters which would have been written if the
1925 // buffer were large enough (newer standards such as Unix98)
1928 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1929 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1930 // is true if *both* of them use our own implementation,
1931 // otherwise we can't be sure
1932 #if wxUSE_WXVSNPRINTF
1933 // we know that our own implementation of wxVsnprintf() returns -1
1934 // only for a format error - thus there's something wrong with
1935 // the user's format string
1938 #else // possibly using system version
1939 // assume it only returns error if there is not enough space, but
1940 // as we don't know how much we need, double the current size of
1943 if( (errno
== EILSEQ
) || (errno
== EINVAL
) )
1944 // If errno was set to one of the two well-known hard errors
1945 // then fail immediately to avoid an infinite loop.
1948 #endif // __WXWINCE__
1949 // still not enough, as we don't know how much we need, double the
1950 // current size of the buffer
1952 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1954 else if ( len
>= size
)
1956 #if wxUSE_WXVSNPRINTF
1957 // we know that our own implementation of wxVsnprintf() returns
1958 // size+1 when there's not enough space but that's not the size
1959 // of the required buffer!
1960 size
*= 2; // so we just double the current size of the buffer
1962 // some vsnprintf() implementations NUL-terminate the buffer and
1963 // some don't in len == size case, to be safe always add 1
1964 // FIXME: I don't quite understand this comment. The vsnprintf
1965 // function is specifically defined to return the number of
1966 // characters printed not including the null terminator.
1967 // So OF COURSE you need to add 1 to get the right buffer size.
1968 // The following line is definitely correct, no question.
1972 else // ok, there was enough space
1978 // we could have overshot
1981 return str
.length();
1984 int wxString::PrintfV(const wxString
& format
, va_list argptr
)
1986 #if wxUSE_UNICODE_UTF8
1987 #if wxUSE_STL_BASED_WXSTRING
1988 typedef wxStringTypeBuffer
<char> Utf8Buffer
;
1990 typedef wxStringInternalBuffer Utf8Buffer
;
1994 #if wxUSE_UTF8_LOCALE_ONLY
1995 return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argptr
);
1997 #if wxUSE_UNICODE_UTF8
1998 if ( wxLocaleIsUtf8
)
1999 return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argptr
);
2002 return DoStringPrintfV
<wxStringBuffer
>(*this, format
, argptr
);
2004 return DoStringPrintfV(*this, format
, argptr
);
2005 #endif // UTF8/WCHAR
2009 // ----------------------------------------------------------------------------
2010 // misc other operations
2011 // ----------------------------------------------------------------------------
2013 // returns true if the string matches the pattern which may contain '*' and
2014 // '?' metacharacters (as usual, '?' matches any character and '*' any number
2016 bool wxString::Matches(const wxString
& mask
) const
2018 // I disable this code as it doesn't seem to be faster (in fact, it seems
2019 // to be much slower) than the old, hand-written code below and using it
2020 // here requires always linking with libregex even if the user code doesn't
2022 #if 0 // wxUSE_REGEX
2023 // first translate the shell-like mask into a regex
2025 pattern
.reserve(wxStrlen(pszMask
));
2027 pattern
+= wxT('^');
2033 pattern
+= wxT('.');
2037 pattern
+= wxT(".*");
2048 // these characters are special in a RE, quote them
2049 // (however note that we don't quote '[' and ']' to allow
2050 // using them for Unix shell like matching)
2051 pattern
+= wxT('\\');
2055 pattern
+= *pszMask
;
2060 pattern
+= wxT('$');
2063 return wxRegEx(pattern
, wxRE_NOSUB
| wxRE_EXTENDED
).Matches(c_str());
2064 #else // !wxUSE_REGEX
2065 // TODO: this is, of course, awfully inefficient...
2067 // FIXME-UTF8: implement using iterators, remove #if
2068 #if wxUSE_UNICODE_UTF8
2069 const wxScopedWCharBuffer maskBuf
= mask
.wc_str();
2070 const wxScopedWCharBuffer txtBuf
= wc_str();
2071 const wxChar
*pszMask
= maskBuf
.data();
2072 const wxChar
*pszTxt
= txtBuf
.data();
2074 const wxChar
*pszMask
= mask
.wx_str();
2075 // the char currently being checked
2076 const wxChar
*pszTxt
= wx_str();
2079 // the last location where '*' matched
2080 const wxChar
*pszLastStarInText
= NULL
;
2081 const wxChar
*pszLastStarInMask
= NULL
;
2084 for ( ; *pszMask
!= wxT('\0'); pszMask
++, pszTxt
++ ) {
2085 switch ( *pszMask
) {
2087 if ( *pszTxt
== wxT('\0') )
2090 // pszTxt and pszMask will be incremented in the loop statement
2096 // remember where we started to be able to backtrack later
2097 pszLastStarInText
= pszTxt
;
2098 pszLastStarInMask
= pszMask
;
2100 // ignore special chars immediately following this one
2101 // (should this be an error?)
2102 while ( *pszMask
== wxT('*') || *pszMask
== wxT('?') )
2105 // if there is nothing more, match
2106 if ( *pszMask
== wxT('\0') )
2109 // are there any other metacharacters in the mask?
2111 const wxChar
*pEndMask
= wxStrpbrk(pszMask
, wxT("*?"));
2113 if ( pEndMask
!= NULL
) {
2114 // we have to match the string between two metachars
2115 uiLenMask
= pEndMask
- pszMask
;
2118 // we have to match the remainder of the string
2119 uiLenMask
= wxStrlen(pszMask
);
2122 wxString
strToMatch(pszMask
, uiLenMask
);
2123 const wxChar
* pMatch
= wxStrstr(pszTxt
, strToMatch
);
2124 if ( pMatch
== NULL
)
2127 // -1 to compensate "++" in the loop
2128 pszTxt
= pMatch
+ uiLenMask
- 1;
2129 pszMask
+= uiLenMask
- 1;
2134 if ( *pszMask
!= *pszTxt
)
2140 // match only if nothing left
2141 if ( *pszTxt
== wxT('\0') )
2144 // if we failed to match, backtrack if we can
2145 if ( pszLastStarInText
) {
2146 pszTxt
= pszLastStarInText
+ 1;
2147 pszMask
= pszLastStarInMask
;
2149 pszLastStarInText
= NULL
;
2151 // don't bother resetting pszLastStarInMask, it's unnecessary
2157 #endif // wxUSE_REGEX/!wxUSE_REGEX
2160 // Count the number of chars
2161 int wxString::Freq(wxUniChar ch
) const
2164 for ( const_iterator i
= begin(); i
!= end(); ++i
)