1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
13 // ===========================================================================
14 // headers, declarations, constants
15 // ===========================================================================
17 // For compilers that support precompilation, includes "wx.h".
18 #include "wx/wxprec.h"
25 #include "wx/string.h"
26 #include "wx/wxcrtvararg.h"
38 #include "wx/hashmap.h"
39 #include "wx/vector.h"
40 #include "wx/xlocale.h"
42 // string handling functions used by wxString:
43 #if wxUSE_UNICODE_UTF8
44 #define wxStringMemcpy memcpy
45 #define wxStringMemcmp memcmp
46 #define wxStringMemchr memchr
47 #define wxStringStrlen strlen
49 #define wxStringMemcpy wxTmemcpy
50 #define wxStringMemcmp wxTmemcmp
51 #define wxStringMemchr wxTmemchr
52 #define wxStringStrlen wxStrlen
55 // ----------------------------------------------------------------------------
57 // ----------------------------------------------------------------------------
62 static UntypedBufferData
s_untypedNullData(NULL
);
64 UntypedBufferData
* const untypedNullDataPtr
= &s_untypedNullData
;
66 } // namespace wxPrivate
68 // ---------------------------------------------------------------------------
69 // static class variables definition
70 // ---------------------------------------------------------------------------
72 //According to STL _must_ be a -1 size_t
73 const size_t wxString::npos
= (size_t) -1;
75 #if wxUSE_STRING_POS_CACHE
77 #ifdef wxHAS_COMPILER_TLS
79 wxTLS_TYPE(wxString::Cache
) wxString::ms_cache
;
81 #else // !wxHAS_COMPILER_TLS
83 struct wxStrCacheInitializer
85 wxStrCacheInitializer()
87 // calling this function triggers s_cache initialization in it, and
88 // from now on it becomes safe to call from multiple threads
94 wxString::Cache& wxString::GetCache()
96 static wxTLS_TYPE(Cache) s_cache;
98 return wxTLS_VALUE(s_cache);
102 static wxStrCacheInitializer gs_stringCacheInit
;
104 #endif // wxHAS_COMPILER_TLS/!wxHAS_COMPILER_TLS
106 // gdb seems to be unable to display thread-local variables correctly, at least
107 // not my 6.4.98 version under amd64, so provide this debugging helper to do it
108 #if wxDEBUG_LEVEL >= 2
110 struct wxStrCacheDumper
112 static void ShowAll()
114 puts("*** wxString cache dump:");
115 for ( unsigned n
= 0; n
< wxString::Cache::SIZE
; n
++ )
117 const wxString::Cache::Element
&
118 c
= wxString::GetCacheBegin()[n
];
120 printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
122 n
== wxString::LastUsedCacheElement() ? " [*]" : "",
124 (unsigned long)c
.pos
,
125 (unsigned long)c
.impl
,
131 void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
133 #endif // wxDEBUG_LEVEL >= 2
135 #ifdef wxPROFILE_STRING_CACHE
137 wxString::CacheStats
wxString::ms_cacheStats
;
139 struct wxStrCacheStatsDumper
141 ~wxStrCacheStatsDumper()
143 const wxString::CacheStats
& stats
= wxString::ms_cacheStats
;
147 puts("*** wxString cache statistics:");
148 printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
150 printf("\tHits %u (of which %u not used) or %.2f%%\n",
153 100.*float(stats
.poshits
- stats
.mishits
)/stats
.postot
);
154 printf("\tAverage position requested: %.2f\n",
155 float(stats
.sumpos
) / stats
.postot
);
156 printf("\tAverage offset after cached hint: %.2f\n",
157 float(stats
.sumofs
) / stats
.postot
);
162 printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
163 stats
.lentot
, 100.*float(stats
.lenhits
)/stats
.lentot
);
168 static wxStrCacheStatsDumper s_showCacheStats
;
170 #endif // wxPROFILE_STRING_CACHE
172 #endif // wxUSE_STRING_POS_CACHE
174 // ----------------------------------------------------------------------------
176 // ----------------------------------------------------------------------------
178 #if wxUSE_STD_IOSTREAM
182 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxCStrData
& str
)
184 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
185 const wxCharBuffer
buf(str
.AsCharBuf());
187 os
.clear(wxSTD
ios_base::failbit
);
193 return os
<< str
.AsInternal();
197 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxString
& str
)
199 return os
<< str
.c_str();
202 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxCharBuffer
& str
)
204 return os
<< str
.data();
208 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxWCharBuffer
& str
)
210 return os
<< str
.data();
214 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
216 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxString
& str
)
218 return wos
<< str
.wc_str();
221 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxCStrData
& str
)
223 return wos
<< str
.AsWChar();
226 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxWCharBuffer
& str
)
228 return wos
<< str
.data();
231 #endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
233 #endif // wxUSE_STD_IOSTREAM
235 // ===========================================================================
236 // wxString class core
237 // ===========================================================================
239 #if wxUSE_UNICODE_UTF8
241 void wxString::PosLenToImpl(size_t pos
, size_t len
,
242 size_t *implPos
, size_t *implLen
) const
248 else // have valid start position
250 const const_iterator b
= GetIterForNthChar(pos
);
251 *implPos
= wxStringImpl::const_iterator(b
.impl()) - m_impl
.begin();
256 else // have valid length too
258 // we need to handle the case of length specifying a substring
259 // going beyond the end of the string, just as std::string does
260 const const_iterator
e(end());
262 while ( len
&& i
<= e
)
268 *implLen
= i
.impl() - b
.impl();
273 #endif // wxUSE_UNICODE_UTF8
275 // ----------------------------------------------------------------------------
276 // wxCStrData converted strings caching
277 // ----------------------------------------------------------------------------
279 // FIXME-UTF8: temporarily disabled because it doesn't work with global
280 // string objects; re-enable after fixing this bug and benchmarking
281 // performance to see if using a hash is a good idea at all
284 // For backward compatibility reasons, it must be possible to assign the value
285 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
286 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
287 // because the memory would be freed immediately, but it has to be valid as long
288 // as the string is not modified, so that code like this still works:
290 // const wxChar *s = str.c_str();
291 // while ( s ) { ... }
293 // FIXME-UTF8: not thread safe!
294 // FIXME-UTF8: we currently clear the cached conversion only when the string is
295 // destroyed, but we should do it when the string is modified, to
296 // keep memory usage down
297 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
298 // invalidated the cache on every change, we could keep the previous
300 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
301 // to use mb_str() or wc_str() instead of (const [w]char*)c_str()
304 static inline void DeleteStringFromConversionCache(T
& hash
, const wxString
*s
)
306 typename
T::iterator i
= hash
.find(wxConstCast(s
, wxString
));
307 if ( i
!= hash
.end() )
315 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
316 // so we have to use wxString* here and const-cast when used
317 WX_DECLARE_HASH_MAP(wxString
*, char*, wxPointerHash
, wxPointerEqual
,
318 wxStringCharConversionCache
);
319 static wxStringCharConversionCache gs_stringsCharCache
;
321 const char* wxCStrData::AsChar() const
    // remove the previously cached value, if any (see FIXMEs above):
324 DeleteStringFromConversionCache(gs_stringsCharCache
, m_str
);
326 // convert the string and keep it:
327 const char *s
= gs_stringsCharCache
[wxConstCast(m_str
, wxString
)] =
328 m_str
->mb_str().release();
332 #endif // wxUSE_UNICODE
334 #if !wxUSE_UNICODE_WCHAR
335 WX_DECLARE_HASH_MAP(wxString
*, wchar_t*, wxPointerHash
, wxPointerEqual
,
336 wxStringWCharConversionCache
);
337 static wxStringWCharConversionCache gs_stringsWCharCache
;
339 const wchar_t* wxCStrData::AsWChar() const
    // remove the previously cached value, if any (see FIXMEs above):
342 DeleteStringFromConversionCache(gs_stringsWCharCache
, m_str
);
344 // convert the string and keep it:
345 const wchar_t *s
= gs_stringsWCharCache
[wxConstCast(m_str
, wxString
)] =
346 m_str
->wc_str().release();
350 #endif // !wxUSE_UNICODE_WCHAR
352 wxString::~wxString()
355 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
356 DeleteStringFromConversionCache(gs_stringsCharCache
, this);
358 #if !wxUSE_UNICODE_WCHAR
359 DeleteStringFromConversionCache(gs_stringsWCharCache
, this);
364 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
365 const char* wxCStrData::AsChar() const
367 #if wxUSE_UNICODE_UTF8
368 if ( wxLocaleIsUtf8
)
371 // under non-UTF8 locales, we have to convert the internal UTF-8
372 // representation using wxConvLibc and cache the result
374 wxString
*str
= wxConstCast(m_str
, wxString
);
376 // convert the string:
378 // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
379 // have it) but it's unfortunately not obvious to implement
    //             because we don't know how big a buffer we need for the
381 // given string length (in case of multibyte encodings, e.g.
382 // ISO-2022-JP or UTF-8 when internal representation is wchar_t)
384 // One idea would be to store more than just m_convertedToChar
385 // in wxString: then we could record the length of the string
386 // which was converted the last time and try to reuse the same
387 // buffer if the current length is not greater than it (this
388 // could still fail because string could have been modified in
389 // place but it would work most of the time, so we'd do it and
390 // only allocate the new buffer if in-place conversion returned
391 // an error). We could also store a bit saying if the string
392 // was modified since the last conversion (and update it in all
393 // operation modifying the string, of course) to avoid unneeded
394 // consequential conversions. But both of these ideas require
395 // adding more fields to wxString and require profiling results
396 // to be sure that we really gain enough from them to justify
398 wxCharBuffer
buf(str
->mb_str());
    // if it failed, return an empty string and not NULL to avoid crashes in code
    // written with either wxWidgets 2 wxString or std::string behaviour in
    // mind: neither of them ever returns NULL and so we shouldn't either
406 if ( str
->m_convertedToChar
&&
407 strlen(buf
) == strlen(str
->m_convertedToChar
) )
409 // keep the same buffer for as long as possible, so that several calls
410 // to c_str() in a row still work:
411 strcpy(str
->m_convertedToChar
, buf
);
415 str
->m_convertedToChar
= buf
.release();
419 return str
->m_convertedToChar
+ m_offset
;
421 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
423 #if !wxUSE_UNICODE_WCHAR
424 const wchar_t* wxCStrData::AsWChar() const
426 wxString
*str
= wxConstCast(m_str
, wxString
);
428 // convert the string:
429 wxWCharBuffer
buf(str
->wc_str());
431 // notice that here, unlike above in AsChar(), conversion can't fail as our
432 // internal UTF-8 is always well-formed -- or the string was corrupted and
433 // all bets are off anyhow
435 // FIXME-UTF8: do the conversion in-place in the existing buffer
436 if ( str
->m_convertedToWChar
&&
437 wxWcslen(buf
) == wxWcslen(str
->m_convertedToWChar
) )
439 // keep the same buffer for as long as possible, so that several calls
440 // to c_str() in a row still work:
441 memcpy(str
->m_convertedToWChar
, buf
, sizeof(wchar_t) * wxWcslen(buf
));
445 str
->m_convertedToWChar
= buf
.release();
449 return str
->m_convertedToWChar
+ m_offset
;
451 #endif // !wxUSE_UNICODE_WCHAR
453 // ===========================================================================
454 // wxString class core
455 // ===========================================================================
457 // ---------------------------------------------------------------------------
458 // construction and conversion
459 // ---------------------------------------------------------------------------
461 #if wxUSE_UNICODE_WCHAR
463 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
464 const wxMBConv
& conv
)
467 if ( !psz
|| nLength
== 0 )
468 return SubstrBufFromMB(L
"", 0);
470 if ( nLength
== npos
)
474 wxWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
476 return SubstrBufFromMB(_T(""), 0);
478 return SubstrBufFromMB(wcBuf
, wcLen
);
480 #endif // wxUSE_UNICODE_WCHAR
482 #if wxUSE_UNICODE_UTF8
484 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
485 const wxMBConv
& conv
)
488 if ( !psz
|| nLength
== 0 )
489 return SubstrBufFromMB("", 0);
491 // if psz is already in UTF-8, we don't have to do the roundtrip to
492 // wchar_t* and back:
495 // we need to validate the input because UTF8 iterators assume valid
496 // UTF-8 sequence and psz may be invalid:
497 if ( wxStringOperations::IsValidUtf8String(psz
, nLength
) )
499 // we must pass the real string length to SubstrBufFromMB ctor
500 if ( nLength
== npos
)
501 nLength
= psz
? strlen(psz
) : 0;
502 return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz
), nLength
);
504 // else: do the roundtrip through wchar_t*
507 if ( nLength
== npos
)
510 // first convert to wide string:
512 wxWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
514 return SubstrBufFromMB("", 0);
516 // and then to UTF-8:
517 SubstrBufFromMB
buf(ConvertStr(wcBuf
, wcLen
, wxMBConvStrictUTF8()));
518 // widechar -> UTF-8 conversion isn't supposed to ever fail:
519 wxASSERT_MSG( buf
.data
, _T("conversion to UTF-8 failed") );
523 #endif // wxUSE_UNICODE_UTF8
525 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
527 wxString::SubstrBufFromWC
wxString::ConvertStr(const wchar_t *pwz
, size_t nLength
,
528 const wxMBConv
& conv
)
531 if ( !pwz
|| nLength
== 0 )
532 return SubstrBufFromWC("", 0);
534 if ( nLength
== npos
)
538 wxCharBuffer
mbBuf(conv
.cWC2MB(pwz
, nLength
, &mbLen
));
540 return SubstrBufFromWC("", 0);
542 return SubstrBufFromWC(mbBuf
, mbLen
);
544 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
547 #if wxUSE_UNICODE_WCHAR
549 //Convert wxString in Unicode mode to a multi-byte string
550 const wxCharBuffer
wxString::mb_str(const wxMBConv
& conv
) const
552 return conv
.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL
);
555 #elif wxUSE_UNICODE_UTF8
557 const wxWCharBuffer
wxString::wc_str() const
559 return wxMBConvStrictUTF8().cMB2WC
562 m_impl
.length() + 1, // size, not length
567 const wxCharBuffer
wxString::mb_str(const wxMBConv
& conv
) const
570 return wxCharBuffer::CreateNonOwned(m_impl
.c_str());
572 // FIXME-UTF8: use wc_str() here once we have buffers with length
575 wxWCharBuffer
wcBuf(wxMBConvStrictUTF8().cMB2WC
578 m_impl
.length() + 1, // size
582 return wxCharBuffer("");
584 return conv
.cWC2MB(wcBuf
, wcLen
+1, NULL
);
589 //Converts this string to a wide character string if unicode
590 //mode is not enabled and wxUSE_WCHAR_T is enabled
591 const wxWCharBuffer
wxString::wc_str(const wxMBConv
& conv
) const
593 return conv
.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL
);
596 #endif // Unicode/ANSI
598 // shrink to minimal size (releasing extra memory)
599 bool wxString::Shrink()
601 wxString
tmp(begin(), end());
603 return tmp
.length() == length();
606 // deprecated compatibility code:
607 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
608 wxStringCharType
*wxString::GetWriteBuf(size_t nLen
)
610 return DoGetWriteBuf(nLen
);
613 void wxString::UngetWriteBuf()
618 void wxString::UngetWriteBuf(size_t nLen
)
620 DoUngetWriteBuf(nLen
);
622 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
625 // ---------------------------------------------------------------------------
627 // ---------------------------------------------------------------------------
629 // all functions are inline in string.h
631 // ---------------------------------------------------------------------------
632 // concatenation operators
633 // ---------------------------------------------------------------------------
636 * concatenation functions come in 5 flavours:
638 * char + string and string + char
639 * C str + string and string + C str
642 wxString
operator+(const wxString
& str1
, const wxString
& str2
)
644 #if !wxUSE_STL_BASED_WXSTRING
645 wxASSERT( str1
.IsValid() );
646 wxASSERT( str2
.IsValid() );
655 wxString
operator+(const wxString
& str
, wxUniChar ch
)
657 #if !wxUSE_STL_BASED_WXSTRING
658 wxASSERT( str
.IsValid() );
667 wxString
operator+(wxUniChar ch
, const wxString
& str
)
669 #if !wxUSE_STL_BASED_WXSTRING
670 wxASSERT( str
.IsValid() );
679 wxString
operator+(const wxString
& str
, const char *psz
)
681 #if !wxUSE_STL_BASED_WXSTRING
682 wxASSERT( str
.IsValid() );
686 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
687 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
695 wxString
operator+(const wxString
& str
, const wchar_t *pwz
)
697 #if !wxUSE_STL_BASED_WXSTRING
698 wxASSERT( str
.IsValid() );
702 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
703 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
711 wxString
operator+(const char *psz
, const wxString
& str
)
713 #if !wxUSE_STL_BASED_WXSTRING
714 wxASSERT( str
.IsValid() );
718 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
719 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
727 wxString
operator+(const wchar_t *pwz
, const wxString
& str
)
729 #if !wxUSE_STL_BASED_WXSTRING
730 wxASSERT( str
.IsValid() );
734 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
735 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
743 // ---------------------------------------------------------------------------
745 // ---------------------------------------------------------------------------
747 bool wxString::IsSameAs(wxUniChar c
, bool compareWithCase
) const
749 return (length() == 1) && (compareWithCase
? GetChar(0u) == c
750 : wxToupper(GetChar(0u)) == wxToupper(c
));
753 #ifdef HAVE_STD_STRING_COMPARE
755 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
756 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
757 // sort strings in characters code point order by sorting the byte sequence
758 // in byte values order (i.e. what strcmp() and memcmp() do).
760 int wxString::compare(const wxString
& str
) const
762 return m_impl
.compare(str
.m_impl
);
765 int wxString::compare(size_t nStart
, size_t nLen
,
766 const wxString
& str
) const
769 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
770 return m_impl
.compare(pos
, len
, str
.m_impl
);
773 int wxString::compare(size_t nStart
, size_t nLen
,
775 size_t nStart2
, size_t nLen2
) const
778 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
781 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
783 return m_impl
.compare(pos
, len
, str
.m_impl
, pos2
, len2
);
786 int wxString::compare(const char* sz
) const
788 return m_impl
.compare(ImplStr(sz
));
791 int wxString::compare(const wchar_t* sz
) const
793 return m_impl
.compare(ImplStr(sz
));
796 int wxString::compare(size_t nStart
, size_t nLen
,
797 const char* sz
, size_t nCount
) const
800 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
802 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
804 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
807 int wxString::compare(size_t nStart
, size_t nLen
,
808 const wchar_t* sz
, size_t nCount
) const
811 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
813 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
815 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
818 #else // !HAVE_STD_STRING_COMPARE
820 static inline int wxDoCmp(const wxStringCharType
* s1
, size_t l1
,
821 const wxStringCharType
* s2
, size_t l2
)
824 return wxStringMemcmp(s1
, s2
, l1
);
827 int ret
= wxStringMemcmp(s1
, s2
, l1
);
828 return ret
== 0 ? -1 : ret
;
832 int ret
= wxStringMemcmp(s1
, s2
, l2
);
833 return ret
== 0 ? +1 : ret
;
837 int wxString::compare(const wxString
& str
) const
839 return ::wxDoCmp(m_impl
.data(), m_impl
.length(),
840 str
.m_impl
.data(), str
.m_impl
.length());
843 int wxString::compare(size_t nStart
, size_t nLen
,
844 const wxString
& str
) const
846 wxASSERT(nStart
<= length());
847 size_type strLen
= length() - nStart
;
848 nLen
= strLen
< nLen
? strLen
: nLen
;
851 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
853 return ::wxDoCmp(m_impl
.data() + pos
, len
,
854 str
.m_impl
.data(), str
.m_impl
.length());
857 int wxString::compare(size_t nStart
, size_t nLen
,
859 size_t nStart2
, size_t nLen2
) const
861 wxASSERT(nStart
<= length());
862 wxASSERT(nStart2
<= str
.length());
863 size_type strLen
= length() - nStart
,
864 strLen2
= str
.length() - nStart2
;
865 nLen
= strLen
< nLen
? strLen
: nLen
;
866 nLen2
= strLen2
< nLen2
? strLen2
: nLen2
;
869 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
871 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
873 return ::wxDoCmp(m_impl
.data() + pos
, len
,
874 str
.m_impl
.data() + pos2
, len2
);
877 int wxString::compare(const char* sz
) const
879 SubstrBufFromMB
str(ImplStr(sz
, npos
));
880 if ( str
.len
== npos
)
881 str
.len
= wxStringStrlen(str
.data
);
882 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
885 int wxString::compare(const wchar_t* sz
) const
887 SubstrBufFromWC
str(ImplStr(sz
, npos
));
888 if ( str
.len
== npos
)
889 str
.len
= wxStringStrlen(str
.data
);
890 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
893 int wxString::compare(size_t nStart
, size_t nLen
,
894 const char* sz
, size_t nCount
) const
896 wxASSERT(nStart
<= length());
897 size_type strLen
= length() - nStart
;
898 nLen
= strLen
< nLen
? strLen
: nLen
;
901 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
903 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
904 if ( str
.len
== npos
)
905 str
.len
= wxStringStrlen(str
.data
);
907 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
910 int wxString::compare(size_t nStart
, size_t nLen
,
911 const wchar_t* sz
, size_t nCount
) const
913 wxASSERT(nStart
<= length());
914 size_type strLen
= length() - nStart
;
915 nLen
= strLen
< nLen
? strLen
: nLen
;
918 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
920 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
921 if ( str
.len
== npos
)
922 str
.len
= wxStringStrlen(str
.data
);
924 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
927 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
930 // ---------------------------------------------------------------------------
931 // find_{first,last}_[not]_of functions
932 // ---------------------------------------------------------------------------
934 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
936 // NB: All these functions are implemented with the argument being wxChar*,
937 // i.e. widechar string in any Unicode build, even though native string
938 // representation is char* in the UTF-8 build. This is because we couldn't
939 // use memchr() to determine if a character is in a set encoded as UTF-8.
941 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
) const
943 return find_first_of(sz
, nStart
, wxStrlen(sz
));
946 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
) const
948 return find_first_not_of(sz
, nStart
, wxStrlen(sz
));
951 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
953 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
956 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
958 if ( wxTmemchr(sz
, *i
, n
) )
965 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
967 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
970 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
972 if ( !wxTmemchr(sz
, *i
, n
) )
980 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
) const
982 return find_last_of(sz
, nStart
, wxStrlen(sz
));
985 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
) const
987 return find_last_not_of(sz
, nStart
, wxStrlen(sz
));
990 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
992 size_t len
= length();
994 if ( nStart
== npos
)
1000 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
1003 size_t idx
= nStart
;
1004 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
1005 i
!= rend(); --idx
, ++i
)
1007 if ( wxTmemchr(sz
, *i
, n
) )
1014 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
1016 size_t len
= length();
1018 if ( nStart
== npos
)
1024 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
1027 size_t idx
= nStart
;
1028 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
1029 i
!= rend(); --idx
, ++i
)
1031 if ( !wxTmemchr(sz
, *i
, n
) )
1038 size_t wxString::find_first_not_of(wxUniChar ch
, size_t nStart
) const
1040 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
1042 size_t idx
= nStart
;
1043 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
1052 size_t wxString::find_last_not_of(wxUniChar ch
, size_t nStart
) const
1054 size_t len
= length();
1056 if ( nStart
== npos
)
1062 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
1065 size_t idx
= nStart
;
1066 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
1067 i
!= rend(); --idx
, ++i
)
1076 // the functions above were implemented for wchar_t* arguments in Unicode
1077 // build and char* in ANSI build; below are implementations for the other
1080 #define wxOtherCharType char
1081 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
1083 #define wxOtherCharType wchar_t
1084 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
1087 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
) const
1088 { return find_first_of(STRCONV(sz
), nStart
); }
1090 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
,
1092 { return find_first_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1093 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
) const
1094 { return find_last_of(STRCONV(sz
), nStart
); }
1095 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
,
1097 { return find_last_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1098 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
1099 { return find_first_not_of(STRCONV(sz
), nStart
); }
1100 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
,
1102 { return find_first_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1103 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
1104 { return find_last_not_of(STRCONV(sz
), nStart
); }
1105 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
,
1107 { return find_last_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1109 #undef wxOtherCharType
1112 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1114 // ===========================================================================
1115 // other common string functions
1116 // ===========================================================================
1118 int wxString::CmpNoCase(const wxString
& s
) const
1120 #if wxUSE_UNICODE_UTF8
1121 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
1123 const_iterator i1
= begin();
1124 const_iterator end1
= end();
1125 const_iterator i2
= s
.begin();
1126 const_iterator end2
= s
.end();
1128 for ( ; i1
!= end1
&& i2
!= end2
; ++i1
, ++i2
)
1130 wxUniChar lower1
= (wxChar
)wxTolower(*i1
);
1131 wxUniChar lower2
= (wxChar
)wxTolower(*i2
);
1132 if ( lower1
!= lower2
)
1133 return lower1
< lower2
? -1 : 1;
1136 size_t len1
= length();
1137 size_t len2
= s
.length();
1141 else if ( len1
> len2
)
1144 #else // wxUSE_UNICODE_WCHAR or ANSI
1145 return wxStricmp(m_impl
.c_str(), s
.m_impl
.c_str());
1153 #ifndef __SCHAR_MAX__
1154 #define __SCHAR_MAX__ 127
1158 wxString
wxString::FromAscii(const char *ascii
, size_t len
)
1160 if (!ascii
|| len
== 0)
1161 return wxEmptyString
;
1166 wxStringInternalBuffer
buf(res
, len
);
1167 wxStringCharType
*dest
= buf
;
1169 for ( ; len
> 0; --len
)
1171 unsigned char c
= (unsigned char)*ascii
++;
1172 wxASSERT_MSG( c
< 0x80,
1173 _T("Non-ASCII value passed to FromAscii().") );
1175 *dest
++ = (wchar_t)c
;
1182 wxString
wxString::FromAscii(const char *ascii
)
1184 return FromAscii(ascii
, wxStrlen(ascii
));
1187 wxString
wxString::FromAscii(char ascii
)
1189 // What do we do with '\0' ?
1191 unsigned char c
= (unsigned char)ascii
;
1193 wxASSERT_MSG( c
< 0x80, _T("Non-ASCII value passed to FromAscii().") );
1195 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1196 return wxString(wxUniChar((wchar_t)c
));
1199 const wxCharBuffer
wxString::ToAscii() const
1201 // this will allocate enough space for the terminating NUL too
1202 wxCharBuffer
buffer(length());
1203 char *dest
= buffer
.data();
1205 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1208 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1209 *dest
++ = c
.IsAscii() ? (char)c
: '_';
1211 // the output string can't have embedded NULs anyhow, so we can safely
1212 // stop at first of them even if we do have any
1220 #endif // wxUSE_UNICODE
1222 // extract string of length nCount starting at nFirst
1223 wxString
wxString::Mid(size_t nFirst
, size_t nCount
) const
1225 size_t nLen
= length();
1227 // default value of nCount is npos and means "till the end"
1228 if ( nCount
== npos
)
1230 nCount
= nLen
- nFirst
;
1233 // out-of-bounds requests return sensible things
1234 if ( nFirst
+ nCount
> nLen
)
1236 nCount
= nLen
- nFirst
;
1239 if ( nFirst
> nLen
)
1241 // AllocCopy() will return empty string
1242 return wxEmptyString
;
1245 wxString
dest(*this, nFirst
, nCount
);
1246 if ( dest
.length() != nCount
)
1248 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1254 // check that the string starts with prefix and return the rest of the string
1255 // in the provided pointer if it is not NULL, otherwise return false
1256 bool wxString::StartsWith(const wxString
& prefix
, wxString
*rest
) const
1258 if ( compare(0, prefix
.length(), prefix
) != 0 )
1263 // put the rest of the string into provided pointer
1264 rest
->assign(*this, prefix
.length(), npos
);
1271 // check that the string ends with suffix and return the rest of it in the
1272 // provided pointer if it is not NULL, otherwise return false
1273 bool wxString::EndsWith(const wxString
& suffix
, wxString
*rest
) const
1275 int start
= length() - suffix
.length();
1277 if ( start
< 0 || compare(start
, npos
, suffix
) != 0 )
1282 // put the rest of the string into provided pointer
1283 rest
->assign(*this, 0, start
);
1290 // extract nCount last (rightmost) characters
1291 wxString
wxString::Right(size_t nCount
) const
1293 if ( nCount
> length() )
1296 wxString
dest(*this, length() - nCount
, nCount
);
1297 if ( dest
.length() != nCount
) {
1298 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1303 // get all characters after the last occurrence of ch
1304 // (returns the whole string if ch not found)
1305 wxString
wxString::AfterLast(wxUniChar ch
) const
1308 int iPos
= Find(ch
, true);
1309 if ( iPos
== wxNOT_FOUND
)
1312 str
.assign(*this, iPos
+ 1, npos
);
1317 // extract nCount first (leftmost) characters
1318 wxString
wxString::Left(size_t nCount
) const
1320 if ( nCount
> length() )
1323 wxString
dest(*this, 0, nCount
);
1324 if ( dest
.length() != nCount
) {
1325 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1330 // get all characters before the first occurrence of ch
1331 // (returns the whole string if ch not found)
1332 wxString
wxString::BeforeFirst(wxUniChar ch
) const
1334 int iPos
= Find(ch
);
1335 if ( iPos
== wxNOT_FOUND
)
1337 return wxString(*this, 0, iPos
);
1340 /// get all characters before the last occurrence of ch
1341 /// (returns empty string if ch not found)
1342 wxString
wxString::BeforeLast(wxUniChar ch
) const
1345 int iPos
= Find(ch
, true);
1346 if ( iPos
!= wxNOT_FOUND
&& iPos
!= 0 )
1347 str
= wxString(c_str(), iPos
);
1352 /// get all characters after the first occurrence of ch
1353 /// (returns empty string if ch not found)
1354 wxString
wxString::AfterFirst(wxUniChar ch
) const
1357 int iPos
= Find(ch
);
1358 if ( iPos
!= wxNOT_FOUND
)
1359 str
.assign(*this, iPos
+ 1, npos
);
1364 // replace first (or all) occurrences of some substring with another one
1365 size_t wxString::Replace(const wxString
& strOld
,
1366 const wxString
& strNew
, bool bReplaceAll
)
1368 // if we tried to replace an empty string we'd enter an infinite loop below
1369 wxCHECK_MSG( !strOld
.empty(), 0,
1370 _T("wxString::Replace(): invalid parameter") );
1372 wxSTRING_INVALIDATE_CACHE();
1374 size_t uiCount
= 0; // count of replacements made
1376 // optimize the special common case: replacement of one character by
1377 // another one (in UTF-8 case we can only do this for ASCII characters)
1379 // benchmarks show that this special version is around 3 times faster
1380 // (depending on the proportion of matching characters and UTF-8/wchar_t
1382 if ( strOld
.m_impl
.length() == 1 && strNew
.m_impl
.length() == 1 )
1384 const wxStringCharType chOld
= strOld
.m_impl
[0],
1385 chNew
= strNew
.m_impl
[0];
1387 // this loop is the simplified version of the one below
1388 for ( size_t pos
= 0; ; )
1390 pos
= m_impl
.find(chOld
, pos
);
1394 m_impl
[pos
++] = chNew
;
1402 else if ( !bReplaceAll
)
1404 size_t pos
= m_impl
.find(strOld
, 0);
1407 m_impl
.replace(pos
, strOld
.m_impl
.length(), strNew
.m_impl
);
1411 else // replace all occurrences
1413 const size_t uiOldLen
= strOld
.m_impl
.length();
1414 const size_t uiNewLen
= strNew
.m_impl
.length();
1416 // first scan the string to find all positions at which the replacement
1418 wxVector
<size_t> replacePositions
;
1421 for ( pos
= m_impl
.find(strOld
.m_impl
, 0);
1423 pos
= m_impl
.find(strOld
.m_impl
, pos
+ uiOldLen
))
1425 replacePositions
.push_back(pos
);
1432 // allocate enough memory for the whole new string
1434 tmp
.m_impl
.reserve(m_impl
.length() + uiCount
*(uiNewLen
- uiOldLen
));
1436 // copy this string to tmp doing replacements on the fly
1438 for ( pos
= 0; replNum
< uiCount
; replNum
++ )
1440 const size_t nextReplPos
= replacePositions
[replNum
];
1442 if ( pos
!= nextReplPos
)
1444 tmp
.m_impl
.append(m_impl
, pos
, nextReplPos
- pos
);
1447 tmp
.m_impl
.append(strNew
.m_impl
);
1448 pos
= nextReplPos
+ uiOldLen
;
1451 if ( pos
!= m_impl
.length() )
1453 // append the rest of the string unchanged
1454 tmp
.m_impl
.append(m_impl
, pos
, m_impl
.length() - pos
);
1463 bool wxString::IsAscii() const
1465 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1467 if ( !(*i
).IsAscii() )
1474 bool wxString::IsWord() const
1476 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1478 if ( !wxIsalpha(*i
) )
1485 bool wxString::IsNumber() const
1490 const_iterator i
= begin();
1492 if ( *i
== _T('-') || *i
== _T('+') )
1495 for ( ; i
!= end(); ++i
)
1497 if ( !wxIsdigit(*i
) )
1504 wxString
wxString::Strip(stripType w
) const
1507 if ( w
& leading
) s
.Trim(false);
1508 if ( w
& trailing
) s
.Trim(true);
1512 // ---------------------------------------------------------------------------
1514 // ---------------------------------------------------------------------------
1516 wxString
& wxString::MakeUpper()
1518 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1519 *it
= (wxChar
)wxToupper(*it
);
1524 wxString
& wxString::MakeLower()
1526 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1527 *it
= (wxChar
)wxTolower(*it
);
1532 wxString
& wxString::MakeCapitalized()
1534 const iterator en
= end();
1535 iterator it
= begin();
1538 *it
= (wxChar
)wxToupper(*it
);
1539 for ( ++it
; it
!= en
; ++it
)
1540 *it
= (wxChar
)wxTolower(*it
);
1546 // ---------------------------------------------------------------------------
1547 // trimming and padding
1548 // ---------------------------------------------------------------------------
1550 // some compilers (VC++ 6.0 not to name them) return true for a call to
1551 // isspace('\xEA') in the C locale which seems to be broken to me, but we have
1552 // to live with this by checking that the character is a 7 bit one - even if
1553 // this may fail to detect some spaces (I don't know if Unicode doesn't have
1554 // space-like symbols somewhere except in the first 128 chars), it is arguably
1555 // still better than trimming away accented letters
1556 inline int wxSafeIsspace(wxChar ch
) { return (ch
< 127) && wxIsspace(ch
); }
1558 // trims spaces (in the sense of isspace) from left or right side
1559 wxString
& wxString::Trim(bool bFromRight
)
1561 // first check if we're going to modify the string at all
1564 (bFromRight
&& wxSafeIsspace(GetChar(length() - 1))) ||
1565 (!bFromRight
&& wxSafeIsspace(GetChar(0u)))
1571 // find last non-space character
1572 reverse_iterator psz
= rbegin();
1573 while ( (psz
!= rend()) && wxSafeIsspace(*psz
) )
1576 // truncate at trailing space start
1577 erase(psz
.base(), end());
1581 // find first non-space character
1582 iterator psz
= begin();
1583 while ( (psz
!= end()) && wxSafeIsspace(*psz
) )
1586 // fix up data and length
1587 erase(begin(), psz
);
1594 // adds nCount characters chPad to the string from either side
1595 wxString
& wxString::Pad(size_t nCount
, wxUniChar chPad
, bool bFromRight
)
1597 wxString
s(chPad
, nCount
);
1610 // truncate the string
1611 wxString
& wxString::Truncate(size_t uiLen
)
1613 if ( uiLen
< length() )
1615 erase(begin() + uiLen
, end());
1617 //else: nothing to do, string is already short enough
1622 // ---------------------------------------------------------------------------
1623 // finding (return wxNOT_FOUND if not found and index otherwise)
1624 // ---------------------------------------------------------------------------
1627 int wxString::Find(wxUniChar ch
, bool bFromEnd
) const
1629 size_type idx
= bFromEnd
? find_last_of(ch
) : find_first_of(ch
);
1631 return (idx
== npos
) ? wxNOT_FOUND
: (int)idx
;
1634 // ----------------------------------------------------------------------------
1635 // conversion to numbers
1636 // ----------------------------------------------------------------------------
1638 // The implementation of all the functions below is exactly the same so factor
1639 // it out. Note that number extraction works correctly on UTF-8 strings, so
1640 // we can use wxStringCharType and wx_str() for maximum efficiency.
// DO_IF_NOT_WINCE(x) expands either to its argument or to nothing.
// NOTE(review): the preprocessor conditional selecting between these two
// definitions is not visible here; presumably it tests __WXWINCE__ -- confirm.
1643 #define DO_IF_NOT_WINCE(x) x
1645 #define DO_IF_NOT_WINCE(x)
// Common prologue of the conversion functions: checks that the output
// pointer pVal is not NULL (returning false otherwise), resets errno where
// DO_IF_NOT_WINCE() is not a no-op, and declares the variables "start"
// (pointing to the string data) and "end" which the function bodies pass to
// the wxStrtoXXX() function they use.
1648 #define WX_STRING_TO_X_TYPE_START \
1649 wxCHECK_MSG( pVal, false, _T("NULL output pointer") ); \
1650 DO_IF_NOT_WINCE( errno = 0; ) \
1651 const wxStringCharType *start = wx_str(); \
1652 wxStringCharType *end;
// Common epilogue of the conversion functions: it relies on the variables
// declared by WX_STRING_TO_X_TYPE_START and on the conversion result being
// stored in a variable called "val" by the function body.
1654 #define WX_STRING_TO_X_TYPE_END \
1655 /* return true only if scan was stopped by the terminating NUL and */ \
1656 /* if the string was not empty to start with and no under/overflow */ \
1658 if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) ) \
// Converts the string to a long in the given base using wxStrtol().
//
// base must be either 0 or between 2 and 36 inclusive (this is asserted).
// The variables start and end and the final check of the result come from
// the WX_STRING_TO_X_TYPE_START/END macros: false is returned if pVal is
// NULL, if the string is empty or not entirely consumed by the conversion
// or, where errno is checked, if it was set to ERANGE.
1663 bool wxString::ToLong(long *pVal
, int base
) const
1665 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), _T("invalid base") );
1667 WX_STRING_TO_X_TYPE_START
1668 long val
= wxStrtol(start
, &end
, base
);
1669 WX_STRING_TO_X_TYPE_END
// Converts the string to an unsigned long in the given base using
// wxStrtoul().
//
// base must be either 0 or between 2 and 36 inclusive (this is asserted).
// The variables start and end and the final check of the result come from
// the WX_STRING_TO_X_TYPE_START/END macros.
1672 bool wxString::ToULong(unsigned long *pVal
, int base
) const
1674 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), _T("invalid base") );
1676 WX_STRING_TO_X_TYPE_START
1677 unsigned long val
= wxStrtoul(start
, &end
, base
);
1678 WX_STRING_TO_X_TYPE_END
// Converts the string to a wxLongLong_t in the given base using wxStrtoll().
//
// base must be either 0 or between 2 and 36 inclusive (this is asserted).
// The variables start and end and the final check of the result come from
// the WX_STRING_TO_X_TYPE_START/END macros.
1681 bool wxString::ToLongLong(wxLongLong_t
*pVal
, int base
) const
1683 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), _T("invalid base") );
1685 WX_STRING_TO_X_TYPE_START
1686 wxLongLong_t val
= wxStrtoll(start
, &end
, base
);
1687 WX_STRING_TO_X_TYPE_END
// Converts the string to a wxULongLong_t in the given base using
// wxStrtoull().
//
// base must be either 0 or between 2 and 36 inclusive (this is asserted).
// The variables start and end and the final check of the result come from
// the WX_STRING_TO_X_TYPE_START/END macros.
1690 bool wxString::ToULongLong(wxULongLong_t
*pVal
, int base
) const
1692 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), _T("invalid base") );
1694 WX_STRING_TO_X_TYPE_START
1695 wxULongLong_t val
= wxStrtoull(start
, &end
, base
);
1696 WX_STRING_TO_X_TYPE_END
// Converts the string to a double using wxStrtod().
//
// The variables start and end and the final check of the result come from
// the WX_STRING_TO_X_TYPE_START/END macros.
1699 bool wxString::ToDouble(double *pVal
) const
1701 WX_STRING_TO_X_TYPE_START
1702 double val
= wxStrtod(start
, &end
);
1703 WX_STRING_TO_X_TYPE_END
// Same as ToLong() but the conversion is done by wxStrtol_lA() or
// wxStrtol_l(), both of which are passed wxCLocale.
//
// The first function is used in UTF-8 and non-Unicode builds.
// NOTE(review): the #else/#endif lines separating the two alternatives are
// not visible here -- confirm that wxStrtol_l() is the other branch.
1708 bool wxString::ToCLong(long *pVal
, int base
) const
1710 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), _T("invalid base") );
1712 WX_STRING_TO_X_TYPE_START
1713 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
1714 long val
= wxStrtol_lA(start
, &end
, base
, wxCLocale
);
1716 long val
= wxStrtol_l(start
, &end
, base
, wxCLocale
);
1718 WX_STRING_TO_X_TYPE_END
// Same as ToULong() but the conversion is done by wxStrtoul_lA() or
// wxStrtoul_l(), both of which are passed wxCLocale.
//
// The first function is used in UTF-8 and non-Unicode builds.
// NOTE(review): the #else/#endif lines separating the two alternatives are
// not visible here -- confirm that wxStrtoul_l() is the other branch.
1721 bool wxString::ToCULong(unsigned long *pVal
, int base
) const
1723 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), _T("invalid base") );
1725 WX_STRING_TO_X_TYPE_START
1726 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
1727 unsigned long val
= wxStrtoul_lA(start
, &end
, base
, wxCLocale
);
1729 unsigned long val
= wxStrtoul_l(start
, &end
, base
, wxCLocale
);
1731 WX_STRING_TO_X_TYPE_END
// Same as ToDouble() but the conversion is done by wxStrtod_lA() or
// wxStrtod_l(), both of which are passed wxCLocale.
//
// The first function is used in UTF-8 and non-Unicode builds.
// NOTE(review): the #else/#endif lines separating the two alternatives are
// not visible here -- confirm that wxStrtod_l() is the other branch.
1734 bool wxString::ToCDouble(double *pVal
) const
1736 WX_STRING_TO_X_TYPE_START
1737 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
1738 double val
= wxStrtod_lA(start
, &end
, wxCLocale
);
1740 double val
= wxStrtod_l(start
, &end
, wxCLocale
);
1742 WX_STRING_TO_X_TYPE_END
1745 #endif // wxUSE_XLOCALE
1747 // ---------------------------------------------------------------------------
1749 // ---------------------------------------------------------------------------
// wxChar-based variadic formatting helper, only compiled when
// wxUSE_UTF8_LOCALE_ONLY is off.
//
// Depending on whether wxNEEDS_WXSTRING_PRINTF_MIXIN is defined, the function
// is a member of either wxStringPrintfMixinBase or wxString. It forwards the
// format string and the variable arguments to PrintfV() called on the string
// s.
// NOTE(review): the declarations of argptr and s, the va_end() call and the
// return statement are not visible here.
1751 #if !wxUSE_UTF8_LOCALE_ONLY
1753 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1754 wxString
wxStringPrintfMixinBase::DoFormatWchar(const wxChar
*format
, ...)
1756 wxString
wxString::DoFormatWchar(const wxChar
*format
, ...)
1760 va_start(argptr
, format
);
1763 s
.PrintfV(format
, argptr
);
1769 #endif // !wxUSE_UTF8_LOCALE_ONLY
#if wxUSE_UNICODE_UTF8

// UTF-8 build only: formats the variable arguments according to the
// char-based format string using PrintfV() and returns the resulting string.
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);

    result.PrintfV(format, args);

    va_end(args);

    return result;
}

#endif // wxUSE_UNICODE_UTF8
1788 wxString
wxString::FormatV(const wxString
& format
, va_list argptr
)
1791 s
.PrintfV(format
, argptr
);
// wxChar-based variadic printf helper, only compiled when
// wxUSE_UTF8_LOCALE_ONLY is off.
//
// Depending on whether wxNEEDS_WXSTRING_PRINTF_MIXIN is defined, the function
// is a member of either wxStringPrintfMixinBase or wxString. It forwards the
// format string and the variable arguments to PrintfV() of the wxString
// object and stores its return value in iLen.
// NOTE(review): the declaration of argptr, the va_end() call and the return
// statement are not visible here.
1795 #if !wxUSE_UTF8_LOCALE_ONLY
1796 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1797 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar
*format
, ...)
1799 int wxString::DoPrintfWchar(const wxChar
*format
, ...)
1803 va_start(argptr
, format
);
1805 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1806 // get a pointer to the wxString instance: "this" is the mixin base
1807 // here, so it is downcast to wxString using static_cast<>
1808 // NOTE(review): an earlier comment claimed dynamic_cast<> was required -- confirm
1809 wxString
*str
= static_cast<wxString
*>(this);
1811 wxString
*str
= this;
1814 int iLen
= str
->PrintfV(format
, argptr
);
1820 #endif // !wxUSE_UTF8_LOCALE_ONLY
#if wxUSE_UNICODE_UTF8
// UTF-8 build only: formats the variable arguments according to the
// char-based format string into this string using PrintfV() and returns the
// value returned by it.
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);

    const int lenResult = PrintfV(format, args);

    va_end(args);

    return lenResult;
}
#endif // wxUSE_UNICODE_UTF8
1837 Uses wxVsnprintf and places the result into this string.
1839 In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1840 it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in
1841 the ISO C99 (and thus SUSv3) standard the return value for the case of
1842 an undersized buffer is inconsistent. For conforming vsnprintf
1843 implementations the function must return the number of characters that
1844 would have been printed had the buffer been large enough. For conforming
1845 vswprintf implementations the function must return a negative number
1848 What vswprintf sets errno to is undefined but Darwin seems to set it to
1849 EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of
1850 those are defined in the standard and backed up by several conformance
1851 statements. Note that ENOMEM mentioned in the manual page does not
1852 apply to swprintf, only wprintf and fwprintf.
1854 Official manual page:
1855 http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1857 Some conformance statements (AIX, Solaris):
1858 http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1859 http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1861 Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1862 EILSEQ and EINVAL are specifically defined to mean the error is other than
1863 an undersized buffer and no other errno are defined we treat those two
1864 as meaning hard errors and everything else gets the old behavior which
1865 is to keep looping and increasing buffer size until the function succeeds.
1867 In practice it's impossible to determine before compilation which behavior
1868 may be used. The vswprintf function may have vsnprintf-like behavior or
1869 vice-versa. Behavior detected on one release can theoretically change
1870 with an updated release. Not to mention that configure testing for it
1871 would require the test to be run on the host system, not the build system
1872 which makes cross compilation difficult. Therefore, we make no assumptions
1873 about behavior and try our best to handle every known case, including the
1874 case where wxVsnprintf returns a negative number and fails to set errno.
1876 There is yet one more non-standard implementation and that is our own.
1877 Fortunately, that can be detected at compile-time.
1879 On top of all that, ISO C99 explicitly defines snprintf to write a null
1880 character to the last position of the specified buffer. That would be at
1881 the given buffer size minus 1. It is supposed to do this even if it
1882 turns out that the buffer is sized too small.
1884 Darwin (tested on 10.5) follows the C99 behavior exactly.
1886 Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1887 errno even when it fails. However, it only seems to ever fail due
1888 to an undersized buffer.
// Formats the arguments in argptr according to format into the string str
// and returns the length of the resulting string.
//
// In UTF-8 builds this is a template parametrized by the type of the buffer
// object used for writing into str, otherwise it is a plain function using
// wxStringBuffer. The buffer is created with room for size + 1 characters
// and filled by wxVsnprintf(), which is given a copy of argptr; depending on
// the value it returns, the size is increased and, judging by the comments
// below, the operation is retried.
// NOTE(review): many lines of this function (the initial value of size, the
// loop, the return statements for the error cases) are not visible here, so
// the description above is necessarily incomplete.
1890 #if wxUSE_UNICODE_UTF8
1891 template<typename BufferType
>
1893 // we only need one version in non-UTF8 builds and at least two Windows
1894 // compilers have problems with this function template, so use just one
1895 // normal function here
1897 static int DoStringPrintfV(wxString
& str
,
1898 const wxString
& format
, va_list argptr
)
1904 #if wxUSE_UNICODE_UTF8
1905 BufferType
tmp(str
, size
+ 1);
1906 typename
BufferType::CharType
*buf
= tmp
;
1908 wxStringBuffer
tmp(str
, size
+ 1);
1916 // in UTF-8 build, leaving uninitialized junk in the buffer
1917 // could result in invalid non-empty UTF-8 string, so just
1918 // reset the string to empty on failure:
1923 // wxVsnprintf() may modify the original arg pointer, so pass it
1926 wxVaCopy(argptrcopy
, argptr
);
1929 // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1932 int len
= wxVsnprintf(buf
, size
, format
, argptrcopy
);
1935 // some implementations of vsnprintf() don't NUL terminate
1936 // the string if there is not enough space for it so
1937 // always do it manually
1938 // FIXME: This really seems to be wrong and would be an off-by-one
1939 // bug except the code above allocates an extra character.
1940 buf
[size
] = _T('\0');
1942 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1943 // total number of characters which would have been written if the
1944 // buffer were large enough (newer standards such as Unix98)
1947 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1948 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1949 // is true if *both* of them use our own implementation,
1950 // otherwise we can't be sure
1951 #if wxUSE_WXVSNPRINTF
1952 // we know that our own implementation of wxVsnprintf() returns -1
1953 // only for a format error - thus there's something wrong with
1954 // the user's format string
1957 #else // possibly using system version
1958 // assume it only returns error if there is not enough space, but
1959 // as we don't know how much we need, double the current size of
1962 if( (errno
== EILSEQ
) || (errno
== EINVAL
) )
1963 // If errno was set to one of the two well-known hard errors
1964 // then fail immediately to avoid an infinite loop.
1967 #endif // __WXWINCE__
1968 // still not enough, as we don't know how much we need, double the
1969 // current size of the buffer
1971 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1973 else if ( len
>= size
)
1975 #if wxUSE_WXVSNPRINTF
1976 // we know that our own implementation of wxVsnprintf() returns
1977 // size+1 when there's not enough space but that's not the size
1978 // of the required buffer!
1979 size
*= 2; // so we just double the current size of the buffer
1981 // some vsnprintf() implementations NUL-terminate the buffer and
1982 // some don't in len == size case, to be safe always add 1
1983 // FIXME: I don't quite understand this comment. The vsnprintf
1984 // function is specifically defined to return the number of
1985 // characters printed not including the null terminator.
1986 // So OF COURSE you need to add 1 to get the right buffer size.
1987 // The following line is definitely correct, no question.
1991 else // ok, there was enough space
1997 // we could have overshot
2000 return str
.length();
// Formats the arguments in argptr according to format into this string by
// forwarding to the DoStringPrintfV() variant appropriate for the build and
// returns its result.
//
// In UTF-8 builds Utf8Buffer is defined as wxStringTypeBuffer<char> if
// wxUSE_STL_BASED_WXSTRING is set (the other typedef presumably being its
// alternative). The Utf8Buffer variant is used if wxUSE_UTF8_LOCALE_ONLY is
// set or if wxLocaleIsUtf8 is true at run-time; a wxStringBuffer variant also
// appears and the non-template DoStringPrintfV() is used in the remaining
// configuration.
// NOTE(review): the #else/#endif lines delimiting the branches are not
// visible here -- confirm the exact nesting.
2003 int wxString::PrintfV(const wxString
& format
, va_list argptr
)
2005 #if wxUSE_UNICODE_UTF8
2006 #if wxUSE_STL_BASED_WXSTRING
2007 typedef wxStringTypeBuffer
<char> Utf8Buffer
;
2009 typedef wxStringInternalBuffer Utf8Buffer
;
2013 #if wxUSE_UTF8_LOCALE_ONLY
2014 return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argptr
);
2016 #if wxUSE_UNICODE_UTF8
2017 if ( wxLocaleIsUtf8
)
2018 return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argptr
);
2021 return DoStringPrintfV
<wxStringBuffer
>(*this, format
, argptr
);
2023 return DoStringPrintfV(*this, format
, argptr
);
2024 #endif // UTF8/WCHAR
2028 // ----------------------------------------------------------------------------
2029 // misc other operations
2030 // ----------------------------------------------------------------------------
// Checks whether this string matches the wildcard pattern given by mask.
//
// The regex-based implementation is disabled by "#if 0" and so the
// hand-written matcher in the #else branch is the one which is used: it
// walks the mask and the text in parallel as wxChar strings (converting both
// of them to wide character buffers first in UTF-8 builds) and remembers the
// position of the last '*' seen in order to be able to backtrack to it if
// the match fails later.
// NOTE(review): many lines of this function (case labels, return statements,
// some declarations) are not visible here.
2032 // returns true if the string matches the pattern which may contain '*' and
2033 // '?' metacharacters (as usual, '?' matches any character and '*' any number
// of characters)
2035 bool wxString::Matches(const wxString
& mask
) const
2037 // I disable this code as it doesn't seem to be faster (in fact, it seems
2038 // to be much slower) than the old, hand-written code below and using it
2039 // here requires always linking with libregex even if the user code doesn't
2041 #if 0 // wxUSE_REGEX
2042 // first translate the shell-like mask into a regex
2044 pattern
.reserve(wxStrlen(pszMask
));
2056 pattern
+= _T(".*");
2067 // these characters are special in a RE, quote them
2068 // (however note that we don't quote '[' and ']' to allow
2069 // using them for Unix shell like matching)
2070 pattern
+= _T('\\');
2074 pattern
+= *pszMask
;
2082 return wxRegEx(pattern
, wxRE_NOSUB
| wxRE_EXTENDED
).Matches(c_str());
2083 #else // !wxUSE_REGEX
2084 // TODO: this is, of course, awfully inefficient...
2086 // FIXME-UTF8: implement using iterators, remove #if
2087 #if wxUSE_UNICODE_UTF8
2088 wxWCharBuffer maskBuf
= mask
.wc_str();
2089 wxWCharBuffer txtBuf
= wc_str();
2090 const wxChar
*pszMask
= maskBuf
.data();
2091 const wxChar
*pszTxt
= txtBuf
.data();
2093 const wxChar
*pszMask
= mask
.wx_str();
2094 // the char currently being checked
2095 const wxChar
*pszTxt
= wx_str();
2098 // the last location where '*' matched
2099 const wxChar
*pszLastStarInText
= NULL
;
2100 const wxChar
*pszLastStarInMask
= NULL
;
2103 for ( ; *pszMask
!= wxT('\0'); pszMask
++, pszTxt
++ ) {
2104 switch ( *pszMask
) {
2106 if ( *pszTxt
== wxT('\0') )
2109 // pszTxt and pszMask will be incremented in the loop statement
2115 // remember where we started to be able to backtrack later
2116 pszLastStarInText
= pszTxt
;
2117 pszLastStarInMask
= pszMask
;
2119 // ignore special chars immediately following this one
2120 // (should this be an error?)
2121 while ( *pszMask
== wxT('*') || *pszMask
== wxT('?') )
2124 // if there is nothing more, match
2125 if ( *pszMask
== wxT('\0') )
2128 // are there any other metacharacters in the mask?
2130 const wxChar
*pEndMask
= wxStrpbrk(pszMask
, wxT("*?"));
2132 if ( pEndMask
!= NULL
) {
2133 // we have to match the string between two metachars
2134 uiLenMask
= pEndMask
- pszMask
;
2137 // we have to match the remainder of the string
2138 uiLenMask
= wxStrlen(pszMask
);
2141 wxString
strToMatch(pszMask
, uiLenMask
);
2142 const wxChar
* pMatch
= wxStrstr(pszTxt
, strToMatch
);
2143 if ( pMatch
== NULL
)
2146 // -1 to compensate "++" in the loop
2147 pszTxt
= pMatch
+ uiLenMask
- 1;
2148 pszMask
+= uiLenMask
- 1;
2153 if ( *pszMask
!= *pszTxt
)
2159 // match only if nothing left
2160 if ( *pszTxt
== wxT('\0') )
2163 // if we failed to match, backtrack if we can
2164 if ( pszLastStarInText
) {
2165 pszTxt
= pszLastStarInText
+ 1;
2166 pszMask
= pszLastStarInMask
;
2168 pszLastStarInText
= NULL
;
2170 // don't bother resetting pszLastStarInMask, it's unnecessary
2176 #endif // wxUSE_REGEX/!wxUSE_REGEX
2179 // Count the number of chars
2180 int wxString::Freq(wxUniChar ch
) const
2183 for ( const_iterator i
= begin(); i
!= end(); ++i
)