1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
13 // ===========================================================================
14 // headers, declarations, constants
15 // ===========================================================================
17 // For compilers that support precompilation, includes "wx.h".
#include "wx/wxprec.h"
#include "wx/string.h"
#include "wx/wxcrtvararg.h"
#include "wx/hashmap.h"
#include "wx/vector.h"
#include "wx/xlocale.h"

#include <stdlib.h>
42 // string handling functions used by wxString:
43 #if wxUSE_UNICODE_UTF8
44 #define wxStringMemcpy memcpy
45 #define wxStringMemcmp memcmp
46 #define wxStringMemchr memchr
47 #define wxStringStrlen strlen
49 #define wxStringMemcpy wxTmemcpy
50 #define wxStringMemcmp wxTmemcmp
51 #define wxStringMemchr wxTmemchr
52 #define wxStringStrlen wxStrlen
55 // ----------------------------------------------------------------------------
57 // ----------------------------------------------------------------------------
// File-local buffer descriptor constructed from a NULL data pointer; it is
// the object the pointer defined below refers to.
62 static UntypedBufferData
s_untypedNullData(NULL
);
// Constant pointer to the descriptor above.
64 UntypedBufferData
* const untypedNullDataPtr
= &s_untypedNullData
;
66 } // namespace wxPrivate
68 // ---------------------------------------------------------------------------
69 // static class variables definition
70 // ---------------------------------------------------------------------------
72 //According to STL _must_ be a -1 size_t
73 const size_t wxString::npos
= (size_t) -1;
75 #if wxUSE_STRING_POS_CACHE
77 #ifdef wxHAS_COMPILER_TLS
// Definition of the static cache member, declared through wxTLS_TYPE(); only
// used in the branch where wxHAS_COMPILER_TLS is defined.
79 wxTLS_TYPE(wxString::Cache
) wxString::ms_cache
;
81 #else // !wxHAS_COMPILER_TLS
// Helper used when wxHAS_COMPILER_TLS is not defined: its constructor exists
// to get the cache initialized early (see the comment inside it).
83 struct wxStrCacheInitializer
85 wxStrCacheInitializer()
87 // calling this function triggers s_cache initialization in it, and
88 // from now on it becomes safe to call from multiple threads
// Accessor returning wxTLS_VALUE() of the function-local static s_cache.
94 wxString::Cache& wxString::GetCache()
96 static wxTLS_TYPE(Cache) s_cache;
98 return wxTLS_VALUE(s_cache);
// File-scope instance of the helper, so that its constructor runs as part of
// static initialization of this file.
102 static wxStrCacheInitializer gs_stringCacheInit
;
104 #endif // wxHAS_COMPILER_TLS/!wxHAS_COMPILER_TLS
106 // gdb seems to be unable to display thread-local variables correctly, at least
107 // not my 6.4.98 version under amd64, so provide this debugging helper to do it
108 #if wxDEBUG_LEVEL >= 2
110 struct wxStrCacheDumper
112 static void ShowAll()
114 puts("*** wxString cache dump:");
115 for ( unsigned n
= 0; n
< wxString::Cache::SIZE
; n
++ )
117 const wxString::Cache::Element
&
118 c
= wxString::GetCacheBegin()[n
];
120 printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
122 n
== wxString::LastUsedCacheElement() ? " [*]" : "",
124 (unsigned long)c
.pos
,
125 (unsigned long)c
.impl
,
131 void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
133 #endif // wxDEBUG_LEVEL >= 2
135 #ifdef wxPROFILE_STRING_CACHE
137 wxString::CacheStats
wxString::ms_cacheStats
;
139 struct wxStrCacheStatsDumper
141 ~wxStrCacheStatsDumper()
143 const wxString::CacheStats
& stats
= wxString::ms_cacheStats
;
147 puts("*** wxString cache statistics:");
148 printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
150 printf("\tHits %u (of which %u not used) or %.2f%%\n",
153 100.*float(stats
.poshits
- stats
.mishits
)/stats
.postot
);
154 printf("\tAverage position requested: %.2f\n",
155 float(stats
.sumpos
) / stats
.postot
);
156 printf("\tAverage offset after cached hint: %.2f\n",
157 float(stats
.sumofs
) / stats
.postot
);
162 printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
163 stats
.lentot
, 100.*float(stats
.lenhits
)/stats
.lentot
);
168 static wxStrCacheStatsDumper s_showCacheStats
;
170 #endif // wxPROFILE_STRING_CACHE
172 #endif // wxUSE_STRING_POS_CACHE
174 // ----------------------------------------------------------------------------
176 // ----------------------------------------------------------------------------
178 #if wxUSE_STD_IOSTREAM
182 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxCStrData
& str
)
184 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
185 const wxScopedCharBuffer
buf(str
.AsCharBuf());
187 os
.clear(wxSTD
ios_base::failbit
);
193 return os
<< str
.AsInternal();
197 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxString
& str
)
199 return os
<< str
.c_str();
202 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxScopedCharBuffer
& str
)
204 return os
<< str
.data();
208 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxScopedWCharBuffer
& str
)
210 return os
<< str
.data();
214 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
216 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxString
& str
)
218 return wos
<< str
.wc_str();
221 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxCStrData
& str
)
223 return wos
<< str
.AsWChar();
226 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxScopedWCharBuffer
& str
)
228 return wos
<< str
.data();
231 #endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
233 #endif // wxUSE_STD_IOSTREAM
235 // ===========================================================================
236 // wxString class core
237 // ===========================================================================
239 #if wxUSE_UNICODE_UTF8
241 void wxString::PosLenToImpl(size_t pos
, size_t len
,
242 size_t *implPos
, size_t *implLen
) const
248 else // have valid start position
250 const const_iterator b
= GetIterForNthChar(pos
);
251 *implPos
= wxStringImpl::const_iterator(b
.impl()) - m_impl
.begin();
256 else // have valid length too
258 // we need to handle the case of length specifying a substring
259 // going beyond the end of the string, just as std::string does
260 const const_iterator
e(end());
262 while ( len
&& i
<= e
)
268 *implLen
= i
.impl() - b
.impl();
273 #endif // wxUSE_UNICODE_UTF8
275 // ----------------------------------------------------------------------------
276 // wxCStrData converted strings caching
277 // ----------------------------------------------------------------------------
279 // FIXME-UTF8: temporarily disabled because it doesn't work with global
280 // string objects; re-enable after fixing this bug and benchmarking
281 // performance to see if using a hash is a good idea at all
284 // For backward compatibility reasons, it must be possible to assign the value
285 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
286 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
287 // because the memory would be freed immediately, but it has to be valid as long
288 // as the string is not modified, so that code like this still works:
290 // const wxChar *s = str.c_str();
291 // while ( s ) { ... }
293 // FIXME-UTF8: not thread safe!
294 // FIXME-UTF8: we currently clear the cached conversion only when the string is
295 // destroyed, but we should do it when the string is modified, to
296 // keep memory usage down
297 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
298 // invalidated the cache on every change, we could keep the previous
300 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
301 // to use mb_str() or wc_str() instead of (const [w]char*)c_str()
304 static inline void DeleteStringFromConversionCache(T
& hash
, const wxString
*s
)
306 typename
T::iterator i
= hash
.find(wxConstCast(s
, wxString
));
307 if ( i
!= hash
.end() )
315 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
316 // so we have to use wxString* here and const-cast when used
317 WX_DECLARE_HASH_MAP(wxString
*, char*, wxPointerHash
, wxPointerEqual
,
318 wxStringCharConversionCache
);
319 static wxStringCharConversionCache gs_stringsCharCache
;
321 const char* wxCStrData::AsChar() const
323 // remove previously cache value, if any (see FIXMEs above):
324 DeleteStringFromConversionCache(gs_stringsCharCache
, m_str
);
326 // convert the string and keep it:
327 const char *s
= gs_stringsCharCache
[wxConstCast(m_str
, wxString
)] =
328 m_str
->mb_str().release();
332 #endif // wxUSE_UNICODE
334 #if !wxUSE_UNICODE_WCHAR
335 WX_DECLARE_HASH_MAP(wxString
*, wchar_t*, wxPointerHash
, wxPointerEqual
,
336 wxStringWCharConversionCache
);
337 static wxStringWCharConversionCache gs_stringsWCharCache
;
339 const wchar_t* wxCStrData::AsWChar() const
341 // remove previously cache value, if any (see FIXMEs above):
342 DeleteStringFromConversionCache(gs_stringsWCharCache
, m_str
);
344 // convert the string and keep it:
345 const wchar_t *s
= gs_stringsWCharCache
[wxConstCast(m_str
, wxString
)] =
346 m_str
->wc_str().release();
350 #endif // !wxUSE_UNICODE_WCHAR
352 wxString::~wxString()
355 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
356 DeleteStringFromConversionCache(gs_stringsCharCache
, this);
358 #if !wxUSE_UNICODE_WCHAR
359 DeleteStringFromConversionCache(gs_stringsWCharCache
, this);
364 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
365 const char* wxCStrData::AsChar() const
367 #if wxUSE_UNICODE_UTF8
368 if ( wxLocaleIsUtf8
)
371 // under non-UTF8 locales, we have to convert the internal UTF-8
372 // representation using wxConvLibc and cache the result
374 wxString
*str
= wxConstCast(m_str
, wxString
);
376 // convert the string:
378 // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
379 // have it) but it's unfortunately not obvious to implement
380 // because we don't know how big buffer do we need for the
381 // given string length (in case of multibyte encodings, e.g.
382 // ISO-2022-JP or UTF-8 when internal representation is wchar_t)
384 // One idea would be to store more than just m_convertedToChar
385 // in wxString: then we could record the length of the string
386 // which was converted the last time and try to reuse the same
387 // buffer if the current length is not greater than it (this
388 // could still fail because string could have been modified in
389 // place but it would work most of the time, so we'd do it and
390 // only allocate the new buffer if in-place conversion returned
391 // an error). We could also store a bit saying if the string
392 // was modified since the last conversion (and update it in all
393 // operation modifying the string, of course) to avoid unneeded
394 // consequential conversions. But both of these ideas require
395 // adding more fields to wxString and require profiling results
396 // to be sure that we really gain enough from them to justify
398 wxScopedCharBuffer
buf(str
->mb_str());
400 // if it failed, return empty string and not NULL to avoid crashes in code
401 // written with either wxWidgets 2 wxString or std::string behaviour in
402 // mind: neither of them ever returns NULL and so we shouldn't neither
406 if ( str
->m_convertedToChar
&&
407 strlen(buf
) == strlen(str
->m_convertedToChar
) )
409 // keep the same buffer for as long as possible, so that several calls
410 // to c_str() in a row still work:
411 strcpy(str
->m_convertedToChar
, buf
);
415 str
->m_convertedToChar
= buf
.release();
419 return str
->m_convertedToChar
+ m_offset
;
421 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
423 #if !wxUSE_UNICODE_WCHAR
424 const wchar_t* wxCStrData::AsWChar() const
426 wxString
*str
= wxConstCast(m_str
, wxString
);
428 // convert the string:
429 wxScopedWCharBuffer
buf(str
->wc_str());
431 // notice that here, unlike above in AsChar(), conversion can't fail as our
432 // internal UTF-8 is always well-formed -- or the string was corrupted and
433 // all bets are off anyhow
435 // FIXME-UTF8: do the conversion in-place in the existing buffer
436 if ( str
->m_convertedToWChar
&&
437 wxWcslen(buf
) == wxWcslen(str
->m_convertedToWChar
) )
439 // keep the same buffer for as long as possible, so that several calls
440 // to c_str() in a row still work:
441 memcpy(str
->m_convertedToWChar
, buf
, sizeof(wchar_t) * wxWcslen(buf
));
445 str
->m_convertedToWChar
= buf
.release();
449 return str
->m_convertedToWChar
+ m_offset
;
451 #endif // !wxUSE_UNICODE_WCHAR
453 // ===========================================================================
454 // wxString class core
455 // ===========================================================================
457 // ---------------------------------------------------------------------------
458 // construction and conversion
459 // ---------------------------------------------------------------------------
461 #if wxUSE_UNICODE_WCHAR
463 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
464 const wxMBConv
& conv
)
467 if ( !psz
|| nLength
== 0 )
468 return SubstrBufFromMB(wxWCharBuffer(L
""), 0);
470 if ( nLength
== npos
)
474 wxScopedWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
476 return SubstrBufFromMB(wxWCharBuffer(L
""), 0);
478 return SubstrBufFromMB(wcBuf
, wcLen
);
480 #endif // wxUSE_UNICODE_WCHAR
482 #if wxUSE_UNICODE_UTF8
484 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
485 const wxMBConv
& conv
)
488 if ( !psz
|| nLength
== 0 )
489 return SubstrBufFromMB(wxCharBuffer(""), 0);
491 // if psz is already in UTF-8, we don't have to do the roundtrip to
492 // wchar_t* and back:
495 // we need to validate the input because UTF8 iterators assume valid
496 // UTF-8 sequence and psz may be invalid:
497 if ( wxStringOperations::IsValidUtf8String(psz
, nLength
) )
499 // we must pass the real string length to SubstrBufFromMB ctor
500 if ( nLength
== npos
)
501 nLength
= psz
? strlen(psz
) : 0;
502 return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz
), nLength
);
504 // else: do the roundtrip through wchar_t*
507 if ( nLength
== npos
)
510 // first convert to wide string:
512 wxScopedWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
514 return SubstrBufFromMB(wxCharBuffer(""), 0);
516 // and then to UTF-8:
517 SubstrBufFromMB
buf(ConvertStr(wcBuf
, wcLen
, wxMBConvStrictUTF8()));
518 // widechar -> UTF-8 conversion isn't supposed to ever fail:
519 wxASSERT_MSG( buf
.data
, _T("conversion to UTF-8 failed") );
523 #endif // wxUSE_UNICODE_UTF8
525 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
527 wxString::SubstrBufFromWC
wxString::ConvertStr(const wchar_t *pwz
, size_t nLength
,
528 const wxMBConv
& conv
)
531 if ( !pwz
|| nLength
== 0 )
532 return SubstrBufFromWC(wxCharBuffer(""), 0);
534 if ( nLength
== npos
)
538 wxScopedCharBuffer
mbBuf(conv
.cWC2MB(pwz
, nLength
, &mbLen
));
540 return SubstrBufFromWC(wxCharBuffer(""), 0);
542 return SubstrBufFromWC(mbBuf
, mbLen
);
544 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
547 #if wxUSE_UNICODE_WCHAR
549 //Convert wxString in Unicode mode to a multi-byte string
550 const wxScopedCharBuffer
wxString::mb_str(const wxMBConv
& conv
) const
552 return conv
.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL
);
555 #elif wxUSE_UNICODE_UTF8
557 const wxScopedWCharBuffer
wxString::wc_str() const
559 return wxMBConvStrictUTF8().cMB2WC
562 m_impl
.length() + 1, // size, not length
567 const wxScopedCharBuffer
wxString::mb_str(const wxMBConv
& conv
) const
570 return wxScopedCharBuffer::CreateNonOwned(m_impl
.c_str());
572 // FIXME-UTF8: use wc_str() here once we have buffers with length
575 wxScopedWCharBuffer wcBuf
577 wxMBConvStrictUTF8().cMB2WC
580 m_impl
.length() + 1, // size
585 return wxCharBuffer("");
587 return conv
.cWC2MB(wcBuf
, wcLen
+1, NULL
);
592 //Converts this string to a wide character string if unicode
593 //mode is not enabled and wxUSE_WCHAR_T is enabled
594 const wxScopedWCharBuffer
wxString::wc_str(const wxMBConv
& conv
) const
596 return conv
.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL
);
599 #endif // Unicode/ANSI
601 // shrink to minimal size (releasing extra memory)
602 bool wxString::Shrink()
604 wxString
tmp(begin(), end());
606 return tmp
.length() == length();
609 // deprecated compatibility code:
610 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
611 wxStringCharType
*wxString::GetWriteBuf(size_t nLen
)
613 return DoGetWriteBuf(nLen
);
616 void wxString::UngetWriteBuf()
621 void wxString::UngetWriteBuf(size_t nLen
)
623 DoUngetWriteBuf(nLen
);
625 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
628 // ---------------------------------------------------------------------------
630 // ---------------------------------------------------------------------------
632 // all functions are inline in string.h
634 // ---------------------------------------------------------------------------
635 // concatenation operators
636 // ---------------------------------------------------------------------------
639 * concatenation functions come in 5 flavours:
641 * char + string and string + char
642 * C str + string and string + C str
645 wxString
operator+(const wxString
& str1
, const wxString
& str2
)
647 #if !wxUSE_STL_BASED_WXSTRING
648 wxASSERT( str1
.IsValid() );
649 wxASSERT( str2
.IsValid() );
658 wxString
operator+(const wxString
& str
, wxUniChar ch
)
660 #if !wxUSE_STL_BASED_WXSTRING
661 wxASSERT( str
.IsValid() );
670 wxString
operator+(wxUniChar ch
, const wxString
& str
)
672 #if !wxUSE_STL_BASED_WXSTRING
673 wxASSERT( str
.IsValid() );
682 wxString
operator+(const wxString
& str
, const char *psz
)
684 #if !wxUSE_STL_BASED_WXSTRING
685 wxASSERT( str
.IsValid() );
689 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
690 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
698 wxString
operator+(const wxString
& str
, const wchar_t *pwz
)
700 #if !wxUSE_STL_BASED_WXSTRING
701 wxASSERT( str
.IsValid() );
705 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
706 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
714 wxString
operator+(const char *psz
, const wxString
& str
)
716 #if !wxUSE_STL_BASED_WXSTRING
717 wxASSERT( str
.IsValid() );
721 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
722 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
730 wxString
operator+(const wchar_t *pwz
, const wxString
& str
)
732 #if !wxUSE_STL_BASED_WXSTRING
733 wxASSERT( str
.IsValid() );
737 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
738 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
746 // ---------------------------------------------------------------------------
748 // ---------------------------------------------------------------------------
750 bool wxString::IsSameAs(wxUniChar c
, bool compareWithCase
) const
752 return (length() == 1) && (compareWithCase
? GetChar(0u) == c
753 : wxToupper(GetChar(0u)) == wxToupper(c
));
756 #ifdef HAVE_STD_STRING_COMPARE
758 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
759 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
760 // sort strings in characters code point order by sorting the byte sequence
761 // in byte values order (i.e. what strcmp() and memcmp() do).
763 int wxString::compare(const wxString
& str
) const
765 return m_impl
.compare(str
.m_impl
);
768 int wxString::compare(size_t nStart
, size_t nLen
,
769 const wxString
& str
) const
772 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
773 return m_impl
.compare(pos
, len
, str
.m_impl
);
776 int wxString::compare(size_t nStart
, size_t nLen
,
778 size_t nStart2
, size_t nLen2
) const
781 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
784 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
786 return m_impl
.compare(pos
, len
, str
.m_impl
, pos2
, len2
);
789 int wxString::compare(const char* sz
) const
791 return m_impl
.compare(ImplStr(sz
));
794 int wxString::compare(const wchar_t* sz
) const
796 return m_impl
.compare(ImplStr(sz
));
799 int wxString::compare(size_t nStart
, size_t nLen
,
800 const char* sz
, size_t nCount
) const
803 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
805 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
807 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
810 int wxString::compare(size_t nStart
, size_t nLen
,
811 const wchar_t* sz
, size_t nCount
) const
814 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
816 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
818 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
821 #else // !HAVE_STD_STRING_COMPARE
823 static inline int wxDoCmp(const wxStringCharType
* s1
, size_t l1
,
824 const wxStringCharType
* s2
, size_t l2
)
827 return wxStringMemcmp(s1
, s2
, l1
);
830 int ret
= wxStringMemcmp(s1
, s2
, l1
);
831 return ret
== 0 ? -1 : ret
;
835 int ret
= wxStringMemcmp(s1
, s2
, l2
);
836 return ret
== 0 ? +1 : ret
;
840 int wxString::compare(const wxString
& str
) const
842 return ::wxDoCmp(m_impl
.data(), m_impl
.length(),
843 str
.m_impl
.data(), str
.m_impl
.length());
846 int wxString::compare(size_t nStart
, size_t nLen
,
847 const wxString
& str
) const
849 wxASSERT(nStart
<= length());
850 size_type strLen
= length() - nStart
;
851 nLen
= strLen
< nLen
? strLen
: nLen
;
854 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
856 return ::wxDoCmp(m_impl
.data() + pos
, len
,
857 str
.m_impl
.data(), str
.m_impl
.length());
860 int wxString::compare(size_t nStart
, size_t nLen
,
862 size_t nStart2
, size_t nLen2
) const
864 wxASSERT(nStart
<= length());
865 wxASSERT(nStart2
<= str
.length());
866 size_type strLen
= length() - nStart
,
867 strLen2
= str
.length() - nStart2
;
868 nLen
= strLen
< nLen
? strLen
: nLen
;
869 nLen2
= strLen2
< nLen2
? strLen2
: nLen2
;
872 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
874 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
876 return ::wxDoCmp(m_impl
.data() + pos
, len
,
877 str
.m_impl
.data() + pos2
, len2
);
880 int wxString::compare(const char* sz
) const
882 SubstrBufFromMB
str(ImplStr(sz
, npos
));
883 if ( str
.len
== npos
)
884 str
.len
= wxStringStrlen(str
.data
);
885 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
888 int wxString::compare(const wchar_t* sz
) const
890 SubstrBufFromWC
str(ImplStr(sz
, npos
));
891 if ( str
.len
== npos
)
892 str
.len
= wxStringStrlen(str
.data
);
893 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
896 int wxString::compare(size_t nStart
, size_t nLen
,
897 const char* sz
, size_t nCount
) const
899 wxASSERT(nStart
<= length());
900 size_type strLen
= length() - nStart
;
901 nLen
= strLen
< nLen
? strLen
: nLen
;
904 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
906 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
907 if ( str
.len
== npos
)
908 str
.len
= wxStringStrlen(str
.data
);
910 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
913 int wxString::compare(size_t nStart
, size_t nLen
,
914 const wchar_t* sz
, size_t nCount
) const
916 wxASSERT(nStart
<= length());
917 size_type strLen
= length() - nStart
;
918 nLen
= strLen
< nLen
? strLen
: nLen
;
921 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
923 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
924 if ( str
.len
== npos
)
925 str
.len
= wxStringStrlen(str
.data
);
927 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
930 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
933 // ---------------------------------------------------------------------------
934 // find_{first,last}_[not]_of functions
935 // ---------------------------------------------------------------------------
937 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
939 // NB: All these functions are implemented with the argument being wxChar*,
940 // i.e. widechar string in any Unicode build, even though native string
941 // representation is char* in the UTF-8 build. This is because we couldn't
942 // use memchr() to determine if a character is in a set encoded as UTF-8.
944 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
) const
946 return find_first_of(sz
, nStart
, wxStrlen(sz
));
949 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
) const
951 return find_first_not_of(sz
, nStart
, wxStrlen(sz
));
954 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
956 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
959 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
961 if ( wxTmemchr(sz
, *i
, n
) )
968 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
970 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
973 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
975 if ( !wxTmemchr(sz
, *i
, n
) )
983 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
) const
985 return find_last_of(sz
, nStart
, wxStrlen(sz
));
988 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
) const
990 return find_last_not_of(sz
, nStart
, wxStrlen(sz
));
993 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
995 size_t len
= length();
997 if ( nStart
== npos
)
1003 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
1006 size_t idx
= nStart
;
1007 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
1008 i
!= rend(); --idx
, ++i
)
1010 if ( wxTmemchr(sz
, *i
, n
) )
1017 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
1019 size_t len
= length();
1021 if ( nStart
== npos
)
1027 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
1030 size_t idx
= nStart
;
1031 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
1032 i
!= rend(); --idx
, ++i
)
1034 if ( !wxTmemchr(sz
, *i
, n
) )
1041 size_t wxString::find_first_not_of(wxUniChar ch
, size_t nStart
) const
1043 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
1045 size_t idx
= nStart
;
1046 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
1055 size_t wxString::find_last_not_of(wxUniChar ch
, size_t nStart
) const
1057 size_t len
= length();
1059 if ( nStart
== npos
)
1065 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
1068 size_t idx
= nStart
;
1069 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
1070 i
!= rend(); --idx
, ++i
)
1079 // the functions above were implemented for wchar_t* arguments in Unicode
1080 // build and char* in ANSI build; below are implementations for the other
1083 #define wxOtherCharType char
1084 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
1086 #define wxOtherCharType wchar_t
1087 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
1090 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
) const
1091 { return find_first_of(STRCONV(sz
), nStart
); }
1093 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
,
1095 { return find_first_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1096 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
) const
1097 { return find_last_of(STRCONV(sz
), nStart
); }
1098 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
,
1100 { return find_last_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1101 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
1102 { return find_first_not_of(STRCONV(sz
), nStart
); }
1103 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
,
1105 { return find_first_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1106 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
1107 { return find_last_not_of(STRCONV(sz
), nStart
); }
1108 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
,
1110 { return find_last_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1112 #undef wxOtherCharType
1115 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1117 // ===========================================================================
1118 // other common string functions
1119 // ===========================================================================
1121 int wxString::CmpNoCase(const wxString
& s
) const
1123 #if wxUSE_UNICODE_UTF8
1124 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
1126 const_iterator i1
= begin();
1127 const_iterator end1
= end();
1128 const_iterator i2
= s
.begin();
1129 const_iterator end2
= s
.end();
1131 for ( ; i1
!= end1
&& i2
!= end2
; ++i1
, ++i2
)
1133 wxUniChar lower1
= (wxChar
)wxTolower(*i1
);
1134 wxUniChar lower2
= (wxChar
)wxTolower(*i2
);
1135 if ( lower1
!= lower2
)
1136 return lower1
< lower2
? -1 : 1;
1139 size_t len1
= length();
1140 size_t len2
= s
.length();
1144 else if ( len1
> len2
)
1147 #else // wxUSE_UNICODE_WCHAR or ANSI
1148 return wxStricmp(m_impl
.c_str(), s
.m_impl
.c_str());
1156 #ifndef __SCHAR_MAX__
1157 #define __SCHAR_MAX__ 127
1161 wxString
wxString::FromAscii(const char *ascii
, size_t len
)
1163 if (!ascii
|| len
== 0)
1164 return wxEmptyString
;
1169 wxStringInternalBuffer
buf(res
, len
);
1170 wxStringCharType
*dest
= buf
;
1172 for ( ; len
> 0; --len
)
1174 unsigned char c
= (unsigned char)*ascii
++;
1175 wxASSERT_MSG( c
< 0x80,
1176 _T("Non-ASCII value passed to FromAscii().") );
1178 *dest
++ = (wchar_t)c
;
1185 wxString
wxString::FromAscii(const char *ascii
)
1187 return FromAscii(ascii
, wxStrlen(ascii
));
1190 wxString
wxString::FromAscii(char ascii
)
1192 // What do we do with '\0' ?
1194 unsigned char c
= (unsigned char)ascii
;
1196 wxASSERT_MSG( c
< 0x80, _T("Non-ASCII value passed to FromAscii().") );
1198 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1199 return wxString(wxUniChar((wchar_t)c
));
1202 const wxScopedCharBuffer
wxString::ToAscii() const
1204 // this will allocate enough space for the terminating NUL too
1205 wxCharBuffer
buffer(length());
1206 char *dest
= buffer
.data();
1208 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1211 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1212 *dest
++ = c
.IsAscii() ? (char)c
: '_';
1214 // the output string can't have embedded NULs anyhow, so we can safely
1215 // stop at first of them even if we do have any
1223 #endif // wxUSE_UNICODE
1225 // extract string of length nCount starting at nFirst
1226 wxString
wxString::Mid(size_t nFirst
, size_t nCount
) const
1228 size_t nLen
= length();
1230 // default value of nCount is npos and means "till the end"
1231 if ( nCount
== npos
)
1233 nCount
= nLen
- nFirst
;
1236 // out-of-bounds requests return sensible things
1237 if ( nFirst
+ nCount
> nLen
)
1239 nCount
= nLen
- nFirst
;
1242 if ( nFirst
> nLen
)
1244 // AllocCopy() will return empty string
1245 return wxEmptyString
;
1248 wxString
dest(*this, nFirst
, nCount
);
1249 if ( dest
.length() != nCount
)
1251 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1257 // check that the string starts with prefix and return the rest of the string
1258 // in the provided pointer if it is not NULL, otherwise return false
1259 bool wxString::StartsWith(const wxString
& prefix
, wxString
*rest
) const
1261 if ( compare(0, prefix
.length(), prefix
) != 0 )
1266 // put the rest of the string into provided pointer
1267 rest
->assign(*this, prefix
.length(), npos
);
1274 // check that the string ends with suffix and return the rest of it in the
1275 // provided pointer if it is not NULL, otherwise return false
1276 bool wxString::EndsWith(const wxString
& suffix
, wxString
*rest
) const
1278 int start
= length() - suffix
.length();
1280 if ( start
< 0 || compare(start
, npos
, suffix
) != 0 )
1285 // put the rest of the string into provided pointer
1286 rest
->assign(*this, 0, start
);
1293 // extract nCount last (rightmost) characters
1294 wxString
wxString::Right(size_t nCount
) const
1296 if ( nCount
> length() )
1299 wxString
dest(*this, length() - nCount
, nCount
);
1300 if ( dest
.length() != nCount
) {
1301 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1306 // get all characters after the last occurrence of ch
1307 // (returns the whole string if ch not found)
1308 wxString
wxString::AfterLast(wxUniChar ch
) const
1311 int iPos
= Find(ch
, true);
1312 if ( iPos
== wxNOT_FOUND
)
1315 str
.assign(*this, iPos
+ 1, npos
);
1320 // extract nCount first (leftmost) characters
1321 wxString
wxString::Left(size_t nCount
) const
1323 if ( nCount
> length() )
1326 wxString
dest(*this, 0, nCount
);
1327 if ( dest
.length() != nCount
) {
1328 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1333 // get all characters before the first occurrence of ch
1334 // (returns the whole string if ch not found)
1335 wxString
wxString::BeforeFirst(wxUniChar ch
) const
1337 int iPos
= Find(ch
);
1338 if ( iPos
== wxNOT_FOUND
)
1340 return wxString(*this, 0, iPos
);
1343 /// get all characters before the last occurrence of ch
1344 /// (returns empty string if ch not found)
1345 wxString
wxString::BeforeLast(wxUniChar ch
) const
1348 int iPos
= Find(ch
, true);
1349 if ( iPos
!= wxNOT_FOUND
&& iPos
!= 0 )
1350 str
= wxString(c_str(), iPos
);
1355 /// get all characters after the first occurrence of ch
1356 /// (returns empty string if ch not found)
1357 wxString
wxString::AfterFirst(wxUniChar ch
) const
1360 int iPos
= Find(ch
);
1361 if ( iPos
!= wxNOT_FOUND
)
1362 str
.assign(*this, iPos
+ 1, npos
);
1367 // replace first (or all) occurrences of some substring with another one
1368 size_t wxString::Replace(const wxString
& strOld
,
1369 const wxString
& strNew
, bool bReplaceAll
)
1371 // if we tried to replace an empty string we'd enter an infinite loop below
1372 wxCHECK_MSG( !strOld
.empty(), 0,
1373 _T("wxString::Replace(): invalid parameter") );
1375 wxSTRING_INVALIDATE_CACHE();
1377 size_t uiCount
= 0; // count of replacements made
1379 // optimize the special common case: replacement of one character by
1380 // another one (in UTF-8 case we can only do this for ASCII characters)
1382 // benchmarks show that this special version is around 3 times faster
1383 // (depending on the proportion of matching characters and UTF-8/wchar_t
1385 if ( strOld
.m_impl
.length() == 1 && strNew
.m_impl
.length() == 1 )
1387 const wxStringCharType chOld
= strOld
.m_impl
[0],
1388 chNew
= strNew
.m_impl
[0];
1390 // this loop is the simplified version of the one below
1391 for ( size_t pos
= 0; ; )
1393 pos
= m_impl
.find(chOld
, pos
);
1397 m_impl
[pos
++] = chNew
;
1405 else if ( !bReplaceAll
)
1407 size_t pos
= m_impl
.find(strOld
, 0);
1410 m_impl
.replace(pos
, strOld
.m_impl
.length(), strNew
.m_impl
);
1414 else // replace all occurrences
1416 const size_t uiOldLen
= strOld
.m_impl
.length();
1417 const size_t uiNewLen
= strNew
.m_impl
.length();
1419 // first scan the string to find all positions at which the replacement
1421 wxVector
<size_t> replacePositions
;
1424 for ( pos
= m_impl
.find(strOld
.m_impl
, 0);
1426 pos
= m_impl
.find(strOld
.m_impl
, pos
+ uiOldLen
))
1428 replacePositions
.push_back(pos
);
1435 // allocate enough memory for the whole new string
1437 tmp
.m_impl
.reserve(m_impl
.length() + uiCount
*(uiNewLen
- uiOldLen
));
1439 // copy this string to tmp doing replacements on the fly
1441 for ( pos
= 0; replNum
< uiCount
; replNum
++ )
1443 const size_t nextReplPos
= replacePositions
[replNum
];
1445 if ( pos
!= nextReplPos
)
1447 tmp
.m_impl
.append(m_impl
, pos
, nextReplPos
- pos
);
1450 tmp
.m_impl
.append(strNew
.m_impl
);
1451 pos
= nextReplPos
+ uiOldLen
;
1454 if ( pos
!= m_impl
.length() )
1456 // append the rest of the string unchanged
1457 tmp
.m_impl
.append(m_impl
, pos
, m_impl
.length() - pos
);
1466 bool wxString::IsAscii() const
1468 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1470 if ( !(*i
).IsAscii() )
1477 bool wxString::IsWord() const
1479 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1481 if ( !wxIsalpha(*i
) )
1488 bool wxString::IsNumber() const
1493 const_iterator i
= begin();
1495 if ( *i
== _T('-') || *i
== _T('+') )
1498 for ( ; i
!= end(); ++i
)
1500 if ( !wxIsdigit(*i
) )
1507 wxString
wxString::Strip(stripType w
) const
1510 if ( w
& leading
) s
.Trim(false);
1511 if ( w
& trailing
) s
.Trim(true);
1515 // ---------------------------------------------------------------------------
1517 // ---------------------------------------------------------------------------
1519 wxString
& wxString::MakeUpper()
1521 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1522 *it
= (wxChar
)wxToupper(*it
);
1527 wxString
& wxString::MakeLower()
1529 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1530 *it
= (wxChar
)wxTolower(*it
);
1535 wxString
& wxString::MakeCapitalized()
1537 const iterator en
= end();
1538 iterator it
= begin();
1541 *it
= (wxChar
)wxToupper(*it
);
1542 for ( ++it
; it
!= en
; ++it
)
1543 *it
= (wxChar
)wxTolower(*it
);
1549 // ---------------------------------------------------------------------------
1550 // trimming and padding
1551 // ---------------------------------------------------------------------------
1553 // some compilers (VC++ 6.0 not to name them) return true for a call to
1554 // isspace('\xEA') in the C locale which seems to be broken to me, but we have
1555 // to live with this by checking that the character is a 7 bit one - even if
1556 // this may fail to detect some spaces (I don't know if Unicode doesn't have
1557 // space-like symbols somewhere except in the first 128 chars), it is arguably
1558 // still better than trimming away accented letters
1559 inline int wxSafeIsspace(wxChar ch
) { return (ch
< 127) && wxIsspace(ch
); }
1561 // trims spaces (in the sense of isspace) from left or right side
1562 wxString
& wxString::Trim(bool bFromRight
)
1564 // first check if we're going to modify the string at all
1567 (bFromRight
&& wxSafeIsspace(GetChar(length() - 1))) ||
1568 (!bFromRight
&& wxSafeIsspace(GetChar(0u)))
1574 // find last non-space character
1575 reverse_iterator psz
= rbegin();
1576 while ( (psz
!= rend()) && wxSafeIsspace(*psz
) )
1579 // truncate at trailing space start
1580 erase(psz
.base(), end());
1584 // find first non-space character
1585 iterator psz
= begin();
1586 while ( (psz
!= end()) && wxSafeIsspace(*psz
) )
1589 // fix up data and length
1590 erase(begin(), psz
);
1597 // adds nCount characters chPad to the string from either side
1598 wxString
& wxString::Pad(size_t nCount
, wxUniChar chPad
, bool bFromRight
)
1600 wxString
s(chPad
, nCount
);
1613 // truncate the string
1614 wxString
& wxString::Truncate(size_t uiLen
)
1616 if ( uiLen
< length() )
1618 erase(begin() + uiLen
, end());
1620 //else: nothing to do, string is already short enough
1625 // ---------------------------------------------------------------------------
1626 // finding (return wxNOT_FOUND if not found and index otherwise)
1627 // ---------------------------------------------------------------------------
1630 int wxString::Find(wxUniChar ch
, bool bFromEnd
) const
1632 size_type idx
= bFromEnd
? find_last_of(ch
) : find_first_of(ch
);
1634 return (idx
== npos
) ? wxNOT_FOUND
: (int)idx
;
1637 // ----------------------------------------------------------------------------
1638 // conversion to numbers
1639 // ----------------------------------------------------------------------------
1641 // The implementation of all the functions below is exactly the same so factor
1642 // it out. Note that number extraction works correctly on UTF-8 strings, so
1643 // we can use wxStringCharType and wx_str() for maximum efficiency.
//
// DO_IF_NOT_WINCE(x) has two alternative definitions: one expanding to its
// argument and one expanding to nothing. It wraps the errno accesses below.
// NOTE(review): the preprocessor test selecting between the two definitions
// is not part of this excerpt -- presumably __WXWINCE__, going by the macro
// name; confirm.
1646 #define DO_IF_NOT_WINCE(x) x
1648 #define DO_IF_NOT_WINCE(x)
// WX_STRING_TO_X_TYPE_START expects an output pointer called "pVal" to be
// in scope and fails the call if it is NULL. It resets errno (through
// DO_IF_NOT_WINCE) and declares "start", pointing to the string data, and
// "end", to be filled in by the conversion function.
1651 #define WX_STRING_TO_X_TYPE_START \
1652 wxCHECK_MSG( pVal, false, _T("NULL output pointer") ); \
1653 DO_IF_NOT_WINCE( errno = 0; ) \
1654 const wxStringCharType *start = wx_str(); \
1655 wxStringCharType *end;
// WX_STRING_TO_X_TYPE_END checks the outcome of the conversion: trailing
// unparsed characters (*end != 0), an empty parse (end == start) and, via
// DO_IF_NOT_WINCE, errno == ERANGE are all treated as failure.
1657 #define WX_STRING_TO_X_TYPE_END \
1658 /* return true only if scan was stopped by the terminating NUL and */ \
1659 /* if the string was not empty to start with and no under/overflow */ \
1661 if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) ) \
// Convert the string to a long using wxStrtol() in the given base and
// store the result through pVal.
//
// base must be 0 or lie in the 2..36 range (asserted). The NULL check of
// pVal and the success/failure reporting are done by the
// WX_STRING_TO_X_TYPE_START/END macros, which rely on the local names
// "start", "end" and "val" used here.
1666 bool wxString::ToLong(long *pVal
, int base
) const
1668 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), _T("invalid base") );
1670 WX_STRING_TO_X_TYPE_START
1671 long val
= wxStrtol(start
, &end
, base
);
1672 WX_STRING_TO_X_TYPE_END
// Convert the string to an unsigned long using wxStrtoul() in the given
// base and store the result through pVal.
//
// base must be 0 or lie in the 2..36 range (asserted). The NULL check of
// pVal and the success/failure reporting are done by the
// WX_STRING_TO_X_TYPE_START/END macros, which rely on the local names
// "start", "end" and "val" used here.
1675 bool wxString::ToULong(unsigned long *pVal
, int base
) const
1677 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), _T("invalid base") );
1679 WX_STRING_TO_X_TYPE_START
1680 unsigned long val
= wxStrtoul(start
, &end
, base
);
1681 WX_STRING_TO_X_TYPE_END
// Convert the string to a wxLongLong_t using wxStrtoll() in the given base
// and store the result through pVal.
//
// base must be 0 or lie in the 2..36 range (asserted). The NULL check of
// pVal and the success/failure reporting are done by the
// WX_STRING_TO_X_TYPE_START/END macros, which rely on the local names
// "start", "end" and "val" used here.
1684 bool wxString::ToLongLong(wxLongLong_t
*pVal
, int base
) const
1686 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), _T("invalid base") );
1688 WX_STRING_TO_X_TYPE_START
1689 wxLongLong_t val
= wxStrtoll(start
, &end
, base
);
1690 WX_STRING_TO_X_TYPE_END
// Convert the string to a wxULongLong_t using wxStrtoull() in the given
// base and store the result through pVal.
//
// base must be 0 or lie in the 2..36 range (asserted). The NULL check of
// pVal and the success/failure reporting are done by the
// WX_STRING_TO_X_TYPE_START/END macros, which rely on the local names
// "start", "end" and "val" used here.
1693 bool wxString::ToULongLong(wxULongLong_t
*pVal
, int base
) const
1695 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), _T("invalid base") );
1697 WX_STRING_TO_X_TYPE_START
1698 wxULongLong_t val
= wxStrtoull(start
, &end
, base
);
1699 WX_STRING_TO_X_TYPE_END
// Convert the string to a double using wxStrtod() and store the result
// through pVal.
//
// The NULL check of pVal and the success/failure reporting are done by the
// WX_STRING_TO_X_TYPE_START/END macros, which rely on the local names
// "start", "end" and "val" used here.
1702 bool wxString::ToDouble(double *pVal
) const
1704 WX_STRING_TO_X_TYPE_START
1705 double val
= wxStrtod(start
, &end
);
1706 WX_STRING_TO_X_TYPE_END
// Same as ToLong() but the conversion function is passed wxCLocale
// explicitly.
//
// wxStrtol_lA() is used when wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE; the
// second declaration of "val", using wxStrtol_l(), is presumably the
// alternative branch of that test (its #else/#endif lines are not part of
// this excerpt -- confirm). base must be 0 or lie in the 2..36 range
// (asserted).
1711 bool wxString::ToCLong(long *pVal
, int base
) const
1713 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), _T("invalid base") );
1715 WX_STRING_TO_X_TYPE_START
1716 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
1717 long val
= wxStrtol_lA(start
, &end
, base
, wxCLocale
);
1719 long val
= wxStrtol_l(start
, &end
, base
, wxCLocale
);
1721 WX_STRING_TO_X_TYPE_END
// Same as ToULong() but the conversion function is passed wxCLocale
// explicitly.
//
// wxStrtoul_lA() is used when wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE; the
// second declaration of "val", using wxStrtoul_l(), is presumably the
// alternative branch of that test (its #else/#endif lines are not part of
// this excerpt -- confirm). base must be 0 or lie in the 2..36 range
// (asserted).
1724 bool wxString::ToCULong(unsigned long *pVal
, int base
) const
1726 wxASSERT_MSG( !base
|| (base
> 1 && base
<= 36), _T("invalid base") );
1728 WX_STRING_TO_X_TYPE_START
1729 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
1730 unsigned long val
= wxStrtoul_lA(start
, &end
, base
, wxCLocale
);
1732 unsigned long val
= wxStrtoul_l(start
, &end
, base
, wxCLocale
);
1734 WX_STRING_TO_X_TYPE_END
// Same as ToDouble() but the conversion function is passed wxCLocale
// explicitly.
//
// wxStrtod_lA() is used when wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE; the
// second declaration of "val", using wxStrtod_l(), is presumably the
// alternative branch of that test (its #else/#endif lines are not part of
// this excerpt -- confirm).
1737 bool wxString::ToCDouble(double *pVal
) const
1739 WX_STRING_TO_X_TYPE_START
1740 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
1741 double val
= wxStrtod_lA(start
, &end
, wxCLocale
);
1743 double val
= wxStrtod_l(start
, &end
, wxCLocale
);
1745 WX_STRING_TO_X_TYPE_END
1748 #endif // wxUSE_XLOCALE
1750 // ---------------------------------------------------------------------------
1752 // ---------------------------------------------------------------------------
1754 #if !wxUSE_UTF8_LOCALE_ONLY
1756 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1757 wxString
wxStringPrintfMixinBase::DoFormatWchar(const wxChar
*format
, ...)
1759 wxString
wxString::DoFormatWchar(const wxChar
*format
, ...)
1763 va_start(argptr
, format
);
1766 s
.PrintfV(format
, argptr
);
1772 #endif // !wxUSE_UTF8_LOCALE_ONLY
#if wxUSE_UNICODE_UTF8
// Build and return a new string from a narrow (char) printf()-like format
// and the variable arguments following it; the actual work is done by
// PrintfV().
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);

    result.PrintfV(format, args);

    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
1791 wxString
wxString::FormatV(const wxString
& format
, va_list argptr
)
1794 s
.PrintfV(format
, argptr
);
1798 #if !wxUSE_UTF8_LOCALE_ONLY
1799 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1800 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar
*format
, ...)
1802 int wxString::DoPrintfWchar(const wxChar
*format
, ...)
1806 va_start(argptr
, format
);
1808 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1809 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1810 // because it's the only cast that works safely for downcasting when
1811 // multiple inheritance is used:
1812 wxString
*str
= static_cast<wxString
*>(this);
1814 wxString
*str
= this;
1817 int iLen
= str
->PrintfV(format
, argptr
);
1823 #endif // !wxUSE_UTF8_LOCALE_ONLY
#if wxUSE_UNICODE_UTF8
// Replace the contents of the string with the result of formatting the
// variable arguments according to the narrow (char) printf()-like format and
// return whatever PrintfV(), which does the actual work, returns.
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);

    const int len = PrintfV(format, args);

    va_end(args);

    return len;
}
#endif // wxUSE_UNICODE_UTF8
1840 Uses wxVsnprintf and places the result into this string.
1842 In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1843 it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in
1844 the ISO C99 (and thus SUSv3) standard the return value for the case of
1845 an undersized buffer is inconsistent. For conforming vsnprintf
1846 implementations the function must return the number of characters that
1847 would have been printed had the buffer been large enough. For conforming
1848 vswprintf implementations the function must return a negative number
1851 What vswprintf sets errno to is undefined but Darwin seems to set it to
1852 EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of
1853 those are defined in the standard and backed up by several conformance
1854 statements. Note that ENOMEM mentioned in the manual page does not
1855 apply to swprintf, only wprintf and fwprintf.
1857 Official manual page:
1858 http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1860 Some conformance statements (AIX, Solaris):
1861 http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1862 http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1864 Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1865 EILSEQ and EINVAL are specifically defined to mean the error is other than
1866 an undersized buffer and no other errno are defined we treat those two
1867 as meaning hard errors and everything else gets the old behavior which
1868 is to keep looping and increasing buffer size until the function succeeds.
1870 In practice it's impossible to determine before compilation which behavior
1871 may be used. The vswprintf function may have vsnprintf-like behavior or
1872 vice-versa. Behavior detected on one release can theoretically change
1873 with an updated release. Not to mention that configure testing for it
1874 would require the test to be run on the host system, not the build system
1875 which makes cross compilation difficult. Therefore, we make no assumptions
1876 about behavior and try our best to handle every known case, including the
1877 case where wxVsnprintf returns a negative number and fails to set errno.
1879 There is yet one more non-standard implementation and that is our own.
1880 Fortunately, that can be detected at compile-time.
1882 On top of all that, ISO C99 explicitly defines snprintf to write a null
1883 character to the last position of the specified buffer. That would be
1884 at the given buffer size minus 1. It is supposed to do this even if it
1885 turns out that the buffer is sized too small.
1887 Darwin (tested on 10.5) follows the C99 behavior exactly.
1889 Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1890 errno even when it fails. However, it only seems to ever fail due
1891 to an undersized buffer.
1893 #if wxUSE_UNICODE_UTF8
1894 template<typename BufferType
>
1896 // we only need one version in non-UTF8 builds and at least two Windows
1897 // compilers have problems with this function template, so use just one
1898 // normal function here
1900 static int DoStringPrintfV(wxString
& str
,
1901 const wxString
& format
, va_list argptr
)
1907 #if wxUSE_UNICODE_UTF8
1908 BufferType
tmp(str
, size
+ 1);
1909 typename
BufferType::CharType
*buf
= tmp
;
1911 wxStringBuffer
tmp(str
, size
+ 1);
1919 // in UTF-8 build, leaving uninitialized junk in the buffer
1920 // could result in invalid non-empty UTF-8 string, so just
1921 // reset the string to empty on failure:
1926 // wxVsnprintf() may modify the original arg pointer, so pass it
1929 wxVaCopy(argptrcopy
, argptr
);
1932 // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1935 int len
= wxVsnprintf(buf
, size
, format
, argptrcopy
);
1938 // some implementations of vsnprintf() don't NUL terminate
1939 // the string if there is not enough space for it so
1940 // always do it manually
1941 // FIXME: This really seems to be the wrong and would be an off-by-one
1942 // bug except the code above allocates an extra character.
1943 buf
[size
] = _T('\0');
1945 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1946 // total number of characters which would have been written if the
1947 // buffer were large enough (newer standards such as Unix98)
1950 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1951 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1952 // is true if *both* of them use our own implementation,
1953 // otherwise we can't be sure
1954 #if wxUSE_WXVSNPRINTF
1955 // we know that our own implementation of wxVsnprintf() returns -1
1956 // only for a format error - thus there's something wrong with
1957 // the user's format string
1960 #else // possibly using system version
1961 // assume it only returns error if there is not enough space, but
1962 // as we don't know how much we need, double the current size of
1965 if( (errno
== EILSEQ
) || (errno
== EINVAL
) )
1966 // If errno was set to one of the two well-known hard errors
1967 // then fail immediately to avoid an infinite loop.
1970 #endif // __WXWINCE__
1971 // still not enough, as we don't know how much we need, double the
1972 // current size of the buffer
1974 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1976 else if ( len
>= size
)
1978 #if wxUSE_WXVSNPRINTF
1979 // we know that our own implementation of wxVsnprintf() returns
1980 // size+1 when there's not enough space but that's not the size
1981 // of the required buffer!
1982 size
*= 2; // so we just double the current size of the buffer
1984 // some vsnprintf() implementations NUL-terminate the buffer and
1985 // some don't in len == size case, to be safe always add 1
1986 // FIXME: I don't quite understand this comment. The vsnprintf
1987 // function is specifically defined to return the number of
1988 // characters printed not including the null terminator.
1989 // So OF COURSE you need to add 1 to get the right buffer size.
1990 // The following line is definitely correct, no question.
1994 else // ok, there was enough space
2000 // we could have overshot
2003 return str
.length();
2006 int wxString::PrintfV(const wxString
& format
, va_list argptr
)
2008 #if wxUSE_UNICODE_UTF8
2009 #if wxUSE_STL_BASED_WXSTRING
2010 typedef wxStringTypeBuffer
<char> Utf8Buffer
;
2012 typedef wxStringInternalBuffer Utf8Buffer
;
2016 #if wxUSE_UTF8_LOCALE_ONLY
2017 return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argptr
);
2019 #if wxUSE_UNICODE_UTF8
2020 if ( wxLocaleIsUtf8
)
2021 return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argptr
);
2024 return DoStringPrintfV
<wxStringBuffer
>(*this, format
, argptr
);
2026 return DoStringPrintfV(*this, format
, argptr
);
2027 #endif // UTF8/WCHAR
2031 // ----------------------------------------------------------------------------
2032 // misc other operations
2033 // ----------------------------------------------------------------------------
2035 // returns true if the string matches the pattern which may contain '*' and
2036 // '?' metacharacters (as usual, '?' matches any character and '*' any number
2038 bool wxString::Matches(const wxString
& mask
) const
2040 // I disable this code as it doesn't seem to be faster (in fact, it seems
2041 // to be much slower) than the old, hand-written code below and using it
2042 // here requires always linking with libregex even if the user code doesn't
2044 #if 0 // wxUSE_REGEX
2045 // first translate the shell-like mask into a regex
2047 pattern
.reserve(wxStrlen(pszMask
));
2059 pattern
+= _T(".*");
2070 // these characters are special in a RE, quote them
2071 // (however note that we don't quote '[' and ']' to allow
2072 // using them for Unix shell like matching)
2073 pattern
+= _T('\\');
2077 pattern
+= *pszMask
;
2085 return wxRegEx(pattern
, wxRE_NOSUB
| wxRE_EXTENDED
).Matches(c_str());
2086 #else // !wxUSE_REGEX
2087 // TODO: this is, of course, awfully inefficient...
2089 // FIXME-UTF8: implement using iterators, remove #if
2090 #if wxUSE_UNICODE_UTF8
2091 const wxScopedWCharBuffer maskBuf
= mask
.wc_str();
2092 const wxScopedWCharBuffer txtBuf
= wc_str();
2093 const wxChar
*pszMask
= maskBuf
.data();
2094 const wxChar
*pszTxt
= txtBuf
.data();
2096 const wxChar
*pszMask
= mask
.wx_str();
2097 // the char currently being checked
2098 const wxChar
*pszTxt
= wx_str();
2101 // the last location where '*' matched
2102 const wxChar
*pszLastStarInText
= NULL
;
2103 const wxChar
*pszLastStarInMask
= NULL
;
2106 for ( ; *pszMask
!= wxT('\0'); pszMask
++, pszTxt
++ ) {
2107 switch ( *pszMask
) {
2109 if ( *pszTxt
== wxT('\0') )
2112 // pszTxt and pszMask will be incremented in the loop statement
2118 // remember where we started to be able to backtrack later
2119 pszLastStarInText
= pszTxt
;
2120 pszLastStarInMask
= pszMask
;
2122 // ignore special chars immediately following this one
2123 // (should this be an error?)
2124 while ( *pszMask
== wxT('*') || *pszMask
== wxT('?') )
2127 // if there is nothing more, match
2128 if ( *pszMask
== wxT('\0') )
2131 // are there any other metacharacters in the mask?
2133 const wxChar
*pEndMask
= wxStrpbrk(pszMask
, wxT("*?"));
2135 if ( pEndMask
!= NULL
) {
2136 // we have to match the string between two metachars
2137 uiLenMask
= pEndMask
- pszMask
;
2140 // we have to match the remainder of the string
2141 uiLenMask
= wxStrlen(pszMask
);
2144 wxString
strToMatch(pszMask
, uiLenMask
);
2145 const wxChar
* pMatch
= wxStrstr(pszTxt
, strToMatch
);
2146 if ( pMatch
== NULL
)
2149 // -1 to compensate "++" in the loop
2150 pszTxt
= pMatch
+ uiLenMask
- 1;
2151 pszMask
+= uiLenMask
- 1;
2156 if ( *pszMask
!= *pszTxt
)
2162 // match only if nothing left
2163 if ( *pszTxt
== wxT('\0') )
2166 // if we failed to match, backtrack if we can
2167 if ( pszLastStarInText
) {
2168 pszTxt
= pszLastStarInText
+ 1;
2169 pszMask
= pszLastStarInMask
;
2171 pszLastStarInText
= NULL
;
2173 // don't bother resetting pszLastStarInMask, it's unnecessary
2179 #endif // wxUSE_REGEX/!wxUSE_REGEX
2182 // Count the number of chars
2183 int wxString::Freq(wxUniChar ch
) const
2186 for ( const_iterator i
= begin(); i
!= end(); ++i
)