1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
13 // ===========================================================================
14 // headers, declarations, constants
15 // ===========================================================================
17 // For compilers that support precompilation, includes "wx.h".
18 #include "wx/wxprec.h"
25 #include "wx/string.h"
26 #include "wx/wxcrtvararg.h"
38 #include "wx/hashmap.h"
40 // string handling functions used by wxString:
41 #if wxUSE_UNICODE_UTF8
42 #define wxStringMemcpy memcpy
43 #define wxStringMemcmp memcmp
44 #define wxStringMemchr memchr
45 #define wxStringStrlen strlen
47 #define wxStringMemcpy wxTmemcpy
48 #define wxStringMemcmp wxTmemcmp
49 #define wxStringMemchr wxTmemchr
50 #define wxStringStrlen wxStrlen
53 // ----------------------------------------------------------------------------
55 // ----------------------------------------------------------------------------
60 static UntypedBufferData
s_untypedNullData(NULL
);
62 UntypedBufferData
* const untypedNullDataPtr
= &s_untypedNullData
;
64 } // namespace wxPrivate
66 // ---------------------------------------------------------------------------
67 // static class variables definition
68 // ---------------------------------------------------------------------------
70 //According to STL _must_ be a -1 size_t
71 const size_t wxString::npos
= (size_t) -1;
73 #if wxUSE_STRING_POS_CACHE
75 #ifdef wxHAS_COMPILER_TLS
77 wxTLS_TYPE(wxString::Cache
) wxString::ms_cache
;
79 #else // !wxHAS_COMPILER_TLS
81 struct wxStrCacheInitializer
83 wxStrCacheInitializer()
85 // calling this function triggers s_cache initialization in it, and
86 // from now on it becomes safe to call from multiple threads
92 wxString::Cache& wxString::GetCache()
94 static wxTLS_TYPE(Cache) s_cache;
96 return wxTLS_VALUE(s_cache);
100 static wxStrCacheInitializer gs_stringCacheInit
;
102 #endif // wxHAS_COMPILER_TLS/!wxHAS_COMPILER_TLS
104 // gdb seems to be unable to display thread-local variables correctly, at least
105 // not my 6.4.98 version under amd64, so provide this debugging helper to do it
108 struct wxStrCacheDumper
110 static void ShowAll()
112 puts("*** wxString cache dump:");
113 for ( unsigned n
= 0; n
< wxString::Cache::SIZE
; n
++ )
115 const wxString::Cache::Element
&
116 c
= wxString::GetCacheBegin()[n
];
118 printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
120 n
== wxString::LastUsedCacheElement() ? " [*]" : "",
122 (unsigned long)c
.pos
,
123 (unsigned long)c
.impl
,
129 void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
131 #endif // __WXDEBUG__
133 #ifdef wxPROFILE_STRING_CACHE
135 wxString::CacheStats
wxString::ms_cacheStats
;
137 struct wxStrCacheStatsDumper
139 ~wxStrCacheStatsDumper()
141 const wxString::CacheStats
& stats
= wxString::ms_cacheStats
;
145 puts("*** wxString cache statistics:");
146 printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
148 printf("\tHits %u (of which %u not used) or %.2f%%\n",
151 100.*float(stats
.poshits
- stats
.mishits
)/stats
.postot
);
152 printf("\tAverage position requested: %.2f\n",
153 float(stats
.sumpos
) / stats
.postot
);
154 printf("\tAverage offset after cached hint: %.2f\n",
155 float(stats
.sumofs
) / stats
.postot
);
160 printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
161 stats
.lentot
, 100.*float(stats
.lenhits
)/stats
.lentot
);
166 static wxStrCacheStatsDumper s_showCacheStats
;
168 #endif // wxPROFILE_STRING_CACHE
170 #endif // wxUSE_STRING_POS_CACHE
172 // ----------------------------------------------------------------------------
174 // ----------------------------------------------------------------------------
176 #if wxUSE_STD_IOSTREAM
180 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxCStrData
& str
)
182 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
183 const wxCharBuffer
buf(str
.AsCharBuf());
185 os
.clear(wxSTD
ios_base::failbit
);
191 return os
<< str
.AsInternal();
195 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxString
& str
)
197 return os
<< str
.c_str();
200 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxCharBuffer
& str
)
202 return os
<< str
.data();
206 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxWCharBuffer
& str
)
208 return os
<< str
.data();
212 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
214 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxString
& str
)
216 return wos
<< str
.wc_str();
219 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxCStrData
& str
)
221 return wos
<< str
.AsWChar();
224 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxWCharBuffer
& str
)
226 return wos
<< str
.data();
229 #endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
231 #endif // wxUSE_STD_IOSTREAM
233 // ===========================================================================
234 // wxString class core
235 // ===========================================================================
237 #if wxUSE_UNICODE_UTF8
239 void wxString::PosLenToImpl(size_t pos
, size_t len
,
240 size_t *implPos
, size_t *implLen
) const
246 else // have valid start position
248 const const_iterator b
= GetIterForNthChar(pos
);
249 *implPos
= wxStringImpl::const_iterator(b
.impl()) - m_impl
.begin();
254 else // have valid length too
256 // we need to handle the case of length specifying a substring
257 // going beyond the end of the string, just as std::string does
258 const const_iterator
e(end());
260 while ( len
&& i
<= e
)
266 *implLen
= i
.impl() - b
.impl();
271 #endif // wxUSE_UNICODE_UTF8
273 // ----------------------------------------------------------------------------
274 // wxCStrData converted strings caching
275 // ----------------------------------------------------------------------------
277 // FIXME-UTF8: temporarily disabled because it doesn't work with global
278 // string objects; re-enable after fixing this bug and benchmarking
279 // performance to see if using a hash is a good idea at all
282 // For backward compatibility reasons, it must be possible to assign the value
283 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
284 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
285 // because the memory would be freed immediately, but it has to be valid as long
286 // as the string is not modified, so that code like this still works:
288 // const wxChar *s = str.c_str();
289 // while ( s ) { ... }
291 // FIXME-UTF8: not thread safe!
292 // FIXME-UTF8: we currently clear the cached conversion only when the string is
293 // destroyed, but we should do it when the string is modified, to
294 // keep memory usage down
295 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
296 // invalidated the cache on every change, we could keep the previous
298 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
299 // to use mb_str() or wc_str() instead of (const [w]char*)c_str()
302 static inline void DeleteStringFromConversionCache(T
& hash
, const wxString
*s
)
304 typename
T::iterator i
= hash
.find(wxConstCast(s
, wxString
));
305 if ( i
!= hash
.end() )
313 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
314 // so we have to use wxString* here and const-cast when used
315 WX_DECLARE_HASH_MAP(wxString
*, char*, wxPointerHash
, wxPointerEqual
,
316 wxStringCharConversionCache
);
317 static wxStringCharConversionCache gs_stringsCharCache
;
319 const char* wxCStrData::AsChar() const
// remove previously cached value, if any (see FIXMEs above):
322 DeleteStringFromConversionCache(gs_stringsCharCache
, m_str
);
324 // convert the string and keep it:
325 const char *s
= gs_stringsCharCache
[wxConstCast(m_str
, wxString
)] =
326 m_str
->mb_str().release();
330 #endif // wxUSE_UNICODE
332 #if !wxUSE_UNICODE_WCHAR
333 WX_DECLARE_HASH_MAP(wxString
*, wchar_t*, wxPointerHash
, wxPointerEqual
,
334 wxStringWCharConversionCache
);
335 static wxStringWCharConversionCache gs_stringsWCharCache
;
337 const wchar_t* wxCStrData::AsWChar() const
// remove previously cached value, if any (see FIXMEs above):
340 DeleteStringFromConversionCache(gs_stringsWCharCache
, m_str
);
342 // convert the string and keep it:
343 const wchar_t *s
= gs_stringsWCharCache
[wxConstCast(m_str
, wxString
)] =
344 m_str
->wc_str().release();
348 #endif // !wxUSE_UNICODE_WCHAR
350 wxString::~wxString()
353 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
354 DeleteStringFromConversionCache(gs_stringsCharCache
, this);
356 #if !wxUSE_UNICODE_WCHAR
357 DeleteStringFromConversionCache(gs_stringsWCharCache
, this);
362 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
363 const char* wxCStrData::AsChar() const
365 #if wxUSE_UNICODE_UTF8
366 if ( wxLocaleIsUtf8
)
369 // under non-UTF8 locales, we have to convert the internal UTF-8
370 // representation using wxConvLibc and cache the result
372 wxString
*str
= wxConstCast(m_str
, wxString
);
374 // convert the string:
376 // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
377 // have it) but it's unfortunately not obvious to implement
// because we don't know how big a buffer we need for the
379 // given string length (in case of multibyte encodings, e.g.
380 // ISO-2022-JP or UTF-8 when internal representation is wchar_t)
382 // One idea would be to store more than just m_convertedToChar
383 // in wxString: then we could record the length of the string
384 // which was converted the last time and try to reuse the same
385 // buffer if the current length is not greater than it (this
386 // could still fail because string could have been modified in
387 // place but it would work most of the time, so we'd do it and
388 // only allocate the new buffer if in-place conversion returned
389 // an error). We could also store a bit saying if the string
390 // was modified since the last conversion (and update it in all
// operations modifying the string, of course) to avoid unneeded
// subsequent conversions. But both of these ideas require
393 // adding more fields to wxString and require profiling results
394 // to be sure that we really gain enough from them to justify
396 wxCharBuffer
buf(str
->mb_str());
// if it failed, return empty string and not NULL to avoid crashes in code
// written with either wxWidgets 2 wxString or std::string behaviour in
// mind: neither of them ever returns NULL and so we shouldn't either
404 if ( str
->m_convertedToChar
&&
405 strlen(buf
) == strlen(str
->m_convertedToChar
) )
407 // keep the same buffer for as long as possible, so that several calls
408 // to c_str() in a row still work:
409 strcpy(str
->m_convertedToChar
, buf
);
413 str
->m_convertedToChar
= buf
.release();
417 return str
->m_convertedToChar
+ m_offset
;
419 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
421 #if !wxUSE_UNICODE_WCHAR
422 const wchar_t* wxCStrData::AsWChar() const
424 wxString
*str
= wxConstCast(m_str
, wxString
);
426 // convert the string:
427 wxWCharBuffer
buf(str
->wc_str());
429 // notice that here, unlike above in AsChar(), conversion can't fail as our
430 // internal UTF-8 is always well-formed -- or the string was corrupted and
431 // all bets are off anyhow
433 // FIXME-UTF8: do the conversion in-place in the existing buffer
434 if ( str
->m_convertedToWChar
&&
435 wxWcslen(buf
) == wxWcslen(str
->m_convertedToWChar
) )
437 // keep the same buffer for as long as possible, so that several calls
438 // to c_str() in a row still work:
439 memcpy(str
->m_convertedToWChar
, buf
, sizeof(wchar_t) * wxWcslen(buf
));
443 str
->m_convertedToWChar
= buf
.release();
447 return str
->m_convertedToWChar
+ m_offset
;
449 #endif // !wxUSE_UNICODE_WCHAR
451 // ===========================================================================
452 // wxString class core
453 // ===========================================================================
455 // ---------------------------------------------------------------------------
456 // construction and conversion
457 // ---------------------------------------------------------------------------
459 #if wxUSE_UNICODE_WCHAR
461 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
462 const wxMBConv
& conv
)
465 if ( !psz
|| nLength
== 0 )
466 return SubstrBufFromMB(L
"", 0);
468 if ( nLength
== npos
)
472 wxWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
474 return SubstrBufFromMB(_T(""), 0);
476 return SubstrBufFromMB(wcBuf
, wcLen
);
478 #endif // wxUSE_UNICODE_WCHAR
480 #if wxUSE_UNICODE_UTF8
482 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
483 const wxMBConv
& conv
)
486 if ( !psz
|| nLength
== 0 )
487 return SubstrBufFromMB("", 0);
489 // if psz is already in UTF-8, we don't have to do the roundtrip to
490 // wchar_t* and back:
493 // we need to validate the input because UTF8 iterators assume valid
494 // UTF-8 sequence and psz may be invalid:
495 if ( wxStringOperations::IsValidUtf8String(psz
, nLength
) )
497 // we must pass the real string length to SubstrBufFromMB ctor
498 if ( nLength
== npos
)
499 nLength
= psz
? strlen(psz
) : 0;
500 return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz
), nLength
);
502 // else: do the roundtrip through wchar_t*
505 if ( nLength
== npos
)
508 // first convert to wide string:
510 wxWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
512 return SubstrBufFromMB("", 0);
514 // and then to UTF-8:
515 SubstrBufFromMB
buf(ConvertStr(wcBuf
, wcLen
, wxMBConvStrictUTF8()));
516 // widechar -> UTF-8 conversion isn't supposed to ever fail:
517 wxASSERT_MSG( buf
.data
, _T("conversion to UTF-8 failed") );
521 #endif // wxUSE_UNICODE_UTF8
523 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
525 wxString::SubstrBufFromWC
wxString::ConvertStr(const wchar_t *pwz
, size_t nLength
,
526 const wxMBConv
& conv
)
529 if ( !pwz
|| nLength
== 0 )
530 return SubstrBufFromWC("", 0);
532 if ( nLength
== npos
)
536 wxCharBuffer
mbBuf(conv
.cWC2MB(pwz
, nLength
, &mbLen
));
538 return SubstrBufFromWC("", 0);
540 return SubstrBufFromWC(mbBuf
, mbLen
);
542 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
545 #if wxUSE_UNICODE_WCHAR
547 //Convert wxString in Unicode mode to a multi-byte string
548 const wxCharBuffer
wxString::mb_str(const wxMBConv
& conv
) const
550 return conv
.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL
);
553 #elif wxUSE_UNICODE_UTF8
555 const wxWCharBuffer
wxString::wc_str() const
557 return wxMBConvStrictUTF8().cMB2WC
560 m_impl
.length() + 1, // size, not length
565 const wxCharBuffer
wxString::mb_str(const wxMBConv
& conv
) const
568 return wxCharBuffer::CreateNonOwned(m_impl
.c_str());
570 // FIXME-UTF8: use wc_str() here once we have buffers with length
573 wxWCharBuffer
wcBuf(wxMBConvStrictUTF8().cMB2WC
576 m_impl
.length() + 1, // size
580 return wxCharBuffer("");
582 return conv
.cWC2MB(wcBuf
, wcLen
+1, NULL
);
587 //Converts this string to a wide character string if unicode
588 //mode is not enabled and wxUSE_WCHAR_T is enabled
589 const wxWCharBuffer
wxString::wc_str(const wxMBConv
& conv
) const
591 return conv
.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL
);
594 #endif // Unicode/ANSI
596 // shrink to minimal size (releasing extra memory)
597 bool wxString::Shrink()
599 wxString
tmp(begin(), end());
601 return tmp
.length() == length();
604 // deprecated compatibility code:
605 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
606 wxStringCharType
*wxString::GetWriteBuf(size_t nLen
)
608 return DoGetWriteBuf(nLen
);
611 void wxString::UngetWriteBuf()
616 void wxString::UngetWriteBuf(size_t nLen
)
618 DoUngetWriteBuf(nLen
);
620 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
623 // ---------------------------------------------------------------------------
625 // ---------------------------------------------------------------------------
627 // all functions are inline in string.h
629 // ---------------------------------------------------------------------------
630 // concatenation operators
631 // ---------------------------------------------------------------------------
634 * concatenation functions come in 5 flavours:
636 * char + string and string + char
637 * C str + string and string + C str
640 wxString
operator+(const wxString
& str1
, const wxString
& str2
)
642 #if !wxUSE_STL_BASED_WXSTRING
643 wxASSERT( str1
.IsValid() );
644 wxASSERT( str2
.IsValid() );
653 wxString
operator+(const wxString
& str
, wxUniChar ch
)
655 #if !wxUSE_STL_BASED_WXSTRING
656 wxASSERT( str
.IsValid() );
665 wxString
operator+(wxUniChar ch
, const wxString
& str
)
667 #if !wxUSE_STL_BASED_WXSTRING
668 wxASSERT( str
.IsValid() );
677 wxString
operator+(const wxString
& str
, const char *psz
)
679 #if !wxUSE_STL_BASED_WXSTRING
680 wxASSERT( str
.IsValid() );
684 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
685 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
693 wxString
operator+(const wxString
& str
, const wchar_t *pwz
)
695 #if !wxUSE_STL_BASED_WXSTRING
696 wxASSERT( str
.IsValid() );
700 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
701 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
709 wxString
operator+(const char *psz
, const wxString
& str
)
711 #if !wxUSE_STL_BASED_WXSTRING
712 wxASSERT( str
.IsValid() );
716 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
717 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
725 wxString
operator+(const wchar_t *pwz
, const wxString
& str
)
727 #if !wxUSE_STL_BASED_WXSTRING
728 wxASSERT( str
.IsValid() );
732 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
733 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
741 // ---------------------------------------------------------------------------
743 // ---------------------------------------------------------------------------
745 bool wxString::IsSameAs(wxUniChar c
, bool compareWithCase
) const
747 return (length() == 1) && (compareWithCase
? GetChar(0u) == c
748 : wxToupper(GetChar(0u)) == wxToupper(c
));
751 #ifdef HAVE_STD_STRING_COMPARE
753 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
754 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
755 // sort strings in characters code point order by sorting the byte sequence
756 // in byte values order (i.e. what strcmp() and memcmp() do).
758 int wxString::compare(const wxString
& str
) const
760 return m_impl
.compare(str
.m_impl
);
763 int wxString::compare(size_t nStart
, size_t nLen
,
764 const wxString
& str
) const
767 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
768 return m_impl
.compare(pos
, len
, str
.m_impl
);
771 int wxString::compare(size_t nStart
, size_t nLen
,
773 size_t nStart2
, size_t nLen2
) const
776 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
779 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
781 return m_impl
.compare(pos
, len
, str
.m_impl
, pos2
, len2
);
784 int wxString::compare(const char* sz
) const
786 return m_impl
.compare(ImplStr(sz
));
789 int wxString::compare(const wchar_t* sz
) const
791 return m_impl
.compare(ImplStr(sz
));
794 int wxString::compare(size_t nStart
, size_t nLen
,
795 const char* sz
, size_t nCount
) const
798 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
800 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
802 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
805 int wxString::compare(size_t nStart
, size_t nLen
,
806 const wchar_t* sz
, size_t nCount
) const
809 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
811 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
813 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
816 #else // !HAVE_STD_STRING_COMPARE
818 static inline int wxDoCmp(const wxStringCharType
* s1
, size_t l1
,
819 const wxStringCharType
* s2
, size_t l2
)
822 return wxStringMemcmp(s1
, s2
, l1
);
825 int ret
= wxStringMemcmp(s1
, s2
, l1
);
826 return ret
== 0 ? -1 : ret
;
830 int ret
= wxStringMemcmp(s1
, s2
, l2
);
831 return ret
== 0 ? +1 : ret
;
835 int wxString::compare(const wxString
& str
) const
837 return ::wxDoCmp(m_impl
.data(), m_impl
.length(),
838 str
.m_impl
.data(), str
.m_impl
.length());
841 int wxString::compare(size_t nStart
, size_t nLen
,
842 const wxString
& str
) const
844 wxASSERT(nStart
<= length());
845 size_type strLen
= length() - nStart
;
846 nLen
= strLen
< nLen
? strLen
: nLen
;
849 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
851 return ::wxDoCmp(m_impl
.data() + pos
, len
,
852 str
.m_impl
.data(), str
.m_impl
.length());
855 int wxString::compare(size_t nStart
, size_t nLen
,
857 size_t nStart2
, size_t nLen2
) const
859 wxASSERT(nStart
<= length());
860 wxASSERT(nStart2
<= str
.length());
861 size_type strLen
= length() - nStart
,
862 strLen2
= str
.length() - nStart2
;
863 nLen
= strLen
< nLen
? strLen
: nLen
;
864 nLen2
= strLen2
< nLen2
? strLen2
: nLen2
;
867 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
869 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
871 return ::wxDoCmp(m_impl
.data() + pos
, len
,
872 str
.m_impl
.data() + pos2
, len2
);
875 int wxString::compare(const char* sz
) const
877 SubstrBufFromMB
str(ImplStr(sz
, npos
));
878 if ( str
.len
== npos
)
879 str
.len
= wxStringStrlen(str
.data
);
880 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
883 int wxString::compare(const wchar_t* sz
) const
885 SubstrBufFromWC
str(ImplStr(sz
, npos
));
886 if ( str
.len
== npos
)
887 str
.len
= wxStringStrlen(str
.data
);
888 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
891 int wxString::compare(size_t nStart
, size_t nLen
,
892 const char* sz
, size_t nCount
) const
894 wxASSERT(nStart
<= length());
895 size_type strLen
= length() - nStart
;
896 nLen
= strLen
< nLen
? strLen
: nLen
;
899 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
901 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
902 if ( str
.len
== npos
)
903 str
.len
= wxStringStrlen(str
.data
);
905 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
908 int wxString::compare(size_t nStart
, size_t nLen
,
909 const wchar_t* sz
, size_t nCount
) const
911 wxASSERT(nStart
<= length());
912 size_type strLen
= length() - nStart
;
913 nLen
= strLen
< nLen
? strLen
: nLen
;
916 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
918 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
919 if ( str
.len
== npos
)
920 str
.len
= wxStringStrlen(str
.data
);
922 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
925 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
928 // ---------------------------------------------------------------------------
929 // find_{first,last}_[not]_of functions
930 // ---------------------------------------------------------------------------
932 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
934 // NB: All these functions are implemented with the argument being wxChar*,
935 // i.e. widechar string in any Unicode build, even though native string
936 // representation is char* in the UTF-8 build. This is because we couldn't
937 // use memchr() to determine if a character is in a set encoded as UTF-8.
939 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
) const
941 return find_first_of(sz
, nStart
, wxStrlen(sz
));
944 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
) const
946 return find_first_not_of(sz
, nStart
, wxStrlen(sz
));
949 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
951 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
954 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
956 if ( wxTmemchr(sz
, *i
, n
) )
963 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
965 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
968 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
970 if ( !wxTmemchr(sz
, *i
, n
) )
978 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
) const
980 return find_last_of(sz
, nStart
, wxStrlen(sz
));
983 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
) const
985 return find_last_not_of(sz
, nStart
, wxStrlen(sz
));
988 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
990 size_t len
= length();
992 if ( nStart
== npos
)
998 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
1001 size_t idx
= nStart
;
1002 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
1003 i
!= rend(); --idx
, ++i
)
1005 if ( wxTmemchr(sz
, *i
, n
) )
1012 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
1014 size_t len
= length();
1016 if ( nStart
== npos
)
1022 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
1025 size_t idx
= nStart
;
1026 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
1027 i
!= rend(); --idx
, ++i
)
1029 if ( !wxTmemchr(sz
, *i
, n
) )
1036 size_t wxString::find_first_not_of(wxUniChar ch
, size_t nStart
) const
1038 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
1040 size_t idx
= nStart
;
1041 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
1050 size_t wxString::find_last_not_of(wxUniChar ch
, size_t nStart
) const
1052 size_t len
= length();
1054 if ( nStart
== npos
)
1060 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
1063 size_t idx
= nStart
;
1064 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
1065 i
!= rend(); --idx
, ++i
)
1074 // the functions above were implemented for wchar_t* arguments in Unicode
1075 // build and char* in ANSI build; below are implementations for the other
1078 #define wxOtherCharType char
1079 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
1081 #define wxOtherCharType wchar_t
1082 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
1085 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
) const
1086 { return find_first_of(STRCONV(sz
), nStart
); }
1088 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
,
1090 { return find_first_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1091 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
) const
1092 { return find_last_of(STRCONV(sz
), nStart
); }
1093 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
,
1095 { return find_last_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1096 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
1097 { return find_first_not_of(STRCONV(sz
), nStart
); }
1098 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
,
1100 { return find_first_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1101 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
1102 { return find_last_not_of(STRCONV(sz
), nStart
); }
1103 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
,
1105 { return find_last_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1107 #undef wxOtherCharType
1110 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1112 // ===========================================================================
1113 // other common string functions
1114 // ===========================================================================
1116 int wxString::CmpNoCase(const wxString
& s
) const
1118 #if wxUSE_UNICODE_UTF8
1119 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
1121 const_iterator i1
= begin();
1122 const_iterator end1
= end();
1123 const_iterator i2
= s
.begin();
1124 const_iterator end2
= s
.end();
1126 for ( ; i1
!= end1
&& i2
!= end2
; ++i1
, ++i2
)
1128 wxUniChar lower1
= (wxChar
)wxTolower(*i1
);
1129 wxUniChar lower2
= (wxChar
)wxTolower(*i2
);
1130 if ( lower1
!= lower2
)
1131 return lower1
< lower2
? -1 : 1;
1134 size_t len1
= length();
1135 size_t len2
= s
.length();
1139 else if ( len1
> len2
)
1142 #else // wxUSE_UNICODE_WCHAR or ANSI
1143 return wxStricmp(m_impl
.c_str(), s
.m_impl
.c_str());
1151 #ifndef __SCHAR_MAX__
1152 #define __SCHAR_MAX__ 127
1156 wxString
wxString::FromAscii(const char *ascii
, size_t len
)
1158 if (!ascii
|| len
== 0)
1159 return wxEmptyString
;
1164 wxStringInternalBuffer
buf(res
, len
);
1165 wxStringCharType
*dest
= buf
;
1167 for ( ; len
> 0; --len
)
1169 unsigned char c
= (unsigned char)*ascii
++;
1170 wxASSERT_MSG( c
< 0x80,
1171 _T("Non-ASCII value passed to FromAscii().") );
1173 *dest
++ = (wchar_t)c
;
1180 wxString
wxString::FromAscii(const char *ascii
)
1182 return FromAscii(ascii
, wxStrlen(ascii
));
1185 wxString
wxString::FromAscii(char ascii
)
1187 // What do we do with '\0' ?
1189 unsigned char c
= (unsigned char)ascii
;
1191 wxASSERT_MSG( c
< 0x80, _T("Non-ASCII value passed to FromAscii().") );
1193 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1194 return wxString(wxUniChar((wchar_t)c
));
1197 const wxCharBuffer
wxString::ToAscii() const
1199 // this will allocate enough space for the terminating NUL too
1200 wxCharBuffer
buffer(length());
1201 char *dest
= buffer
.data();
1203 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1206 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1207 *dest
++ = c
.IsAscii() ? (char)c
: '_';
1209 // the output string can't have embedded NULs anyhow, so we can safely
1210 // stop at first of them even if we do have any
1218 #endif // wxUSE_UNICODE
1220 // extract string of length nCount starting at nFirst
1221 wxString
wxString::Mid(size_t nFirst
, size_t nCount
) const
1223 size_t nLen
= length();
1225 // default value of nCount is npos and means "till the end"
1226 if ( nCount
== npos
)
1228 nCount
= nLen
- nFirst
;
1231 // out-of-bounds requests return sensible things
1232 if ( nFirst
+ nCount
> nLen
)
1234 nCount
= nLen
- nFirst
;
1237 if ( nFirst
> nLen
)
1239 // AllocCopy() will return empty string
1240 return wxEmptyString
;
1243 wxString
dest(*this, nFirst
, nCount
);
1244 if ( dest
.length() != nCount
)
1246 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1252 // check that the string starts with prefix and return the rest of the string
1253 // in the provided pointer if it is not NULL, otherwise return false
1254 bool wxString::StartsWith(const wxString
& prefix
, wxString
*rest
) const
1256 if ( compare(0, prefix
.length(), prefix
) != 0 )
1261 // put the rest of the string into provided pointer
1262 rest
->assign(*this, prefix
.length(), npos
);
1269 // check that the string ends with suffix and return the rest of it in the
1270 // provided pointer if it is not NULL, otherwise return false
1271 bool wxString::EndsWith(const wxString
& suffix
, wxString
*rest
) const
1273 int start
= length() - suffix
.length();
1275 if ( start
< 0 || compare(start
, npos
, suffix
) != 0 )
1280 // put the rest of the string into provided pointer
1281 rest
->assign(*this, 0, start
);
1288 // extract nCount last (rightmost) characters
1289 wxString
wxString::Right(size_t nCount
) const
1291 if ( nCount
> length() )
1294 wxString
dest(*this, length() - nCount
, nCount
);
1295 if ( dest
.length() != nCount
) {
1296 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1301 // get all characters after the last occurrence of ch
1302 // (returns the whole string if ch not found)
1303 wxString
wxString::AfterLast(wxUniChar ch
) const
1306 int iPos
= Find(ch
, true);
1307 if ( iPos
== wxNOT_FOUND
)
1310 str
.assign(*this, iPos
+ 1, npos
);
1315 // extract nCount first (leftmost) characters
1316 wxString
wxString::Left(size_t nCount
) const
1318 if ( nCount
> length() )
1321 wxString
dest(*this, 0, nCount
);
1322 if ( dest
.length() != nCount
) {
1323 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1328 // get all characters before the first occurrence of ch
1329 // (returns the whole string if ch not found)
1330 wxString
wxString::BeforeFirst(wxUniChar ch
) const
1332 int iPos
= Find(ch
);
1333 if ( iPos
== wxNOT_FOUND
)
1335 return wxString(*this, 0, iPos
);
1338 /// get all characters before the last occurrence of ch
1339 /// (returns empty string if ch not found)
1340 wxString
wxString::BeforeLast(wxUniChar ch
) const
1343 int iPos
= Find(ch
, true);
1344 if ( iPos
!= wxNOT_FOUND
&& iPos
!= 0 )
1345 str
= wxString(c_str(), iPos
);
1350 /// get all characters after the first occurrence of ch
1351 /// (returns empty string if ch not found)
1352 wxString
wxString::AfterFirst(wxUniChar ch
) const
1355 int iPos
= Find(ch
);
1356 if ( iPos
!= wxNOT_FOUND
)
1357 str
.assign(*this, iPos
+ 1, npos
);
1362 // replace first (or all) occurrences of some substring with another one
1363 size_t wxString::Replace(const wxString
& strOld
,
1364 const wxString
& strNew
, bool bReplaceAll
)
1366 // if we tried to replace an empty string we'd enter an infinite loop below
1367 wxCHECK_MSG( !strOld
.empty(), 0,
1368 _T("wxString::Replace(): invalid parameter") );
1370 wxSTRING_INVALIDATE_CACHE();
1372 size_t uiCount
= 0; // count of replacements made
1374 // optimize the special common case: replacement of one character by
1375 // another one (in UTF-8 case we can only do this for ASCII characters)
1377 // benchmarks show that this special version is around 3 times faster
1378 // (depending on the proportion of matching characters and UTF-8/wchar_t
1380 if ( strOld
.m_impl
.length() == 1 && strNew
.m_impl
.length() == 1 )
1382 const wxStringCharType chOld
= strOld
.m_impl
[0],
1383 chNew
= strNew
.m_impl
[0];
1385 // this loop is the simplified version of the one below
1386 for ( size_t pos
= 0; ; )
1388 pos
= m_impl
.find(chOld
, pos
);
1392 m_impl
[pos
++] = chNew
;
1400 else // general case
1402 const size_t uiOldLen
= strOld
.m_impl
.length();
1403 const size_t uiNewLen
= strNew
.m_impl
.length();
1405 for ( size_t pos
= 0; ; )
1407 pos
= m_impl
.find(strOld
.m_impl
, pos
);
1411 // replace this occurrence of the old string with the new one
1412 m_impl
.replace(pos
, uiOldLen
, strNew
.m_impl
);
1414 // move up pos past the string that was replaced
1417 // increase replace count
1420 // stop after the first one?
1429 bool wxString::IsAscii() const
1431 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1433 if ( !(*i
).IsAscii() )
1440 bool wxString::IsWord() const
1442 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1444 if ( !wxIsalpha(*i
) )
1451 bool wxString::IsNumber() const
1456 const_iterator i
= begin();
1458 if ( *i
== _T('-') || *i
== _T('+') )
1461 for ( ; i
!= end(); ++i
)
1463 if ( !wxIsdigit(*i
) )
1470 wxString
wxString::Strip(stripType w
) const
1473 if ( w
& leading
) s
.Trim(false);
1474 if ( w
& trailing
) s
.Trim(true);
1478 // ---------------------------------------------------------------------------
1480 // ---------------------------------------------------------------------------
1482 wxString
& wxString::MakeUpper()
1484 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1485 *it
= (wxChar
)wxToupper(*it
);
1490 wxString
& wxString::MakeLower()
1492 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1493 *it
= (wxChar
)wxTolower(*it
);
1498 wxString
& wxString::MakeCapitalized()
1500 const iterator en
= end();
1501 iterator it
= begin();
1504 *it
= (wxChar
)wxToupper(*it
);
1505 for ( ++it
; it
!= en
; ++it
)
1506 *it
= (wxChar
)wxTolower(*it
);
1512 // ---------------------------------------------------------------------------
1513 // trimming and padding
1514 // ---------------------------------------------------------------------------
1516 // some compilers (VC++ 6.0 not to name them) return true for a call to
1517 // isspace('\xEA') in the C locale which seems to be broken to me, but we have
1518 // to live with this by checking that the character is a 7 bit one - even if
1519 // this may fail to detect some spaces (I don't know if Unicode doesn't have
1520 // space-like symbols somewhere except in the first 128 chars), it is arguably
1521 // still better than trimming away accented letters
1522 inline int wxSafeIsspace(wxChar ch
) { return (ch
< 127) && wxIsspace(ch
); }
1524 // trims spaces (in the sense of isspace) from left or right side
1525 wxString
& wxString::Trim(bool bFromRight
)
1527 // first check if we're going to modify the string at all
1530 (bFromRight
&& wxSafeIsspace(GetChar(length() - 1))) ||
1531 (!bFromRight
&& wxSafeIsspace(GetChar(0u)))
1537 // find last non-space character
1538 reverse_iterator psz
= rbegin();
1539 while ( (psz
!= rend()) && wxSafeIsspace(*psz
) )
1542 // truncate at trailing space start
1543 erase(psz
.base(), end());
1547 // find first non-space character
1548 iterator psz
= begin();
1549 while ( (psz
!= end()) && wxSafeIsspace(*psz
) )
1552 // fix up data and length
1553 erase(begin(), psz
);
1560 // adds nCount characters chPad to the string from either side
1561 wxString
& wxString::Pad(size_t nCount
, wxUniChar chPad
, bool bFromRight
)
1563 wxString
s(chPad
, nCount
);
1576 // truncate the string
1577 wxString
& wxString::Truncate(size_t uiLen
)
1579 if ( uiLen
< length() )
1581 erase(begin() + uiLen
, end());
1583 //else: nothing to do, string is already short enough
1588 // ---------------------------------------------------------------------------
1589 // finding (return wxNOT_FOUND if not found and index otherwise)
1590 // ---------------------------------------------------------------------------
1593 int wxString::Find(wxUniChar ch
, bool bFromEnd
) const
1595 size_type idx
= bFromEnd
? find_last_of(ch
) : find_first_of(ch
);
1597 return (idx
== npos
) ? wxNOT_FOUND
: (int)idx
;
1600 // ----------------------------------------------------------------------------
1601 // conversion to numbers
1602 // ----------------------------------------------------------------------------
1604 // The implementation of all the functions below is exactly the same so factor
1605 // it out. Note that number extraction works correctly on UTF-8 strings, so
1606 // we can use wxStringCharType and wx_str() for maximum efficiency.
// DO_IF_NOT_WINCE(x) expands either to its argument or to nothing; it is used
// below to wrap all accesses to errno.
// NOTE(review): the preprocessor condition selecting between the two
// definitions is not shown here, presumably it is a __WXWINCE__ check.
1609 #define DO_IF_NOT_WINCE(x) x
1611 #define DO_IF_NOT_WINCE(x)
// WX_STRING_TO_INT_TYPE is the complete body of a string-to-integer
// conversion member function:
//  - out is the pointer receiving the result, it must not be NULL
//  - base is the numeric base which must be either 0 or in 2..36 range
//  - func is the strtol()-like function doing the conversion
//  - T is the type of the value returned by func
1614 #define WX_STRING_TO_INT_TYPE(out, base, func, T) \
1615 wxCHECK_MSG( out, false, _T("NULL output pointer") ); \
1616 wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") ); \
1618 DO_IF_NOT_WINCE( errno = 0; ) \
1620 const wxStringCharType *start = wx_str(); \
1621 wxStringCharType *end; \
1622 T val = func(start, &end, base); \
1624 /* return true only if scan was stopped by the terminating NUL and */ \
1625 /* if the string was not empty to start with and no under/overflow */ \
1627 if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) ) \
// Converts the string to a long using wxStrtol() with the given base and
// stores the result in *pVal. All the work is done by the
// WX_STRING_TO_INT_TYPE macro, which checks that pVal is not NULL and that
// base is either 0 or in 2..36 range.
1632 bool wxString::ToLong(long *pVal
, int base
) const
1634 WX_STRING_TO_INT_TYPE(pVal
, base
, wxStrtol
, long);
// Converts the string to an unsigned long using wxStrtoul() with the given
// base and stores the result in *pVal. All the work is done by the
// WX_STRING_TO_INT_TYPE macro, which checks that pVal is not NULL and that
// base is either 0 or in 2..36 range.
1637 bool wxString::ToULong(unsigned long *pVal
, int base
) const
1639 WX_STRING_TO_INT_TYPE(pVal
, base
, wxStrtoul
, unsigned long);
// Converts the string to a wxLongLong_t using wxStrtoll() with the given
// base and stores the result in *pVal. All the work is done by the
// WX_STRING_TO_INT_TYPE macro, which checks that pVal is not NULL and that
// base is either 0 or in 2..36 range.
1642 bool wxString::ToLongLong(wxLongLong_t
*pVal
, int base
) const
1644 WX_STRING_TO_INT_TYPE(pVal
, base
, wxStrtoll
, wxLongLong_t
);
// Converts the string to a wxULongLong_t using wxStrtoull() with the given
// base and stores the result in *pVal. All the work is done by the
// WX_STRING_TO_INT_TYPE macro, which checks that pVal is not NULL and that
// base is either 0 or in 2..36 range.
1647 bool wxString::ToULongLong(wxULongLong_t
*pVal
, int base
) const
1649 WX_STRING_TO_INT_TYPE(pVal
, base
, wxStrtoull
, wxULongLong_t
);
1652 bool wxString::ToDouble(double *pVal
) const
1654 wxCHECK_MSG( pVal
, false, _T("NULL output pointer") );
1656 DO_IF_NOT_WINCE( errno
= 0; )
1658 const wxChar
*start
= c_str();
1660 double val
= wxStrtod(start
, &end
);
1662 // return true only if scan was stopped by the terminating NUL and if the
1663 // string was not empty to start with and no under/overflow occurred
1664 if ( *end
|| end
== start
DO_IF_NOT_WINCE(|| errno
== ERANGE
) )
1672 // ---------------------------------------------------------------------------
1674 // ---------------------------------------------------------------------------
// Builds and returns a string from a printf()-like wxChar format and variable
// arguments by forwarding them to PrintfV(). The function is defined either
// as a member of wxStringPrintfMixinBase or of wxString itself, depending on
// whether wxNEEDS_WXSTRING_PRINTF_MIXIN is defined.
1676 #if !wxUSE_UTF8_LOCALE_ONLY
1678 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1679 wxString
wxStringPrintfMixinBase::DoFormatWchar(const wxChar
*format
, ...)
1681 wxString
wxString::DoFormatWchar(const wxChar
*format
, ...)
1685 va_start(argptr
, format
);
1688 s
.PrintfV(format
, argptr
);
1694 #endif // !wxUSE_UTF8_LOCALE_ONLY
// Version of the formatting function taking a narrow (char) format string; it
// is only compiled when wxUSE_UNICODE_UTF8 is set and forwards the format and
// the variable arguments to PrintfV().
1696 #if wxUSE_UNICODE_UTF8
1698 wxString
wxString::DoFormatUtf8(const char *format
, ...)
1701 va_start(argptr
, format
);
1704 s
.PrintfV(format
, argptr
);
1710 #endif // wxUSE_UNICODE_UTF8
// Formats the already started variable argument list argptr according to
// format by calling PrintfV() on the string s.
// NOTE(review): the declaration and the return of s are not shown here,
// presumably s is a local string which is returned as the result.
1713 wxString
wxString::FormatV(const wxString
& format
, va_list argptr
)
1716 s
.PrintfV(format
, argptr
);
// Formats the variable arguments according to the wxChar format into the
// string itself by forwarding them to PrintfV(), whose result is stored in
// iLen. The function is defined either as a member of
// wxStringPrintfMixinBase or of wxString itself, depending on whether
// wxNEEDS_WXSTRING_PRINTF_MIXIN is defined.
1720 #if !wxUSE_UTF8_LOCALE_ONLY
1721 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1722 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar
*format
, ...)
1724 int wxString::DoPrintfWchar(const wxChar
*format
, ...)
1728 va_start(argptr
, format
);
1730 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1731 // get a pointer to the wxString instance by downcasting "this" from the
1732 // mixin base class; note that static_cast<> is what is used for this
1733 // (NOTE(review): this comment used to claim dynamic_cast<> was required):
1734 wxString
*str
= static_cast<wxString
*>(this);
1736 wxString
*str
= this;
1739 int iLen
= str
->PrintfV(format
, argptr
);
1745 #endif // !wxUSE_UTF8_LOCALE_ONLY
// Version of the printf-like function taking a narrow (char) format string;
// it is only compiled when wxUSE_UNICODE_UTF8 is set and forwards the format
// and the variable arguments to PrintfV(), whose result is stored in iLen.
1747 #if wxUSE_UNICODE_UTF8
1748 int wxString::DoPrintfUtf8(const char *format
, ...)
1751 va_start(argptr
, format
);
1753 int iLen
= PrintfV(format
, argptr
);
1759 #endif // wxUSE_UNICODE_UTF8
1762 Uses wxVsnprintf and places the result into this string.
1764 In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1765 it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in
1766 the ISO C99 (and thus SUSv3) standard the return value for the case of
1767 an undersized buffer is inconsistent. For conforming vsnprintf
1768 implementations the function must return the number of characters that
1769 would have been printed had the buffer been large enough. For conforming
1770 vswprintf implementations the function must return a negative number
1773 What vswprintf sets errno to is undefined but Darwin seems to set it to
1774 EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of
1775 those are defined in the standard and backed up by several conformance
1776 statements. Note that ENOMEM mentioned in the manual page does not
1777 apply to swprintf, only wprintf and fwprintf.
1779 Official manual page:
1780 http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1782 Some conformance statements (AIX, Solaris):
1783 http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1784 http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1786 Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1787 EILSEQ and EINVAL are specifically defined to mean the error is other than
1788 an undersized buffer and no other errno are defined we treat those two
1789 as meaning hard errors and everything else gets the old behavior which
1790 is to keep looping and increasing buffer size until the function succeeds.
1792 In practice it's impossible to determine before compilation which behavior
1793 may be used. The vswprintf function may have vsnprintf-like behavior or
1794 vice-versa. Behavior detected on one release can theoretically change
1795 with an updated release. Not to mention that configure testing for it
1796 would require the test to be run on the host system, not the build system
1797 which makes cross compilation difficult. Therefore, we make no assumptions
1798 about behavior and try our best to handle every known case, including the
1799 case where wxVsnprintf returns a negative number and fails to set errno.
1801 There is yet one more non-standard implementation and that is our own.
1802 Fortunately, that can be detected at compile-time.
1804 On top of all that, ISO C99 explicitly defines snprintf to write a null
1805 character to the last position of the specified buffer. That would be at
1806 the given buffer size minus 1. It is supposed to do this even if it
1807 turns out that the buffer is sized too small.
1809 Darwin (tested on 10.5) follows the C99 behavior exactly.
1811 Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1812 errno even when it fails. However, it only seems to ever fail due
1813 to an undersized buffer.
// Helper doing the real work of wxString::PrintfV(): formats the arguments
// in argptr according to format directly into the buffer of str.
//
// The output is written by wxVsnprintf() into a buffer of size + 1
// characters obtained from a string buffer object; as the different
// wxVsnprintf() implementations report a too small buffer in different ways
// (see the long comment above), the buffer size is increased until the
// output fits. The function returns str.length() when it succeeds.
// NOTE(review): the value returned in case of a hard error is not shown
// here, confirm it before relying on it.
//
// In UTF-8 build this is a template parametrized by the type of the buffer
// object to use, in the other builds it is a plain function always using
// wxStringBuffer.
1815 #if wxUSE_UNICODE_UTF8
1816 template<typename BufferType
>
1818 // we only need one version in non-UTF8 builds and at least two Windows
1819 // compilers have problems with this function template, so use just one
1820 // normal function here
1822 static int DoStringPrintfV(wxString
& str
,
1823 const wxString
& format
, va_list argptr
)
1829 #if wxUSE_UNICODE_UTF8
1830 BufferType
tmp(str
, size
+ 1);
1831 typename
BufferType::CharType
*buf
= tmp
;
1833 wxStringBuffer
tmp(str
, size
+ 1);
1841 // in UTF-8 build, leaving uninitialized junk in the buffer
1842 // could result in invalid non-empty UTF-8 string, so just
1843 // reset the string to empty on failure:
1848 // wxVsnprintf() may modify the original arg pointer, so pass it
1851 wxVaCopy(argptrcopy
, argptr
);
1854 // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1857 int len
= wxVsnprintf(buf
, size
, format
, argptrcopy
);
1860 // some implementations of vsnprintf() don't NUL terminate
1861 // the string if there is not enough space for it so
1862 // always do it manually
1863 // FIXME: This really seems to be wrong and would be an off-by-one
1864 // bug except the code above allocates an extra character.
1865 buf
[size
] = _T('\0');
1867 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1868 // total number of characters which would have been written if the
1869 // buffer were large enough (newer standards such as Unix98)
1872 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1873 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1874 // is true if *both* of them use our own implementation,
1875 // otherwise we can't be sure
1876 #if wxUSE_WXVSNPRINTF
1877 // we know that our own implementation of wxVsnprintf() returns -1
1878 // only for a format error - thus there's something wrong with
1879 // the user's format string
1882 #else // possibly using system version
1883 // assume it only returns error if there is not enough space, but
1884 // as we don't know how much we need, double the current size of
1887 if( (errno
== EILSEQ
) || (errno
== EINVAL
) )
1888 // If errno was set to one of the two well-known hard errors
1889 // then fail immediately to avoid an infinite loop.
1892 #endif // __WXWINCE__
1893 // still not enough, as we don't know how much we need, double the
1894 // current size of the buffer
1896 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1898 else if ( len
>= size
)
1900 #if wxUSE_WXVSNPRINTF
1901 // we know that our own implementation of wxVsnprintf() returns
1902 // size+1 when there's not enough space but that's not the size
1903 // of the required buffer!
1904 size
*= 2; // so we just double the current size of the buffer
1906 // some vsnprintf() implementations NUL-terminate the buffer and
1907 // some don't in len == size case, to be safe always add 1
1908 // FIXME: I don't quite understand this comment. The vsnprintf
1909 // function is specifically defined to return the number of
1910 // characters printed not including the null terminator.
1911 // So OF COURSE you need to add 1 to get the right buffer size.
1912 // The following line is definitely correct, no question.
1916 else // ok, there was enough space
1922 // we could have overshot
1925 return str
.length();
// Formats the already started variable argument list argptr according to
// format into this string; all the real work is done by DoStringPrintfV(),
// this function only selects the version of it to call:
//  - if wxUSE_UTF8_LOCALE_ONLY is set, the UTF-8 buffer version is always used
//  - otherwise, in UTF-8 build, wxLocaleIsUtf8 is checked and either the
//    UTF-8 buffer version or the wxStringBuffer one is called
//  - in the other builds the only, non-template, version is called
1928 int wxString::PrintfV(const wxString
& format
, va_list argptr
)
1930 #if wxUSE_UNICODE_UTF8
// the type of the buffer object giving access to the UTF-8 data depends on
// whether wxUSE_STL_BASED_WXSTRING is set
1931 #if wxUSE_STL_BASED_WXSTRING
1932 typedef wxStringTypeBuffer
<char> Utf8Buffer
;
1934 typedef wxStringInternalBuffer Utf8Buffer
;
1938 #if wxUSE_UTF8_LOCALE_ONLY
1939 return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argptr
);
1941 #if wxUSE_UNICODE_UTF8
1942 if ( wxLocaleIsUtf8
)
1943 return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argptr
);
1946 return DoStringPrintfV
<wxStringBuffer
>(*this, format
, argptr
);
1948 return DoStringPrintfV(*this, format
, argptr
);
1949 #endif // UTF8/WCHAR
1953 // ----------------------------------------------------------------------------
1954 // misc other operations
1955 // ----------------------------------------------------------------------------
1957 // returns true if the string matches the pattern which may contain '*' and
1958 // '?' metacharacters (as usual, '?' matches any character and '*' any number
//
// Two implementations are present below: the first one, translating the mask
// into a regular expression, is disabled by "#if 0" and so it is the second,
// hand-written, one which is compiled. It walks the mask and the text in
// parallel and remembers the position of the last '*' seen to be able to
// backtrack to it when a later part of the mask fails to match.
1960 bool wxString::Matches(const wxString
& mask
) const
1962 // I disable this code as it doesn't seem to be faster (in fact, it seems
1963 // to be much slower) than the old, hand-written code below and using it
1964 // here requires always linking with libregex even if the user code doesn't
1966 #if 0 // wxUSE_REGEX
1967 // first translate the shell-like mask into a regex
1969 pattern
.reserve(wxStrlen(pszMask
));
1981 pattern
+= _T(".*");
1992 // these characters are special in a RE, quote them
1993 // (however note that we don't quote '[' and ']' to allow
1994 // using them for Unix shell like matching)
1995 pattern
+= _T('\\');
1999 pattern
+= *pszMask
;
2007 return wxRegEx(pattern
, wxRE_NOSUB
| wxRE_EXTENDED
).Matches(c_str());
2008 #else // !wxUSE_REGEX
2009 // TODO: this is, of course, awfully inefficient...
2011 // FIXME-UTF8: implement using iterators, remove #if
2012 #if wxUSE_UNICODE_UTF8
// in UTF-8 build both strings are converted to wide character buffers first
// and the matching is done on them
2013 wxWCharBuffer maskBuf
= mask
.wc_str();
2014 wxWCharBuffer txtBuf
= wc_str();
2015 const wxChar
*pszMask
= maskBuf
.data();
2016 const wxChar
*pszTxt
= txtBuf
.data();
2018 const wxChar
*pszMask
= mask
.wx_str();
2019 // the char currently being checked
2020 const wxChar
*pszTxt
= wx_str();
2023 // the last location where '*' matched
2024 const wxChar
*pszLastStarInText
= NULL
;
2025 const wxChar
*pszLastStarInMask
= NULL
;
2028 for ( ; *pszMask
!= wxT('\0'); pszMask
++, pszTxt
++ ) {
2029 switch ( *pszMask
) {
2031 if ( *pszTxt
== wxT('\0') )
2034 // pszTxt and pszMask will be incremented in the loop statement
2040 // remember where we started to be able to backtrack later
2041 pszLastStarInText
= pszTxt
;
2042 pszLastStarInMask
= pszMask
;
2044 // ignore special chars immediately following this one
2045 // (should this be an error?)
2046 while ( *pszMask
== wxT('*') || *pszMask
== wxT('?') )
2049 // if there is nothing more, match
2050 if ( *pszMask
== wxT('\0') )
2053 // are there any other metacharacters in the mask?
2055 const wxChar
*pEndMask
= wxStrpbrk(pszMask
, wxT("*?"));
2057 if ( pEndMask
!= NULL
) {
2058 // we have to match the string between two metachars
2059 uiLenMask
= pEndMask
- pszMask
;
2062 // we have to match the remainder of the string
2063 uiLenMask
= wxStrlen(pszMask
);
// look for the literal part of the mask following the '*' in the text
2066 wxString
strToMatch(pszMask
, uiLenMask
);
2067 const wxChar
* pMatch
= wxStrstr(pszTxt
, strToMatch
);
2068 if ( pMatch
== NULL
)
2071 // -1 to compensate "++" in the loop
2072 pszTxt
= pMatch
+ uiLenMask
- 1;
2073 pszMask
+= uiLenMask
- 1;
2078 if ( *pszMask
!= *pszTxt
)
2084 // match only if nothing left
2085 if ( *pszTxt
== wxT('\0') )
2088 // if we failed to match, backtrack if we can
2089 if ( pszLastStarInText
) {
// retry from the last '*' in the mask, starting one character further in
// the text than the previous time
2090 pszTxt
= pszLastStarInText
+ 1;
2091 pszMask
= pszLastStarInMask
;
2093 pszLastStarInText
= NULL
;
2095 // don't bother resetting pszLastStarInMask, it's unnecessary
2101 #endif // wxUSE_REGEX/!wxUSE_REGEX
2104 // Count the number of chars
2105 int wxString::Freq(wxUniChar ch
) const
2108 for ( const_iterator i
= begin(); i
!= end(); ++i
)