1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
13 // ===========================================================================
14 // headers, declarations, constants
15 // ===========================================================================
17 // For compilers that support precompilation, includes "wx.h".
18 #include "wx/wxprec.h"
25 #include "wx/string.h"
26 #include "wx/wxcrtvararg.h"
38 #include "wx/hashmap.h"
40 // string handling functions used by wxString:
41 #if wxUSE_UNICODE_UTF8
42 #define wxStringMemcpy memcpy
43 #define wxStringMemcmp memcmp
44 #define wxStringMemchr memchr
45 #define wxStringStrlen strlen
47 #define wxStringMemcpy wxTmemcpy
48 #define wxStringMemcmp wxTmemcmp
49 #define wxStringMemchr wxTmemchr
50 #define wxStringStrlen wxStrlen
53 // ----------------------------------------------------------------------------
55 // ----------------------------------------------------------------------------
60 static UntypedBufferData
s_untypedNullData(NULL
);
62 UntypedBufferData
* const untypedNullDataPtr
= &s_untypedNullData
;
64 } // namespace wxPrivate
66 // ---------------------------------------------------------------------------
67 // static class variables definition
68 // ---------------------------------------------------------------------------
70 // According to the STL, npos _must_ be (size_t)-1
71 const size_t wxString::npos
= (size_t) -1;
73 #if wxUSE_STRING_POS_CACHE
75 #ifdef wxHAS_COMPILER_TLS
77 wxTLS_TYPE(wxString::Cache
) wxString::ms_cache
;
79 #else // !wxHAS_COMPILER_TLS
81 struct wxStrCacheInitializer
83 wxStrCacheInitializer()
85 // calling this function triggers s_cache initialization in it, and
86 // from now on it becomes safe to call from multiple threads
92 wxString::Cache& wxString::GetCache()
94 static wxTLS_TYPE(Cache) s_cache;
96 return wxTLS_VALUE(s_cache);
100 static wxStrCacheInitializer gs_stringCacheInit
;
102 #endif // wxHAS_COMPILER_TLS/!wxHAS_COMPILER_TLS
104 // gdb seems to be unable to display thread-local variables correctly, at least
105 // not my 6.4.98 version under amd64, so provide this debugging helper to do it
108 struct wxStrCacheDumper
110 static void ShowAll()
112 puts("*** wxString cache dump:");
113 for ( unsigned n
= 0; n
< wxString::Cache::SIZE
; n
++ )
115 const wxString::Cache::Element
&
116 c
= wxString::GetCacheBegin()[n
];
118 printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
120 n
== wxString::LastUsedCacheElement() ? " [*]" : "",
122 (unsigned long)c
.pos
,
123 (unsigned long)c
.impl
,
129 void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
131 #endif // __WXDEBUG__
133 #ifdef wxPROFILE_STRING_CACHE
135 wxString::CacheStats
wxString::ms_cacheStats
;
137 struct wxStrCacheStatsDumper
139 ~wxStrCacheStatsDumper()
141 const wxString::CacheStats
& stats
= wxString::ms_cacheStats
;
145 puts("*** wxString cache statistics:");
146 printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
148 printf("\tHits %u (of which %u not used) or %.2f%%\n",
151 100.*float(stats
.poshits
- stats
.mishits
)/stats
.postot
);
152 printf("\tAverage position requested: %.2f\n",
153 float(stats
.sumpos
) / stats
.postot
);
154 printf("\tAverage offset after cached hint: %.2f\n",
155 float(stats
.sumofs
) / stats
.postot
);
160 printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
161 stats
.lentot
, 100.*float(stats
.lenhits
)/stats
.lentot
);
166 static wxStrCacheStatsDumper s_showCacheStats
;
168 #endif // wxPROFILE_STRING_CACHE
170 #endif // wxUSE_STRING_POS_CACHE
172 // ----------------------------------------------------------------------------
174 // ----------------------------------------------------------------------------
176 #if wxUSE_STD_IOSTREAM
180 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxCStrData
& str
)
182 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
183 const wxCharBuffer
buf(str
.AsCharBuf());
185 os
.clear(wxSTD
ios_base::failbit
);
191 return os
<< str
.AsInternal();
195 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxString
& str
)
197 return os
<< str
.c_str();
200 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxCharBuffer
& str
)
202 return os
<< str
.data();
206 wxSTD ostream
& operator<<(wxSTD ostream
& os
, const wxWCharBuffer
& str
)
208 return os
<< str
.data();
212 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
214 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxString
& str
)
216 return wos
<< str
.wc_str();
219 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxCStrData
& str
)
221 return wos
<< str
.AsWChar();
224 wxSTD wostream
& operator<<(wxSTD wostream
& wos
, const wxWCharBuffer
& str
)
226 return wos
<< str
.data();
229 #endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
231 #endif // wxUSE_STD_IOSTREAM
233 // ===========================================================================
234 // wxString class core
235 // ===========================================================================
237 #if wxUSE_UNICODE_UTF8
239 void wxString::PosLenToImpl(size_t pos
, size_t len
,
240 size_t *implPos
, size_t *implLen
) const
246 else // have valid start position
248 const const_iterator b
= GetIterForNthChar(pos
);
249 *implPos
= wxStringImpl::const_iterator(b
.impl()) - m_impl
.begin();
254 else // have valid length too
256 // we need to handle the case of length specifying a substring
257 // going beyond the end of the string, just as std::string does
258 const const_iterator
e(end());
260 while ( len
&& i
<= e
)
266 *implLen
= i
.impl() - b
.impl();
271 #endif // wxUSE_UNICODE_UTF8
273 // ----------------------------------------------------------------------------
274 // wxCStrData converted strings caching
275 // ----------------------------------------------------------------------------
277 // FIXME-UTF8: temporarily disabled because it doesn't work with global
278 // string objects; re-enable after fixing this bug and benchmarking
279 // performance to see if using a hash is a good idea at all
282 // For backward compatibility reasons, it must be possible to assign the value
283 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
284 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
285 // because the memory would be freed immediately, but it has to be valid as long
286 // as the string is not modified, so that code like this still works:
288 // const wxChar *s = str.c_str();
289 // while ( s ) { ... }
291 // FIXME-UTF8: not thread safe!
292 // FIXME-UTF8: we currently clear the cached conversion only when the string is
293 // destroyed, but we should do it when the string is modified, to
294 // keep memory usage down
295 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
296 // invalidated the cache on every change, we could keep the previous
298 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
299 // to use mb_str() or wc_str() instead of (const [w]char*)c_str()
302 static inline void DeleteStringFromConversionCache(T
& hash
, const wxString
*s
)
304 typename
T::iterator i
= hash
.find(wxConstCast(s
, wxString
));
305 if ( i
!= hash
.end() )
313 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
314 // so we have to use wxString* here and const-cast when used
315 WX_DECLARE_HASH_MAP(wxString
*, char*, wxPointerHash
, wxPointerEqual
,
316 wxStringCharConversionCache
);
317 static wxStringCharConversionCache gs_stringsCharCache
;
319 const char* wxCStrData::AsChar() const
321 // remove previously cached value, if any (see FIXMEs above):
322 DeleteStringFromConversionCache(gs_stringsCharCache
, m_str
);
324 // convert the string and keep it:
325 const char *s
= gs_stringsCharCache
[wxConstCast(m_str
, wxString
)] =
326 m_str
->mb_str().release();
330 #endif // wxUSE_UNICODE
332 #if !wxUSE_UNICODE_WCHAR
333 WX_DECLARE_HASH_MAP(wxString
*, wchar_t*, wxPointerHash
, wxPointerEqual
,
334 wxStringWCharConversionCache
);
335 static wxStringWCharConversionCache gs_stringsWCharCache
;
337 const wchar_t* wxCStrData::AsWChar() const
339 // remove previously cached value, if any (see FIXMEs above):
340 DeleteStringFromConversionCache(gs_stringsWCharCache
, m_str
);
342 // convert the string and keep it:
343 const wchar_t *s
= gs_stringsWCharCache
[wxConstCast(m_str
, wxString
)] =
344 m_str
->wc_str().release();
348 #endif // !wxUSE_UNICODE_WCHAR
350 wxString::~wxString()
353 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
354 DeleteStringFromConversionCache(gs_stringsCharCache
, this);
356 #if !wxUSE_UNICODE_WCHAR
357 DeleteStringFromConversionCache(gs_stringsWCharCache
, this);
362 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
363 const char* wxCStrData::AsChar() const
365 #if wxUSE_UNICODE_UTF8
366 if ( wxLocaleIsUtf8
)
369 // under non-UTF8 locales, we have to convert the internal UTF-8
370 // representation using wxConvLibc and cache the result
372 wxString
*str
= wxConstCast(m_str
, wxString
);
374 // convert the string:
376 // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
377 // have it) but it's unfortunately not obvious to implement
378 // because we don't know how big a buffer we need for the
379 // given string length (in case of multibyte encodings, e.g.
380 // ISO-2022-JP or UTF-8 when internal representation is wchar_t)
382 // One idea would be to store more than just m_convertedToChar
383 // in wxString: then we could record the length of the string
384 // which was converted the last time and try to reuse the same
385 // buffer if the current length is not greater than it (this
386 // could still fail because string could have been modified in
387 // place but it would work most of the time, so we'd do it and
388 // only allocate the new buffer if in-place conversion returned
389 // an error). We could also store a bit saying if the string
390 // was modified since the last conversion (and update it in all
391 // operation modifying the string, of course) to avoid unneeded
392 // consequential conversions. But both of these ideas require
393 // adding more fields to wxString and require profiling results
394 // to be sure that we really gain enough from them to justify
396 wxCharBuffer
buf(str
->mb_str());
398 // if it failed, return empty string and not NULL to avoid crashes in code
399 // written with either wxWidgets 2 wxString or std::string behaviour in
400 // mind: neither of them ever returns NULL and so we shouldn't either
404 if ( str
->m_convertedToChar
&&
405 strlen(buf
) == strlen(str
->m_convertedToChar
) )
407 // keep the same buffer for as long as possible, so that several calls
408 // to c_str() in a row still work:
409 strcpy(str
->m_convertedToChar
, buf
);
413 str
->m_convertedToChar
= buf
.release();
417 return str
->m_convertedToChar
+ m_offset
;
419 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
421 #if !wxUSE_UNICODE_WCHAR
422 const wchar_t* wxCStrData::AsWChar() const
424 wxString
*str
= wxConstCast(m_str
, wxString
);
426 // convert the string:
427 wxWCharBuffer
buf(str
->wc_str());
429 // notice that here, unlike above in AsChar(), conversion can't fail as our
430 // internal UTF-8 is always well-formed -- or the string was corrupted and
431 // all bets are off anyhow
433 // FIXME-UTF8: do the conversion in-place in the existing buffer
434 if ( str
->m_convertedToWChar
&&
435 wxWcslen(buf
) == wxWcslen(str
->m_convertedToWChar
) )
437 // keep the same buffer for as long as possible, so that several calls
438 // to c_str() in a row still work:
439 memcpy(str
->m_convertedToWChar
, buf
, sizeof(wchar_t) * wxWcslen(buf
));
443 str
->m_convertedToWChar
= buf
.release();
447 return str
->m_convertedToWChar
+ m_offset
;
449 #endif // !wxUSE_UNICODE_WCHAR
451 // ===========================================================================
452 // wxString class core
453 // ===========================================================================
455 // ---------------------------------------------------------------------------
456 // construction and conversion
457 // ---------------------------------------------------------------------------
459 #if wxUSE_UNICODE_WCHAR
461 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
462 const wxMBConv
& conv
)
465 if ( !psz
|| nLength
== 0 )
466 return SubstrBufFromMB(L
"", 0);
468 if ( nLength
== npos
)
472 wxWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
474 return SubstrBufFromMB(_T(""), 0);
476 return SubstrBufFromMB(wcBuf
, wcLen
);
478 #endif // wxUSE_UNICODE_WCHAR
480 #if wxUSE_UNICODE_UTF8
482 wxString::SubstrBufFromMB
wxString::ConvertStr(const char *psz
, size_t nLength
,
483 const wxMBConv
& conv
)
486 if ( !psz
|| nLength
== 0 )
487 return SubstrBufFromMB("", 0);
489 // if psz is already in UTF-8, we don't have to do the roundtrip to
490 // wchar_t* and back:
493 // we need to validate the input because UTF8 iterators assume valid
494 // UTF-8 sequence and psz may be invalid:
495 if ( wxStringOperations::IsValidUtf8String(psz
, nLength
) )
497 // we must pass the real string length to SubstrBufFromMB ctor
498 if ( nLength
== npos
)
499 nLength
= psz
? strlen(psz
) : 0;
500 return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz
), nLength
);
502 // else: do the roundtrip through wchar_t*
505 if ( nLength
== npos
)
508 // first convert to wide string:
510 wxWCharBuffer
wcBuf(conv
.cMB2WC(psz
, nLength
, &wcLen
));
512 return SubstrBufFromMB("", 0);
514 // and then to UTF-8:
515 SubstrBufFromMB
buf(ConvertStr(wcBuf
, wcLen
, wxMBConvStrictUTF8()));
516 // widechar -> UTF-8 conversion isn't supposed to ever fail:
517 wxASSERT_MSG( buf
.data
, _T("conversion to UTF-8 failed") );
521 #endif // wxUSE_UNICODE_UTF8
523 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
525 wxString::SubstrBufFromWC
wxString::ConvertStr(const wchar_t *pwz
, size_t nLength
,
526 const wxMBConv
& conv
)
529 if ( !pwz
|| nLength
== 0 )
530 return SubstrBufFromWC("", 0);
532 if ( nLength
== npos
)
536 wxCharBuffer
mbBuf(conv
.cWC2MB(pwz
, nLength
, &mbLen
));
538 return SubstrBufFromWC("", 0);
540 return SubstrBufFromWC(mbBuf
, mbLen
);
542 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
545 #if wxUSE_UNICODE_WCHAR
547 //Convert wxString in Unicode mode to a multi-byte string
548 const wxCharBuffer
wxString::mb_str(const wxMBConv
& conv
) const
550 return conv
.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL
);
553 #elif wxUSE_UNICODE_UTF8
555 const wxWCharBuffer
wxString::wc_str() const
557 return wxMBConvStrictUTF8().cMB2WC
560 m_impl
.length() + 1, // size, not length
565 const wxCharBuffer
wxString::mb_str(const wxMBConv
& conv
) const
568 return wxCharBuffer::CreateNonOwned(m_impl
.c_str());
570 // FIXME-UTF8: use wc_str() here once we have buffers with length
573 wxWCharBuffer
wcBuf(wxMBConvStrictUTF8().cMB2WC
576 m_impl
.length() + 1, // size
580 return wxCharBuffer("");
582 return conv
.cWC2MB(wcBuf
, wcLen
+1, NULL
);
587 //Converts this string to a wide character string if unicode
588 //mode is not enabled and wxUSE_WCHAR_T is enabled
589 const wxWCharBuffer
wxString::wc_str(const wxMBConv
& conv
) const
591 return conv
.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL
);
594 #endif // Unicode/ANSI
596 // shrink to minimal size (releasing extra memory)
597 bool wxString::Shrink()
599 wxString
tmp(begin(), end());
601 return tmp
.length() == length();
604 // deprecated compatibility code:
605 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
606 wxStringCharType
*wxString::GetWriteBuf(size_t nLen
)
608 return DoGetWriteBuf(nLen
);
611 void wxString::UngetWriteBuf()
616 void wxString::UngetWriteBuf(size_t nLen
)
618 DoUngetWriteBuf(nLen
);
620 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
623 // ---------------------------------------------------------------------------
625 // ---------------------------------------------------------------------------
627 // all functions are inline in string.h
629 // ---------------------------------------------------------------------------
630 // concatenation operators
631 // ---------------------------------------------------------------------------
634 * concatenation functions come in 5 flavours:
636 * char + string and string + char
637 * C str + string and string + C str
640 wxString
operator+(const wxString
& str1
, const wxString
& str2
)
642 #if !wxUSE_STL_BASED_WXSTRING
643 wxASSERT( str1
.IsValid() );
644 wxASSERT( str2
.IsValid() );
653 wxString
operator+(const wxString
& str
, wxUniChar ch
)
655 #if !wxUSE_STL_BASED_WXSTRING
656 wxASSERT( str
.IsValid() );
665 wxString
operator+(wxUniChar ch
, const wxString
& str
)
667 #if !wxUSE_STL_BASED_WXSTRING
668 wxASSERT( str
.IsValid() );
677 wxString
operator+(const wxString
& str
, const char *psz
)
679 #if !wxUSE_STL_BASED_WXSTRING
680 wxASSERT( str
.IsValid() );
684 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
685 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
693 wxString
operator+(const wxString
& str
, const wchar_t *pwz
)
695 #if !wxUSE_STL_BASED_WXSTRING
696 wxASSERT( str
.IsValid() );
700 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
701 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
709 wxString
operator+(const char *psz
, const wxString
& str
)
711 #if !wxUSE_STL_BASED_WXSTRING
712 wxASSERT( str
.IsValid() );
716 if ( !s
.Alloc(strlen(psz
) + str
.length()) ) {
717 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
725 wxString
operator+(const wchar_t *pwz
, const wxString
& str
)
727 #if !wxUSE_STL_BASED_WXSTRING
728 wxASSERT( str
.IsValid() );
732 if ( !s
.Alloc(wxWcslen(pwz
) + str
.length()) ) {
733 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
741 // ---------------------------------------------------------------------------
743 // ---------------------------------------------------------------------------
745 bool wxString::IsSameAs(wxUniChar c
, bool compareWithCase
) const
747 return (length() == 1) && (compareWithCase
? GetChar(0u) == c
748 : wxToupper(GetChar(0u)) == wxToupper(c
));
751 #ifdef HAVE_STD_STRING_COMPARE
753 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
754 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
755 // sort strings in characters code point order by sorting the byte sequence
756 // in byte values order (i.e. what strcmp() and memcmp() do).
758 int wxString::compare(const wxString
& str
) const
760 return m_impl
.compare(str
.m_impl
);
763 int wxString::compare(size_t nStart
, size_t nLen
,
764 const wxString
& str
) const
767 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
768 return m_impl
.compare(pos
, len
, str
.m_impl
);
771 int wxString::compare(size_t nStart
, size_t nLen
,
773 size_t nStart2
, size_t nLen2
) const
776 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
779 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
781 return m_impl
.compare(pos
, len
, str
.m_impl
, pos2
, len2
);
784 int wxString::compare(const char* sz
) const
786 return m_impl
.compare(ImplStr(sz
));
789 int wxString::compare(const wchar_t* sz
) const
791 return m_impl
.compare(ImplStr(sz
));
794 int wxString::compare(size_t nStart
, size_t nLen
,
795 const char* sz
, size_t nCount
) const
798 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
800 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
802 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
805 int wxString::compare(size_t nStart
, size_t nLen
,
806 const wchar_t* sz
, size_t nCount
) const
809 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
811 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
813 return m_impl
.compare(pos
, len
, str
.data
, str
.len
);
816 #else // !HAVE_STD_STRING_COMPARE
818 static inline int wxDoCmp(const wxStringCharType
* s1
, size_t l1
,
819 const wxStringCharType
* s2
, size_t l2
)
822 return wxStringMemcmp(s1
, s2
, l1
);
825 int ret
= wxStringMemcmp(s1
, s2
, l1
);
826 return ret
== 0 ? -1 : ret
;
830 int ret
= wxStringMemcmp(s1
, s2
, l2
);
831 return ret
== 0 ? +1 : ret
;
835 int wxString::compare(const wxString
& str
) const
837 return ::wxDoCmp(m_impl
.data(), m_impl
.length(),
838 str
.m_impl
.data(), str
.m_impl
.length());
841 int wxString::compare(size_t nStart
, size_t nLen
,
842 const wxString
& str
) const
844 wxASSERT(nStart
<= length());
845 size_type strLen
= length() - nStart
;
846 nLen
= strLen
< nLen
? strLen
: nLen
;
849 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
851 return ::wxDoCmp(m_impl
.data() + pos
, len
,
852 str
.m_impl
.data(), str
.m_impl
.length());
855 int wxString::compare(size_t nStart
, size_t nLen
,
857 size_t nStart2
, size_t nLen2
) const
859 wxASSERT(nStart
<= length());
860 wxASSERT(nStart2
<= str
.length());
861 size_type strLen
= length() - nStart
,
862 strLen2
= str
.length() - nStart2
;
863 nLen
= strLen
< nLen
? strLen
: nLen
;
864 nLen2
= strLen2
< nLen2
? strLen2
: nLen2
;
867 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
869 str
.PosLenToImpl(nStart2
, nLen2
, &pos2
, &len2
);
871 return ::wxDoCmp(m_impl
.data() + pos
, len
,
872 str
.m_impl
.data() + pos2
, len2
);
875 int wxString::compare(const char* sz
) const
877 SubstrBufFromMB
str(ImplStr(sz
, npos
));
878 if ( str
.len
== npos
)
879 str
.len
= wxStringStrlen(str
.data
);
880 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
883 int wxString::compare(const wchar_t* sz
) const
885 SubstrBufFromWC
str(ImplStr(sz
, npos
));
886 if ( str
.len
== npos
)
887 str
.len
= wxStringStrlen(str
.data
);
888 return ::wxDoCmp(m_impl
.data(), m_impl
.length(), str
.data
, str
.len
);
891 int wxString::compare(size_t nStart
, size_t nLen
,
892 const char* sz
, size_t nCount
) const
894 wxASSERT(nStart
<= length());
895 size_type strLen
= length() - nStart
;
896 nLen
= strLen
< nLen
? strLen
: nLen
;
899 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
901 SubstrBufFromMB
str(ImplStr(sz
, nCount
));
902 if ( str
.len
== npos
)
903 str
.len
= wxStringStrlen(str
.data
);
905 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
908 int wxString::compare(size_t nStart
, size_t nLen
,
909 const wchar_t* sz
, size_t nCount
) const
911 wxASSERT(nStart
<= length());
912 size_type strLen
= length() - nStart
;
913 nLen
= strLen
< nLen
? strLen
: nLen
;
916 PosLenToImpl(nStart
, nLen
, &pos
, &len
);
918 SubstrBufFromWC
str(ImplStr(sz
, nCount
));
919 if ( str
.len
== npos
)
920 str
.len
= wxStringStrlen(str
.data
);
922 return ::wxDoCmp(m_impl
.data() + pos
, len
, str
.data
, str
.len
);
925 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
928 // ---------------------------------------------------------------------------
929 // find_{first,last}_[not]_of functions
930 // ---------------------------------------------------------------------------
932 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
934 // NB: All these functions are implemented with the argument being wxChar*,
935 // i.e. widechar string in any Unicode build, even though native string
936 // representation is char* in the UTF-8 build. This is because we couldn't
937 // use memchr() to determine if a character is in a set encoded as UTF-8.
939 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
) const
941 return find_first_of(sz
, nStart
, wxStrlen(sz
));
944 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
) const
946 return find_first_not_of(sz
, nStart
, wxStrlen(sz
));
949 size_t wxString::find_first_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
951 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
954 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
956 if ( wxTmemchr(sz
, *i
, n
) )
963 size_t wxString::find_first_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
965 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
968 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
970 if ( !wxTmemchr(sz
, *i
, n
) )
978 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
) const
980 return find_last_of(sz
, nStart
, wxStrlen(sz
));
983 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
) const
985 return find_last_not_of(sz
, nStart
, wxStrlen(sz
));
988 size_t wxString::find_last_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
990 size_t len
= length();
992 if ( nStart
== npos
)
998 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
1001 size_t idx
= nStart
;
1002 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
1003 i
!= rend(); --idx
, ++i
)
1005 if ( wxTmemchr(sz
, *i
, n
) )
1012 size_t wxString::find_last_not_of(const wxChar
* sz
, size_t nStart
, size_t n
) const
1014 size_t len
= length();
1016 if ( nStart
== npos
)
1022 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
1025 size_t idx
= nStart
;
1026 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
1027 i
!= rend(); --idx
, ++i
)
1029 if ( !wxTmemchr(sz
, *i
, n
) )
1036 size_t wxString::find_first_not_of(wxUniChar ch
, size_t nStart
) const
1038 wxASSERT_MSG( nStart
<= length(), _T("invalid index") );
1040 size_t idx
= nStart
;
1041 for ( const_iterator i
= begin() + nStart
; i
!= end(); ++idx
, ++i
)
1050 size_t wxString::find_last_not_of(wxUniChar ch
, size_t nStart
) const
1052 size_t len
= length();
1054 if ( nStart
== npos
)
1060 wxASSERT_MSG( nStart
<= len
, _T("invalid index") );
1063 size_t idx
= nStart
;
1064 for ( const_reverse_iterator i
= rbegin() + (len
- nStart
- 1);
1065 i
!= rend(); --idx
, ++i
)
1074 // the functions above were implemented for wchar_t* arguments in Unicode
1075 // build and char* in ANSI build; below are implementations for the other
1078 #define wxOtherCharType char
1079 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
1081 #define wxOtherCharType wchar_t
1082 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
1085 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
) const
1086 { return find_first_of(STRCONV(sz
), nStart
); }
1088 size_t wxString::find_first_of(const wxOtherCharType
* sz
, size_t nStart
,
1090 { return find_first_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1091 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
) const
1092 { return find_last_of(STRCONV(sz
), nStart
); }
1093 size_t wxString::find_last_of(const wxOtherCharType
* sz
, size_t nStart
,
1095 { return find_last_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1096 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
1097 { return find_first_not_of(STRCONV(sz
), nStart
); }
1098 size_t wxString::find_first_not_of(const wxOtherCharType
* sz
, size_t nStart
,
1100 { return find_first_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1101 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
) const
1102 { return find_last_not_of(STRCONV(sz
), nStart
); }
1103 size_t wxString::find_last_not_of(const wxOtherCharType
* sz
, size_t nStart
,
1105 { return find_last_not_of(STRCONV(sz
, n
, NULL
), nStart
, n
); }
1107 #undef wxOtherCharType
1110 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1112 // ===========================================================================
1113 // other common string functions
1114 // ===========================================================================
1116 int wxString::CmpNoCase(const wxString
& s
) const
1118 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
1120 const_iterator i1
= begin();
1121 const_iterator end1
= end();
1122 const_iterator i2
= s
.begin();
1123 const_iterator end2
= s
.end();
1125 for ( ; i1
!= end1
&& i2
!= end2
; ++i1
, ++i2
)
1127 wxUniChar lower1
= (wxChar
)wxTolower(*i1
);
1128 wxUniChar lower2
= (wxChar
)wxTolower(*i2
);
1129 if ( lower1
!= lower2
)
1130 return lower1
< lower2
? -1 : 1;
1133 size_t len1
= length();
1134 size_t len2
= s
.length();
1138 else if ( len1
> len2
)
1147 #ifndef __SCHAR_MAX__
1148 #define __SCHAR_MAX__ 127
1152 wxString
wxString::FromAscii(const char *ascii
, size_t len
)
1154 if (!ascii
|| len
== 0)
1155 return wxEmptyString
;
1160 wxStringInternalBuffer
buf(res
, len
);
1161 wxStringCharType
*dest
= buf
;
1163 for ( ; len
> 0; --len
)
1165 unsigned char c
= (unsigned char)*ascii
++;
1166 wxASSERT_MSG( c
< 0x80,
1167 _T("Non-ASCII value passed to FromAscii().") );
1169 *dest
++ = (wchar_t)c
;
1176 wxString
wxString::FromAscii(const char *ascii
)
1178 return FromAscii(ascii
, wxStrlen(ascii
));
1181 wxString
wxString::FromAscii(char ascii
)
1183 // What do we do with '\0' ?
1185 unsigned char c
= (unsigned char)ascii
;
1187 wxASSERT_MSG( c
< 0x80, _T("Non-ASCII value passed to FromAscii().") );
1189 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1190 return wxString(wxUniChar((wchar_t)c
));
1193 const wxCharBuffer
wxString::ToAscii() const
1195 // this will allocate enough space for the terminating NUL too
1196 wxCharBuffer
buffer(length());
1197 char *dest
= buffer
.data();
1199 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1202 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1203 *dest
++ = c
.IsAscii() ? (char)c
: '_';
1205 // the output string can't have embedded NULs anyhow, so we can safely
1206 // stop at first of them even if we do have any
1214 #endif // wxUSE_UNICODE
1216 // extract string of length nCount starting at nFirst
1217 wxString
wxString::Mid(size_t nFirst
, size_t nCount
) const
1219 size_t nLen
= length();
1221 // default value of nCount is npos and means "till the end"
1222 if ( nCount
== npos
)
1224 nCount
= nLen
- nFirst
;
1227 // out-of-bounds requests return sensible things
1228 if ( nFirst
+ nCount
> nLen
)
1230 nCount
= nLen
- nFirst
;
1233 if ( nFirst
> nLen
)
1235 // AllocCopy() will return empty string
1236 return wxEmptyString
;
1239 wxString
dest(*this, nFirst
, nCount
);
1240 if ( dest
.length() != nCount
)
1242 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1248 // check that the string starts with prefix and return the rest of the string
1249 // in the provided pointer if it is not NULL, otherwise return false
1250 bool wxString::StartsWith(const wxString
& prefix
, wxString
*rest
) const
1252 if ( compare(0, prefix
.length(), prefix
) != 0 )
1257 // put the rest of the string into provided pointer
1258 rest
->assign(*this, prefix
.length(), npos
);
1265 // check that the string ends with suffix and return the rest of it in the
1266 // provided pointer if it is not NULL, otherwise return false
1267 bool wxString::EndsWith(const wxString
& suffix
, wxString
*rest
) const
1269 int start
= length() - suffix
.length();
1271 if ( start
< 0 || compare(start
, npos
, suffix
) != 0 )
1276 // put the rest of the string into provided pointer
1277 rest
->assign(*this, 0, start
);
1284 // extract nCount last (rightmost) characters
1285 wxString
wxString::Right(size_t nCount
) const
1287 if ( nCount
> length() )
1290 wxString
dest(*this, length() - nCount
, nCount
);
1291 if ( dest
.length() != nCount
) {
1292 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1297 // get all characters after the last occurrence of ch
1298 // (returns the whole string if ch not found)
1299 wxString
wxString::AfterLast(wxUniChar ch
) const
1302 int iPos
= Find(ch
, true);
1303 if ( iPos
== wxNOT_FOUND
)
1306 str
.assign(*this, iPos
+ 1, npos
);
1311 // extract nCount first (leftmost) characters
1312 wxString
wxString::Left(size_t nCount
) const
1314 if ( nCount
> length() )
1317 wxString
dest(*this, 0, nCount
);
1318 if ( dest
.length() != nCount
) {
1319 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1324 // get all characters before the first occurrence of ch
1325 // (returns the whole string if ch not found)
1326 wxString
wxString::BeforeFirst(wxUniChar ch
) const
1328 int iPos
= Find(ch
);
1329 if ( iPos
== wxNOT_FOUND
)
1331 return wxString(*this, 0, iPos
);
1334 /// get all characters before the last occurrence of ch
1335 /// (returns empty string if ch not found)
1336 wxString
wxString::BeforeLast(wxUniChar ch
) const
1339 int iPos
= Find(ch
, true);
1340 if ( iPos
!= wxNOT_FOUND
&& iPos
!= 0 )
1341 str
= wxString(c_str(), iPos
);
1346 /// get all characters after the first occurrence of ch
1347 /// (returns empty string if ch not found)
1348 wxString
wxString::AfterFirst(wxUniChar ch
) const
1351 int iPos
= Find(ch
);
1352 if ( iPos
!= wxNOT_FOUND
)
1353 str
.assign(*this, iPos
+ 1, npos
);
1358 // replace first (or all) occurrences of some substring with another one
1359 size_t wxString::Replace(const wxString
& strOld
,
1360 const wxString
& strNew
, bool bReplaceAll
)
1362 // if we tried to replace an empty string we'd enter an infinite loop below
1363 wxCHECK_MSG( !strOld
.empty(), 0,
1364 _T("wxString::Replace(): invalid parameter") );
1366 wxSTRING_INVALIDATE_CACHE();
1368 size_t uiCount
= 0; // count of replacements made
1370 // optimize the special common case: replacement of one character by
1371 // another one (in UTF-8 case we can only do this for ASCII characters)
1373 // benchmarks show that this special version is around 3 times faster
1374 // (depending on the proportion of matching characters and UTF-8/wchar_t
1376 if ( strOld
.m_impl
.length() == 1 && strNew
.m_impl
.length() == 1 )
1378 const wxStringCharType chOld
= strOld
.m_impl
[0],
1379 chNew
= strNew
.m_impl
[0];
1381 // this loop is the simplified version of the one below
1382 for ( size_t pos
= 0; ; )
1384 pos
= m_impl
.find(chOld
, pos
);
1388 m_impl
[pos
++] = chNew
;
1396 else // general case
1398 const size_t uiOldLen
= strOld
.m_impl
.length();
1399 const size_t uiNewLen
= strNew
.m_impl
.length();
1401 for ( size_t pos
= 0; ; )
1403 pos
= m_impl
.find(strOld
.m_impl
, pos
);
1407 // replace this occurrence of the old string with the new one
1408 m_impl
.replace(pos
, uiOldLen
, strNew
.m_impl
);
1410 // move up pos past the string that was replaced
1413 // increase replace count
1416 // stop after the first one?
1425 bool wxString::IsAscii() const
1427 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1429 if ( !(*i
).IsAscii() )
1436 bool wxString::IsWord() const
1438 for ( const_iterator i
= begin(); i
!= end(); ++i
)
1440 if ( !wxIsalpha(*i
) )
1447 bool wxString::IsNumber() const
1452 const_iterator i
= begin();
1454 if ( *i
== _T('-') || *i
== _T('+') )
1457 for ( ; i
!= end(); ++i
)
1459 if ( !wxIsdigit(*i
) )
1466 wxString
wxString::Strip(stripType w
) const
1469 if ( w
& leading
) s
.Trim(false);
1470 if ( w
& trailing
) s
.Trim(true);
1474 // ---------------------------------------------------------------------------
1476 // ---------------------------------------------------------------------------
1478 wxString
& wxString::MakeUpper()
1480 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1481 *it
= (wxChar
)wxToupper(*it
);
1486 wxString
& wxString::MakeLower()
1488 for ( iterator it
= begin(), en
= end(); it
!= en
; ++it
)
1489 *it
= (wxChar
)wxTolower(*it
);
1494 wxString
& wxString::MakeCapitalized()
1496 const iterator en
= end();
1497 iterator it
= begin();
1500 *it
= (wxChar
)wxToupper(*it
);
1501 for ( ++it
; it
!= en
; ++it
)
1502 *it
= (wxChar
)wxTolower(*it
);
1508 // ---------------------------------------------------------------------------
1509 // trimming and padding
1510 // ---------------------------------------------------------------------------
1512 // some compilers (VC++ 6.0 not to name them) return true for a call to
1513 // isspace('\xEA') in the C locale which seems to be broken to me, but we have
1514 // to live with this by checking that the character is a 7 bit one - even if
1515 // this may fail to detect some spaces (I don't know if Unicode doesn't have
1516 // space-like symbols somewhere except in the first 128 chars), it is arguably
1517 // still better than trimming away accented letters
1518 inline int wxSafeIsspace(wxChar ch
) { return (ch
< 127) && wxIsspace(ch
); }
1520 // trims spaces (in the sense of isspace) from left or right side
1521 wxString
& wxString::Trim(bool bFromRight
)
1523 // first check if we're going to modify the string at all
1526 (bFromRight
&& wxSafeIsspace(GetChar(length() - 1))) ||
1527 (!bFromRight
&& wxSafeIsspace(GetChar(0u)))
1533 // find last non-space character
1534 reverse_iterator psz
= rbegin();
1535 while ( (psz
!= rend()) && wxSafeIsspace(*psz
) )
1538 // truncate at trailing space start
1539 erase(psz
.base(), end());
1543 // find first non-space character
1544 iterator psz
= begin();
1545 while ( (psz
!= end()) && wxSafeIsspace(*psz
) )
1548 // fix up data and length
1549 erase(begin(), psz
);
1556 // adds nCount characters chPad to the string from either side
1557 wxString
& wxString::Pad(size_t nCount
, wxUniChar chPad
, bool bFromRight
)
1559 wxString
s(chPad
, nCount
);
1572 // truncate the string
1573 wxString
& wxString::Truncate(size_t uiLen
)
1575 if ( uiLen
< length() )
1577 erase(begin() + uiLen
, end());
1579 //else: nothing to do, string is already short enough
1584 // ---------------------------------------------------------------------------
1585 // finding (return wxNOT_FOUND if not found and index otherwise)
1586 // ---------------------------------------------------------------------------
1589 int wxString::Find(wxUniChar ch
, bool bFromEnd
) const
1591 size_type idx
= bFromEnd
? find_last_of(ch
) : find_first_of(ch
);
1593 return (idx
== npos
) ? wxNOT_FOUND
: (int)idx
;
1596 // ----------------------------------------------------------------------------
1597 // conversion to numbers
1598 // ----------------------------------------------------------------------------
1600 // The implementation of all the functions below is exactly the same so factor
1601 // it out. Note that number extraction works correctly on UTF-8 strings, so
1602 // we can use wxStringCharType and wx_str() for maximum efficiency.
1605 #define DO_IF_NOT_WINCE(x) x
1607 #define DO_IF_NOT_WINCE(x)
1610 #define WX_STRING_TO_INT_TYPE(out, base, func, T) \
1611 wxCHECK_MSG( out, false, _T("NULL output pointer") ); \
1612 wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") ); \
1614 DO_IF_NOT_WINCE( errno = 0; ) \
1616 const wxStringCharType *start = wx_str(); \
1617 wxStringCharType *end; \
1618 T val = func(start, &end, base); \
1620 /* return true only if scan was stopped by the terminating NUL and */ \
1621 /* if the string was not empty to start with and no under/overflow */ \
1623 if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) ) \
1628 bool wxString::ToLong(long *pVal
, int base
) const
1630 WX_STRING_TO_INT_TYPE(pVal
, base
, wxStrtol
, long);
1633 bool wxString::ToULong(unsigned long *pVal
, int base
) const
1635 WX_STRING_TO_INT_TYPE(pVal
, base
, wxStrtoul
, unsigned long);
1638 bool wxString::ToLongLong(wxLongLong_t
*pVal
, int base
) const
1640 WX_STRING_TO_INT_TYPE(pVal
, base
, wxStrtoll
, wxLongLong_t
);
1643 bool wxString::ToULongLong(wxULongLong_t
*pVal
, int base
) const
1645 WX_STRING_TO_INT_TYPE(pVal
, base
, wxStrtoull
, wxULongLong_t
);
1648 bool wxString::ToDouble(double *pVal
) const
1650 wxCHECK_MSG( pVal
, false, _T("NULL output pointer") );
1652 DO_IF_NOT_WINCE( errno
= 0; )
1654 const wxChar
*start
= c_str();
1656 double val
= wxStrtod(start
, &end
);
1658 // return true only if scan was stopped by the terminating NUL and if the
1659 // string was not empty to start with and no under/overflow occurred
1660 if ( *end
|| end
== start
DO_IF_NOT_WINCE(|| errno
== ERANGE
) )
1668 // ---------------------------------------------------------------------------
1670 // ---------------------------------------------------------------------------
1672 #if !wxUSE_UTF8_LOCALE_ONLY
1674 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1675 wxString
wxStringPrintfMixinBase::DoFormatWchar(const wxChar
*format
, ...)
1677 wxString
wxString::DoFormatWchar(const wxChar
*format
, ...)
1681 va_start(argptr
, format
);
1684 s
.PrintfV(format
, argptr
);
1690 #endif // !wxUSE_UTF8_LOCALE_ONLY
#if wxUSE_UNICODE_UTF8

// Return a new string built from the printf()-like narrow character format
// and the variable arguments following it; the formatting itself is
// delegated to PrintfV().
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}

#endif // wxUSE_UNICODE_UTF8
1709 wxString
wxString::FormatV(const wxString
& format
, va_list argptr
)
1712 s
.PrintfV(format
, argptr
);
1716 #if !wxUSE_UTF8_LOCALE_ONLY
1717 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1718 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar
*format
, ...)
1720 int wxString::DoPrintfWchar(const wxChar
*format
, ...)
1724 va_start(argptr
, format
);
1726 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1727 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1728 // because it's the only cast that works safely for downcasting when
1729 // multiple inheritance is used:
1730 wxString
*str
= static_cast<wxString
*>(this);
1732 wxString
*str
= this;
1735 int iLen
= str
->PrintfV(format
, argptr
);
1741 #endif // !wxUSE_UTF8_LOCALE_ONLY
#if wxUSE_UNICODE_UTF8

// Replace the contents of the string with the result of formatting the
// variable arguments according to the printf()-like narrow character format
// and return the value returned by PrintfV(), which does the real work.
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);

    const int result = PrintfV(format, args);

    va_end(args);

    return result;
}

#endif // wxUSE_UNICODE_UTF8
1758 Uses wxVsnprintf and places the result into this string.
1760 In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1761 it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in
1762 the ISO C99 (and thus SUSv3) standard the return value for the case of
1763 an undersized buffer is inconsistent. For conforming vsnprintf
1764 implementations the function must return the number of characters that
1765 would have been printed had the buffer been large enough. For conforming
1766 vswprintf implementations the function must return a negative number
1769 What vswprintf sets errno to is undefined but Darwin seems to set it to
1770 EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of
1771 those are defined in the standard and backed up by several conformance
1772 statements. Note that ENOMEM mentioned in the manual page does not
1773 apply to swprintf, only wprintf and fwprintf.
1775 Official manual page:
1776 http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1778 Some conformance statements (AIX, Solaris):
1779 http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1780 http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1782 Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1783 EILSEQ and EINVAL are specifically defined to mean the error is other than
1784 an undersized buffer and no other errno are defined we treat those two
1785 as meaning hard errors and everything else gets the old behavior which
1786 is to keep looping and increasing buffer size until the function succeeds.
1788 In practice it's impossible to determine before compilation which behavior
1789 may be used. The vswprintf function may have vsnprintf-like behavior or
1790 vice-versa. Behavior detected on one release can theoretically change
1791 with an updated release. Not to mention that configure testing for it
1792 would require the test to be run on the host system, not the build system
1793 which makes cross compilation difficult. Therefore, we make no assumptions
1794 about behavior and try our best to handle every known case, including the
1795 case where wxVsnprintf returns a negative number and fails to set errno.
1797 There is yet one more non-standard implementation and that is our own.
1798 Fortunately, that can be detected at compile-time.
1800 On top of all that, ISO C99 explicitly defines snprintf to write a null
1801 character to the last position of the specified buffer. That would be
1802 at the given buffer size minus 1. It is supposed to do this even if it
1803 turns out that the buffer is sized too small.
1805 Darwin (tested on 10.5) follows the C99 behavior exactly.
1807 Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1808 errno even when it fails. However, it only seems to ever fail due
1809 to an undersized buffer.
1811 #if wxUSE_UNICODE_UTF8
1812 template<typename BufferType
>
1814 // we only need one version in non-UTF8 builds and at least two Windows
1815 // compilers have problems with this function template, so use just one
1816 // normal function here
1818 static int DoStringPrintfV(wxString
& str
,
1819 const wxString
& format
, va_list argptr
)
1825 #if wxUSE_UNICODE_UTF8
1826 BufferType
tmp(str
, size
+ 1);
1827 typename
BufferType::CharType
*buf
= tmp
;
1829 wxStringBuffer
tmp(str
, size
+ 1);
1837 // in UTF-8 build, leaving uninitialized junk in the buffer
1838 // could result in invalid non-empty UTF-8 string, so just
1839 // reset the string to empty on failure:
1844 // wxVsnprintf() may modify the original arg pointer, so pass it
1847 wxVaCopy(argptrcopy
, argptr
);
1850 // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1853 int len
= wxVsnprintf(buf
, size
, format
, argptrcopy
);
1856 // some implementations of vsnprintf() don't NUL terminate
1857 // the string if there is not enough space for it so
1858 // always do it manually
1859 // FIXME: This really seems to be wrong and would be an off-by-one
1860 // bug except the code above allocates an extra character.
1861 buf
[size
] = _T('\0');
1863 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1864 // total number of characters which would have been written if the
1865 // buffer were large enough (newer standards such as Unix98)
1868 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1869 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1870 // is true if *both* of them use our own implementation,
1871 // otherwise we can't be sure
1872 #if wxUSE_WXVSNPRINTF
1873 // we know that our own implementation of wxVsnprintf() returns -1
1874 // only for a format error - thus there's something wrong with
1875 // the user's format string
1878 #else // possibly using system version
1879 // assume it only returns error if there is not enough space, but
1880 // as we don't know how much we need, double the current size of
1883 if( (errno
== EILSEQ
) || (errno
== EINVAL
) )
1884 // If errno was set to one of the two well-known hard errors
1885 // then fail immediately to avoid an infinite loop.
1888 #endif // __WXWINCE__
1889 // still not enough, as we don't know how much we need, double the
1890 // current size of the buffer
1892 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1894 else if ( len
>= size
)
1896 #if wxUSE_WXVSNPRINTF
1897 // we know that our own implementation of wxVsnprintf() returns
1898 // size+1 when there's not enough space but that's not the size
1899 // of the required buffer!
1900 size
*= 2; // so we just double the current size of the buffer
1902 // some vsnprintf() implementations NUL-terminate the buffer and
1903 // some don't in len == size case, to be safe always add 1
1904 // FIXME: I don't quite understand this comment. The vsnprintf
1905 // function is specifically defined to return the number of
1906 // characters printed not including the null terminator.
1907 // So OF COURSE you need to add 1 to get the right buffer size.
1908 // The following line is definitely correct, no question.
1912 else // ok, there was enough space
1918 // we could have overshot
1921 return str
.length();
1924 int wxString::PrintfV(const wxString
& format
, va_list argptr
)
1926 #if wxUSE_UNICODE_UTF8
1927 #if wxUSE_STL_BASED_WXSTRING
1928 typedef wxStringTypeBuffer
<char> Utf8Buffer
;
1930 typedef wxStringInternalBuffer Utf8Buffer
;
1934 #if wxUSE_UTF8_LOCALE_ONLY
1935 return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argptr
);
1937 #if wxUSE_UNICODE_UTF8
1938 if ( wxLocaleIsUtf8
)
1939 return DoStringPrintfV
<Utf8Buffer
>(*this, format
, argptr
);
1942 return DoStringPrintfV
<wxStringBuffer
>(*this, format
, argptr
);
1944 return DoStringPrintfV(*this, format
, argptr
);
1945 #endif // UTF8/WCHAR
1949 // ----------------------------------------------------------------------------
1950 // misc other operations
1951 // ----------------------------------------------------------------------------
1953 // returns true if the string matches the pattern which may contain '*' and
1954 // '?' metacharacters (as usual, '?' matches any character and '*' any number
1956 bool wxString::Matches(const wxString
& mask
) const
1958 // I disable this code as it doesn't seem to be faster (in fact, it seems
1959 // to be much slower) than the old, hand-written code below and using it
1960 // here requires always linking with libregex even if the user code doesn't
1962 #if 0 // wxUSE_REGEX
1963 // first translate the shell-like mask into a regex
1965 pattern
.reserve(wxStrlen(pszMask
));
1977 pattern
+= _T(".*");
1988 // these characters are special in a RE, quote them
1989 // (however note that we don't quote '[' and ']' to allow
1990 // using them for Unix shell like matching)
1991 pattern
+= _T('\\');
1995 pattern
+= *pszMask
;
2003 return wxRegEx(pattern
, wxRE_NOSUB
| wxRE_EXTENDED
).Matches(c_str());
2004 #else // !wxUSE_REGEX
2005 // TODO: this is, of course, awfully inefficient...
2007 // FIXME-UTF8: implement using iterators, remove #if
2008 #if wxUSE_UNICODE_UTF8
2009 wxWCharBuffer maskBuf
= mask
.wc_str();
2010 wxWCharBuffer txtBuf
= wc_str();
2011 const wxChar
*pszMask
= maskBuf
.data();
2012 const wxChar
*pszTxt
= txtBuf
.data();
2014 const wxChar
*pszMask
= mask
.wx_str();
2015 // the char currently being checked
2016 const wxChar
*pszTxt
= wx_str();
2019 // the last location where '*' matched
2020 const wxChar
*pszLastStarInText
= NULL
;
2021 const wxChar
*pszLastStarInMask
= NULL
;
2024 for ( ; *pszMask
!= wxT('\0'); pszMask
++, pszTxt
++ ) {
2025 switch ( *pszMask
) {
2027 if ( *pszTxt
== wxT('\0') )
2030 // pszTxt and pszMask will be incremented in the loop statement
2036 // remember where we started to be able to backtrack later
2037 pszLastStarInText
= pszTxt
;
2038 pszLastStarInMask
= pszMask
;
2040 // ignore special chars immediately following this one
2041 // (should this be an error?)
2042 while ( *pszMask
== wxT('*') || *pszMask
== wxT('?') )
2045 // if there is nothing more, match
2046 if ( *pszMask
== wxT('\0') )
2049 // are there any other metacharacters in the mask?
2051 const wxChar
*pEndMask
= wxStrpbrk(pszMask
, wxT("*?"));
2053 if ( pEndMask
!= NULL
) {
2054 // we have to match the string between two metachars
2055 uiLenMask
= pEndMask
- pszMask
;
2058 // we have to match the remainder of the string
2059 uiLenMask
= wxStrlen(pszMask
);
2062 wxString
strToMatch(pszMask
, uiLenMask
);
2063 const wxChar
* pMatch
= wxStrstr(pszTxt
, strToMatch
);
2064 if ( pMatch
== NULL
)
2067 // -1 to compensate "++" in the loop
2068 pszTxt
= pMatch
+ uiLenMask
- 1;
2069 pszMask
+= uiLenMask
- 1;
2074 if ( *pszMask
!= *pszTxt
)
2080 // match only if nothing left
2081 if ( *pszTxt
== wxT('\0') )
2084 // if we failed to match, backtrack if we can
2085 if ( pszLastStarInText
) {
2086 pszTxt
= pszLastStarInText
+ 1;
2087 pszMask
= pszLastStarInMask
;
2089 pszLastStarInText
= NULL
;
2091 // don't bother resetting pszLastStarInMask, it's unnecessary
2097 #endif // wxUSE_REGEX/!wxUSE_REGEX
2100 // Count the number of chars
2101 int wxString::Freq(wxUniChar ch
) const
2104 for ( const_iterator i
= begin(); i
!= end(); ++i
)