]> git.saurik.com Git - wxWidgets.git/blame_incremental - src/common/string.cpp
better fix for #11803, don't set iconized state for hidden window
[wxWidgets.git] / src / common / string.cpp
... / ...
CommitLineData
1/////////////////////////////////////////////////////////////////////////////
2// Name: src/common/string.cpp
3// Purpose: wxString class
4// Author: Vadim Zeitlin, Ryan Norton
5// Modified by:
6// Created: 29/01/98
7// RCS-ID: $Id$
8// Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9// (c) 2004 Ryan Norton <wxprojects@comcast.net>
10// Licence: wxWindows licence
11/////////////////////////////////////////////////////////////////////////////
12
13// ===========================================================================
14// headers, declarations, constants
15// ===========================================================================
16
17// For compilers that support precompilation, includes "wx.h".
18#include "wx/wxprec.h"
19
20#ifdef __BORLANDC__
21 #pragma hdrstop
22#endif
23
24#ifndef WX_PRECOMP
25 #include "wx/string.h"
26 #include "wx/wxcrtvararg.h"
27 #include "wx/log.h"
28#endif
29
30#include <ctype.h>
31
32#ifndef __WXWINCE__
33 #include <errno.h>
34#endif
35
36#include <string.h>
37#include <stdlib.h>
38
39#include "wx/hashmap.h"
40#include "wx/vector.h"
41#include "wx/xlocale.h"
42
43#ifdef __WXMSW__
44 #include "wx/msw/wrapwin.h"
45#endif // __WXMSW__
46
47// string handling functions used by wxString:
48#if wxUSE_UNICODE_UTF8
49 #define wxStringMemcpy memcpy
50 #define wxStringMemcmp memcmp
51 #define wxStringMemchr memchr
52 #define wxStringStrlen strlen
53#else
54 #define wxStringMemcpy wxTmemcpy
55 #define wxStringMemcmp wxTmemcmp
56 #define wxStringMemchr wxTmemchr
57 #define wxStringStrlen wxStrlen
58#endif
59
60// define a function declared in wx/buffer.h here as we don't have buffer.cpp
61// and don't want to add it just because of this simple function
62namespace wxPrivate
63{
64
65// wxXXXBuffer classes can be (implicitly) used during global statics
66// initialization so wrap the status UntypedBufferData variable in a function
67// to make it safe to access it even before all global statics are initialized
68UntypedBufferData *GetUntypedNullData()
69{
70 static UntypedBufferData s_untypedNullData(NULL, 0);
71
72 return &s_untypedNullData;
73}
74
75} // namespace wxPrivate
76
77// ---------------------------------------------------------------------------
78// static class variables definition
79// ---------------------------------------------------------------------------
80
81//According to STL _must_ be a -1 size_t
82const size_t wxString::npos = (size_t) -1;
83
84#if wxUSE_STRING_POS_CACHE
85
86#ifdef wxHAS_COMPILER_TLS
87
88wxTLS_TYPE(wxString::Cache) wxString::ms_cache;
89
90#else // !wxHAS_COMPILER_TLS
91
92struct wxStrCacheInitializer
93{
94 wxStrCacheInitializer()
95 {
96 // calling this function triggers s_cache initialization in it, and
97 // from now on it becomes safe to call from multiple threads
98 wxString::GetCache();
99 }
100};
101
102/*
103wxString::Cache& wxString::GetCache()
104{
105 static wxTLS_TYPE(Cache) s_cache;
106
107 return wxTLS_VALUE(s_cache);
108}
109*/
110
111static wxStrCacheInitializer gs_stringCacheInit;
112
113#endif // wxHAS_COMPILER_TLS/!wxHAS_COMPILER_TLS
114
115// gdb seems to be unable to display thread-local variables correctly, at least
116// not my 6.4.98 version under amd64, so provide this debugging helper to do it
117#if wxDEBUG_LEVEL >= 2
118
119struct wxStrCacheDumper
120{
121 static void ShowAll()
122 {
123 puts("*** wxString cache dump:");
124 for ( unsigned n = 0; n < wxString::Cache::SIZE; n++ )
125 {
126 const wxString::Cache::Element&
127 c = wxString::GetCacheBegin()[n];
128
129 printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
130 n,
131 n == wxString::LastUsedCacheElement() ? " [*]" : "",
132 c.str,
133 (unsigned long)c.pos,
134 (unsigned long)c.impl,
135 (long)c.len);
136 }
137 }
138};
139
140void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
141
142#endif // wxDEBUG_LEVEL >= 2
143
144#ifdef wxPROFILE_STRING_CACHE
145
146wxString::CacheStats wxString::ms_cacheStats;
147
148struct wxStrCacheStatsDumper
149{
150 ~wxStrCacheStatsDumper()
151 {
152 const wxString::CacheStats& stats = wxString::ms_cacheStats;
153
154 if ( stats.postot )
155 {
156 puts("*** wxString cache statistics:");
157 printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
158 stats.postot);
159 printf("\tHits %u (of which %u not used) or %.2f%%\n",
160 stats.poshits,
161 stats.mishits,
162 100.*float(stats.poshits - stats.mishits)/stats.postot);
163 printf("\tAverage position requested: %.2f\n",
164 float(stats.sumpos) / stats.postot);
165 printf("\tAverage offset after cached hint: %.2f\n",
166 float(stats.sumofs) / stats.postot);
167 }
168
169 if ( stats.lentot )
170 {
171 printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
172 stats.lentot, 100.*float(stats.lenhits)/stats.lentot);
173 }
174 }
175};
176
177static wxStrCacheStatsDumper s_showCacheStats;
178
179#endif // wxPROFILE_STRING_CACHE
180
181#endif // wxUSE_STRING_POS_CACHE
182
183// ----------------------------------------------------------------------------
184// global functions
185// ----------------------------------------------------------------------------
186
187#if wxUSE_STD_IOSTREAM
188
189#include <iostream>
190
191wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
192{
193#if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
194 const wxScopedCharBuffer buf(str.AsCharBuf());
195 if ( !buf )
196 os.clear(wxSTD ios_base::failbit);
197 else
198 os << buf.data();
199
200 return os;
201#else
202 return os << str.AsInternal();
203#endif
204}
205
206wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
207{
208 return os << str.c_str();
209}
210
211wxSTD ostream& operator<<(wxSTD ostream& os, const wxScopedCharBuffer& str)
212{
213 return os << str.data();
214}
215
216#ifndef __BORLANDC__
217wxSTD ostream& operator<<(wxSTD ostream& os, const wxScopedWCharBuffer& str)
218{
219 return os << str.data();
220}
221#endif
222
223#if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
224
225wxSTD wostream& operator<<(wxSTD wostream& wos, const wxString& str)
226{
227 return wos << str.wc_str();
228}
229
230wxSTD wostream& operator<<(wxSTD wostream& wos, const wxCStrData& str)
231{
232 return wos << str.AsWChar();
233}
234
235wxSTD wostream& operator<<(wxSTD wostream& wos, const wxScopedWCharBuffer& str)
236{
237 return wos << str.data();
238}
239
240#endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
241
242#endif // wxUSE_STD_IOSTREAM
243
244// ===========================================================================
245// wxString class core
246// ===========================================================================
247
248#if wxUSE_UNICODE_UTF8
249
250void wxString::PosLenToImpl(size_t pos, size_t len,
251 size_t *implPos, size_t *implLen) const
252{
253 if ( pos == npos )
254 {
255 *implPos = npos;
256 }
257 else // have valid start position
258 {
259 const const_iterator b = GetIterForNthChar(pos);
260 *implPos = wxStringImpl::const_iterator(b.impl()) - m_impl.begin();
261 if ( len == npos )
262 {
263 *implLen = npos;
264 }
265 else // have valid length too
266 {
267 // we need to handle the case of length specifying a substring
268 // going beyond the end of the string, just as std::string does
269 const const_iterator e(end());
270 const_iterator i(b);
271 while ( len && i <= e )
272 {
273 ++i;
274 --len;
275 }
276
277 *implLen = i.impl() - b.impl();
278 }
279 }
280}
281
282#endif // wxUSE_UNICODE_UTF8
283
284// ----------------------------------------------------------------------------
285// wxCStrData converted strings caching
286// ----------------------------------------------------------------------------
287
288// FIXME-UTF8: temporarily disabled because it doesn't work with global
289// string objects; re-enable after fixing this bug and benchmarking
290// performance to see if using a hash is a good idea at all
291#if 0
292
293// For backward compatibility reasons, it must be possible to assign the value
294// returned by wxString::c_str() to a char* or wchar_t* variable and work with
295// it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
296// because the memory would be freed immediately, but it has to be valid as long
297// as the string is not modified, so that code like this still works:
298//
299// const wxChar *s = str.c_str();
300// while ( s ) { ... }
301
302// FIXME-UTF8: not thread safe!
303// FIXME-UTF8: we currently clear the cached conversion only when the string is
304// destroyed, but we should do it when the string is modified, to
305// keep memory usage down
306// FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
307// invalidated the cache on every change, we could keep the previous
308// conversion
309// FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
310// to use mb_str() or wc_str() instead of (const [w]char*)c_str()
311
312template<typename T>
313static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
314{
315 typename T::iterator i = hash.find(wxConstCast(s, wxString));
316 if ( i != hash.end() )
317 {
318 free(i->second);
319 hash.erase(i);
320 }
321}
322
323#if wxUSE_UNICODE
324// NB: non-STL implementation doesn't compile with "const wxString*" key type,
325// so we have to use wxString* here and const-cast when used
326WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
327 wxStringCharConversionCache);
328static wxStringCharConversionCache gs_stringsCharCache;
329
330const char* wxCStrData::AsChar() const
331{
332 // remove previously cache value, if any (see FIXMEs above):
333 DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
334
335 // convert the string and keep it:
336 const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
337 m_str->mb_str().release();
338
339 return s + m_offset;
340}
341#endif // wxUSE_UNICODE
342
343#if !wxUSE_UNICODE_WCHAR
344WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
345 wxStringWCharConversionCache);
346static wxStringWCharConversionCache gs_stringsWCharCache;
347
348const wchar_t* wxCStrData::AsWChar() const
349{
350 // remove previously cache value, if any (see FIXMEs above):
351 DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
352
353 // convert the string and keep it:
354 const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
355 m_str->wc_str().release();
356
357 return s + m_offset;
358}
359#endif // !wxUSE_UNICODE_WCHAR
360
361wxString::~wxString()
362{
363#if wxUSE_UNICODE
364 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
365 DeleteStringFromConversionCache(gs_stringsCharCache, this);
366#endif
367#if !wxUSE_UNICODE_WCHAR
368 DeleteStringFromConversionCache(gs_stringsWCharCache, this);
369#endif
370}
371#endif
372
373// ===========================================================================
374// wxString class core
375// ===========================================================================
376
377// ---------------------------------------------------------------------------
378// construction and conversion
379// ---------------------------------------------------------------------------
380
381#if wxUSE_UNICODE_WCHAR
382/* static */
383wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
384 const wxMBConv& conv)
385{
386 // anything to do?
387 if ( !psz || nLength == 0 )
388 return SubstrBufFromMB(wxWCharBuffer(L""), 0);
389
390 if ( nLength == npos )
391 nLength = wxNO_LEN;
392
393 size_t wcLen;
394 wxScopedWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
395 if ( !wcLen )
396 return SubstrBufFromMB(wxWCharBuffer(L""), 0);
397 else
398 return SubstrBufFromMB(wcBuf, wcLen);
399}
400#endif // wxUSE_UNICODE_WCHAR
401
402#if wxUSE_UNICODE_UTF8
403/* static */
404wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
405 const wxMBConv& conv)
406{
407 // anything to do?
408 if ( !psz || nLength == 0 )
409 return SubstrBufFromMB(wxCharBuffer(""), 0);
410
411 // if psz is already in UTF-8, we don't have to do the roundtrip to
412 // wchar_t* and back:
413 if ( conv.IsUTF8() )
414 {
415 // we need to validate the input because UTF8 iterators assume valid
416 // UTF-8 sequence and psz may be invalid:
417 if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
418 {
419 // we must pass the real string length to SubstrBufFromMB ctor
420 if ( nLength == npos )
421 nLength = psz ? strlen(psz) : 0;
422 return SubstrBufFromMB(wxScopedCharBuffer::CreateNonOwned(psz, nLength),
423 nLength);
424 }
425 // else: do the roundtrip through wchar_t*
426 }
427
428 if ( nLength == npos )
429 nLength = wxNO_LEN;
430
431 // first convert to wide string:
432 size_t wcLen;
433 wxScopedWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
434 if ( !wcLen )
435 return SubstrBufFromMB(wxCharBuffer(""), 0);
436
437 // and then to UTF-8:
438 SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
439 // widechar -> UTF-8 conversion isn't supposed to ever fail:
440 wxASSERT_MSG( buf.data, wxT("conversion to UTF-8 failed") );
441
442 return buf;
443}
444#endif // wxUSE_UNICODE_UTF8
445
446#if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
447/* static */
448wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
449 const wxMBConv& conv)
450{
451 // anything to do?
452 if ( !pwz || nLength == 0 )
453 return SubstrBufFromWC(wxCharBuffer(""), 0);
454
455 if ( nLength == npos )
456 nLength = wxNO_LEN;
457
458 size_t mbLen;
459 wxScopedCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
460 if ( !mbLen )
461 return SubstrBufFromWC(wxCharBuffer(""), 0);
462 else
463 return SubstrBufFromWC(mbBuf, mbLen);
464}
465#endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
466
467// This std::string::c_str()-like method returns a wide char pointer to string
468// contents. In wxUSE_UNICODE_WCHAR case it is trivial as it can simply return
469// a pointer to the internal representation. Otherwise a conversion is required
470// and it returns a temporary buffer.
471//
472// However for compatibility with c_str() and to avoid breaking existing code
473// doing
474//
475// for ( const wchar_t *p = s.wc_str(); *p; p++ )
476// ... use *p...
477//
478// we actually need to ensure that the returned buffer is _not_ temporary and
479// so we use wxString::m_convertedToWChar to store the returned data
480#if !wxUSE_UNICODE_WCHAR
481
482const wchar_t *wxString::AsWChar(const wxMBConv& conv) const
483{
484 const char * const strMB = m_impl.c_str();
485 const size_t lenMB = m_impl.length();
486
487 // find out the size of the buffer needed
488 const size_t lenWC = conv.ToWChar(NULL, 0, strMB, lenMB);
489 if ( lenWC == wxCONV_FAILED )
490 return NULL;
491
492 // keep the same buffer if the string size didn't change: this is not only
493 // an optimization but also ensure that code which modifies string
494 // character by character (without changing its length) can continue to use
495 // the pointer returned by a previous wc_str() call even after changing the
496 // string
497
498 // TODO-UTF8: we could check for ">" instead of "!=" here as this would
499 // allow to save on buffer reallocations but at the cost of
500 // consuming (even) more memory, we should benchmark this to
501 // determine if it's worth doing
502 if ( !m_convertedToWChar.m_str || lenWC != m_convertedToWChar.m_len )
503 {
504 if ( !const_cast<wxString *>(this)->m_convertedToWChar.Extend(lenWC) )
505 return NULL;
506 }
507
508 // finally do convert
509 m_convertedToWChar.m_str[lenWC] = L'\0';
510 if ( conv.ToWChar(m_convertedToWChar.m_str, lenWC,
511 strMB, lenMB) == wxCONV_FAILED )
512 return NULL;
513
514 return m_convertedToWChar.m_str;
515}
516
517#endif // !wxUSE_UNICODE_WCHAR
518
519
520// Same thing for mb_str() which returns a normal char pointer to string
521// contents: this always requires converting it to the specified encoding in
522// non-ANSI build except if we need to convert to UTF-8 and this is what we
523// already use internally.
524#if wxUSE_UNICODE
525
526const char *wxString::AsChar(const wxMBConv& conv) const
527{
528#if wxUSE_UNICODE_UTF8
529 if ( conv.IsUTF8() )
530 return m_impl.c_str();
531
532 const wchar_t * const strWC = AsWChar(wxMBConvStrictUTF8());
533 const size_t lenWC = m_convertedToWChar.m_len;
534#else // wxUSE_UNICODE_WCHAR
535 const wchar_t * const strWC = m_impl.c_str();
536 const size_t lenWC = m_impl.length();
537#endif // wxUSE_UNICODE_UTF8/wxUSE_UNICODE_WCHAR
538
539 const size_t lenMB = conv.FromWChar(NULL, 0, strWC, lenWC);
540 if ( lenMB == wxCONV_FAILED )
541 return NULL;
542
543 if ( !m_convertedToChar.m_str || lenMB != m_convertedToChar.m_len )
544 {
545 if ( !const_cast<wxString *>(this)->m_convertedToChar.Extend(lenMB) )
546 return NULL;
547 }
548
549 m_convertedToChar.m_str[lenMB] = '\0';
550 if ( conv.FromWChar(m_convertedToChar.m_str, lenMB,
551 strWC, lenWC) == wxCONV_FAILED )
552 return NULL;
553
554 return m_convertedToChar.m_str;
555}
556
557#endif // wxUSE_UNICODE
558
559// shrink to minimal size (releasing extra memory)
560bool wxString::Shrink()
561{
562 wxString tmp(begin(), end());
563 swap(tmp);
564 return tmp.length() == length();
565}
566
567// deprecated compatibility code:
568#if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
569wxStringCharType *wxString::GetWriteBuf(size_t nLen)
570{
571 return DoGetWriteBuf(nLen);
572}
573
574void wxString::UngetWriteBuf()
575{
576 DoUngetWriteBuf();
577}
578
579void wxString::UngetWriteBuf(size_t nLen)
580{
581 DoUngetWriteBuf(nLen);
582}
583#endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
584
585
586// ---------------------------------------------------------------------------
587// data access
588// ---------------------------------------------------------------------------
589
590// all functions are inline in string.h
591
592// ---------------------------------------------------------------------------
593// concatenation operators
594// ---------------------------------------------------------------------------
595
596/*
597 * concatenation functions come in 5 flavours:
598 * string + string
599 * char + string and string + char
600 * C str + string and string + C str
601 */
602
603wxString operator+(const wxString& str1, const wxString& str2)
604{
605#if !wxUSE_STL_BASED_WXSTRING
606 wxASSERT( str1.IsValid() );
607 wxASSERT( str2.IsValid() );
608#endif
609
610 wxString s = str1;
611 s += str2;
612
613 return s;
614}
615
616wxString operator+(const wxString& str, wxUniChar ch)
617{
618#if !wxUSE_STL_BASED_WXSTRING
619 wxASSERT( str.IsValid() );
620#endif
621
622 wxString s = str;
623 s += ch;
624
625 return s;
626}
627
628wxString operator+(wxUniChar ch, const wxString& str)
629{
630#if !wxUSE_STL_BASED_WXSTRING
631 wxASSERT( str.IsValid() );
632#endif
633
634 wxString s = ch;
635 s += str;
636
637 return s;
638}
639
640wxString operator+(const wxString& str, const char *psz)
641{
642#if !wxUSE_STL_BASED_WXSTRING
643 wxASSERT( str.IsValid() );
644#endif
645
646 wxString s;
647 if ( !s.Alloc(strlen(psz) + str.length()) ) {
648 wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
649 }
650 s += str;
651 s += psz;
652
653 return s;
654}
655
656wxString operator+(const wxString& str, const wchar_t *pwz)
657{
658#if !wxUSE_STL_BASED_WXSTRING
659 wxASSERT( str.IsValid() );
660#endif
661
662 wxString s;
663 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
664 wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
665 }
666 s += str;
667 s += pwz;
668
669 return s;
670}
671
672wxString operator+(const char *psz, const wxString& str)
673{
674#if !wxUSE_STL_BASED_WXSTRING
675 wxASSERT( str.IsValid() );
676#endif
677
678 wxString s;
679 if ( !s.Alloc(strlen(psz) + str.length()) ) {
680 wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
681 }
682 s = psz;
683 s += str;
684
685 return s;
686}
687
688wxString operator+(const wchar_t *pwz, const wxString& str)
689{
690#if !wxUSE_STL_BASED_WXSTRING
691 wxASSERT( str.IsValid() );
692#endif
693
694 wxString s;
695 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
696 wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
697 }
698 s = pwz;
699 s += str;
700
701 return s;
702}
703
704// ---------------------------------------------------------------------------
705// string comparison
706// ---------------------------------------------------------------------------
707
708bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
709{
710 return (length() == 1) && (compareWithCase ? GetChar(0u) == c
711 : wxToupper(GetChar(0u)) == wxToupper(c));
712}
713
714#ifdef HAVE_STD_STRING_COMPARE
715
716// NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
717// UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
718// sort strings in characters code point order by sorting the byte sequence
719// in byte values order (i.e. what strcmp() and memcmp() do).
720
721int wxString::compare(const wxString& str) const
722{
723 return m_impl.compare(str.m_impl);
724}
725
726int wxString::compare(size_t nStart, size_t nLen,
727 const wxString& str) const
728{
729 size_t pos, len;
730 PosLenToImpl(nStart, nLen, &pos, &len);
731 return m_impl.compare(pos, len, str.m_impl);
732}
733
734int wxString::compare(size_t nStart, size_t nLen,
735 const wxString& str,
736 size_t nStart2, size_t nLen2) const
737{
738 size_t pos, len;
739 PosLenToImpl(nStart, nLen, &pos, &len);
740
741 size_t pos2, len2;
742 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
743
744 return m_impl.compare(pos, len, str.m_impl, pos2, len2);
745}
746
747int wxString::compare(const char* sz) const
748{
749 return m_impl.compare(ImplStr(sz));
750}
751
752int wxString::compare(const wchar_t* sz) const
753{
754 return m_impl.compare(ImplStr(sz));
755}
756
757int wxString::compare(size_t nStart, size_t nLen,
758 const char* sz, size_t nCount) const
759{
760 size_t pos, len;
761 PosLenToImpl(nStart, nLen, &pos, &len);
762
763 SubstrBufFromMB str(ImplStr(sz, nCount));
764
765 return m_impl.compare(pos, len, str.data, str.len);
766}
767
768int wxString::compare(size_t nStart, size_t nLen,
769 const wchar_t* sz, size_t nCount) const
770{
771 size_t pos, len;
772 PosLenToImpl(nStart, nLen, &pos, &len);
773
774 SubstrBufFromWC str(ImplStr(sz, nCount));
775
776 return m_impl.compare(pos, len, str.data, str.len);
777}
778
779#else // !HAVE_STD_STRING_COMPARE
780
781static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
782 const wxStringCharType* s2, size_t l2)
783{
784 if( l1 == l2 )
785 return wxStringMemcmp(s1, s2, l1);
786 else if( l1 < l2 )
787 {
788 int ret = wxStringMemcmp(s1, s2, l1);
789 return ret == 0 ? -1 : ret;
790 }
791 else
792 {
793 int ret = wxStringMemcmp(s1, s2, l2);
794 return ret == 0 ? +1 : ret;
795 }
796}
797
798int wxString::compare(const wxString& str) const
799{
800 return ::wxDoCmp(m_impl.data(), m_impl.length(),
801 str.m_impl.data(), str.m_impl.length());
802}
803
804int wxString::compare(size_t nStart, size_t nLen,
805 const wxString& str) const
806{
807 wxASSERT(nStart <= length());
808 size_type strLen = length() - nStart;
809 nLen = strLen < nLen ? strLen : nLen;
810
811 size_t pos, len;
812 PosLenToImpl(nStart, nLen, &pos, &len);
813
814 return ::wxDoCmp(m_impl.data() + pos, len,
815 str.m_impl.data(), str.m_impl.length());
816}
817
818int wxString::compare(size_t nStart, size_t nLen,
819 const wxString& str,
820 size_t nStart2, size_t nLen2) const
821{
822 wxASSERT(nStart <= length());
823 wxASSERT(nStart2 <= str.length());
824 size_type strLen = length() - nStart,
825 strLen2 = str.length() - nStart2;
826 nLen = strLen < nLen ? strLen : nLen;
827 nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
828
829 size_t pos, len;
830 PosLenToImpl(nStart, nLen, &pos, &len);
831 size_t pos2, len2;
832 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
833
834 return ::wxDoCmp(m_impl.data() + pos, len,
835 str.m_impl.data() + pos2, len2);
836}
837
838int wxString::compare(const char* sz) const
839{
840 SubstrBufFromMB str(ImplStr(sz, npos));
841 if ( str.len == npos )
842 str.len = wxStringStrlen(str.data);
843 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
844}
845
846int wxString::compare(const wchar_t* sz) const
847{
848 SubstrBufFromWC str(ImplStr(sz, npos));
849 if ( str.len == npos )
850 str.len = wxStringStrlen(str.data);
851 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
852}
853
854int wxString::compare(size_t nStart, size_t nLen,
855 const char* sz, size_t nCount) const
856{
857 wxASSERT(nStart <= length());
858 size_type strLen = length() - nStart;
859 nLen = strLen < nLen ? strLen : nLen;
860
861 size_t pos, len;
862 PosLenToImpl(nStart, nLen, &pos, &len);
863
864 SubstrBufFromMB str(ImplStr(sz, nCount));
865 if ( str.len == npos )
866 str.len = wxStringStrlen(str.data);
867
868 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
869}
870
871int wxString::compare(size_t nStart, size_t nLen,
872 const wchar_t* sz, size_t nCount) const
873{
874 wxASSERT(nStart <= length());
875 size_type strLen = length() - nStart;
876 nLen = strLen < nLen ? strLen : nLen;
877
878 size_t pos, len;
879 PosLenToImpl(nStart, nLen, &pos, &len);
880
881 SubstrBufFromWC str(ImplStr(sz, nCount));
882 if ( str.len == npos )
883 str.len = wxStringStrlen(str.data);
884
885 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
886}
887
888#endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
889
890
891// ---------------------------------------------------------------------------
892// find_{first,last}_[not]_of functions
893// ---------------------------------------------------------------------------
894
895#if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
896
897// NB: All these functions are implemented with the argument being wxChar*,
898// i.e. widechar string in any Unicode build, even though native string
899// representation is char* in the UTF-8 build. This is because we couldn't
900// use memchr() to determine if a character is in a set encoded as UTF-8.
901
902size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
903{
904 return find_first_of(sz, nStart, wxStrlen(sz));
905}
906
907size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
908{
909 return find_first_not_of(sz, nStart, wxStrlen(sz));
910}
911
912size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
913{
914 wxASSERT_MSG( nStart <= length(), wxT("invalid index") );
915
916 size_t idx = nStart;
917 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
918 {
919 if ( wxTmemchr(sz, *i, n) )
920 return idx;
921 }
922
923 return npos;
924}
925
926size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
927{
928 wxASSERT_MSG( nStart <= length(), wxT("invalid index") );
929
930 size_t idx = nStart;
931 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
932 {
933 if ( !wxTmemchr(sz, *i, n) )
934 return idx;
935 }
936
937 return npos;
938}
939
940
941size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
942{
943 return find_last_of(sz, nStart, wxStrlen(sz));
944}
945
946size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
947{
948 return find_last_not_of(sz, nStart, wxStrlen(sz));
949}
950
951size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
952{
953 size_t len = length();
954
955 if ( nStart == npos )
956 {
957 nStart = len - 1;
958 }
959 else
960 {
961 wxASSERT_MSG( nStart <= len, wxT("invalid index") );
962 }
963
964 size_t idx = nStart;
965 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
966 i != rend(); --idx, ++i )
967 {
968 if ( wxTmemchr(sz, *i, n) )
969 return idx;
970 }
971
972 return npos;
973}
974
975size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
976{
977 size_t len = length();
978
979 if ( nStart == npos )
980 {
981 nStart = len - 1;
982 }
983 else
984 {
985 wxASSERT_MSG( nStart <= len, wxT("invalid index") );
986 }
987
988 size_t idx = nStart;
989 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
990 i != rend(); --idx, ++i )
991 {
992 if ( !wxTmemchr(sz, *i, n) )
993 return idx;
994 }
995
996 return npos;
997}
998
999size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
1000{
1001 wxASSERT_MSG( nStart <= length(), wxT("invalid index") );
1002
1003 size_t idx = nStart;
1004 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
1005 {
1006 if ( *i != ch )
1007 return idx;
1008 }
1009
1010 return npos;
1011}
1012
1013size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
1014{
1015 size_t len = length();
1016
1017 if ( nStart == npos )
1018 {
1019 nStart = len - 1;
1020 }
1021 else
1022 {
1023 wxASSERT_MSG( nStart <= len, wxT("invalid index") );
1024 }
1025
1026 size_t idx = nStart;
1027 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1028 i != rend(); --idx, ++i )
1029 {
1030 if ( *i != ch )
1031 return idx;
1032 }
1033
1034 return npos;
1035}
1036
1037// the functions above were implemented for wchar_t* arguments in Unicode
1038// build and char* in ANSI build; below are implementations for the other
1039// version:
1040#if wxUSE_UNICODE
1041 #define wxOtherCharType char
1042 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
1043#else
1044 #define wxOtherCharType wchar_t
1045 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
1046#endif
1047
1048size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
1049 { return find_first_of(STRCONV(sz), nStart); }
1050
1051size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
1052 size_t n) const
1053 { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
1054size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
1055 { return find_last_of(STRCONV(sz), nStart); }
1056size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
1057 size_t n) const
1058 { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
1059size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
1060 { return find_first_not_of(STRCONV(sz), nStart); }
1061size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
1062 size_t n) const
1063 { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
1064size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
1065 { return find_last_not_of(STRCONV(sz), nStart); }
1066size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
1067 size_t n) const
1068 { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
1069
1070#undef wxOtherCharType
1071#undef STRCONV
1072
1073#endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1074
1075// ===========================================================================
1076// other common string functions
1077// ===========================================================================
1078
1079int wxString::CmpNoCase(const wxString& s) const
1080{
1081#if defined(__WXMSW__) && !wxUSE_UNICODE_UTF8
1082 // prefer to use CompareString() if available as it's more efficient than
1083 // doing it manual or even using wxStricmp() (see #10375)
1084 switch ( ::CompareString(LOCALE_USER_DEFAULT, NORM_IGNORECASE,
1085 m_impl.c_str(), m_impl.length(),
1086 s.m_impl.c_str(), s.m_impl.length()) )
1087 {
1088 case CSTR_LESS_THAN:
1089 return -1;
1090
1091 case CSTR_EQUAL:
1092 return 0;
1093
1094 case CSTR_GREATER_THAN:
1095 return 1;
1096
1097 default:
1098 wxFAIL_MSG( "unexpected CompareString() return value" );
1099 // fall through
1100
1101 case 0:
1102 wxLogLastError("CompareString");
1103 // use generic code below
1104 }
1105#endif // __WXMSW__ && !wxUSE_UNICODE_UTF8
1106
1107 // do the comparison manually: notice that we can't use wxStricmp() as it
1108 // doesn't handle embedded NULs
1109
1110 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
1111 const_iterator i1 = begin();
1112 const_iterator end1 = end();
1113 const_iterator i2 = s.begin();
1114 const_iterator end2 = s.end();
1115
1116 for ( ; i1 != end1 && i2 != end2; ++i1, ++i2 )
1117 {
1118 wxUniChar lower1 = (wxChar)wxTolower(*i1);
1119 wxUniChar lower2 = (wxChar)wxTolower(*i2);
1120 if ( lower1 != lower2 )
1121 return lower1 < lower2 ? -1 : 1;
1122 }
1123
1124 size_t len1 = length();
1125 size_t len2 = s.length();
1126
1127 if ( len1 < len2 )
1128 return -1;
1129 else if ( len1 > len2 )
1130 return 1;
1131 return 0;
1132}
1133
1134
1135#if wxUSE_UNICODE
1136
1137#ifdef __MWERKS__
1138#ifndef __SCHAR_MAX__
1139#define __SCHAR_MAX__ 127
1140#endif
1141#endif
1142
1143wxString wxString::FromAscii(const char *ascii, size_t len)
1144{
1145 if (!ascii || len == 0)
1146 return wxEmptyString;
1147
1148 wxString res;
1149
1150 {
1151 wxStringInternalBuffer buf(res, len);
1152 wxStringCharType *dest = buf;
1153
1154 for ( ; len > 0; --len )
1155 {
1156 unsigned char c = (unsigned char)*ascii++;
1157 wxASSERT_MSG( c < 0x80,
1158 wxT("Non-ASCII value passed to FromAscii().") );
1159
1160 *dest++ = (wchar_t)c;
1161 }
1162 }
1163
1164 return res;
1165}
1166
1167wxString wxString::FromAscii(const char *ascii)
1168{
1169 return FromAscii(ascii, wxStrlen(ascii));
1170}
1171
1172wxString wxString::FromAscii(char ascii)
1173{
1174 // What do we do with '\0' ?
1175
1176 unsigned char c = (unsigned char)ascii;
1177
1178 wxASSERT_MSG( c < 0x80, wxT("Non-ASCII value passed to FromAscii().") );
1179
1180 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1181 return wxString(wxUniChar((wchar_t)c));
1182}
1183
1184const wxScopedCharBuffer wxString::ToAscii() const
1185{
1186 // this will allocate enough space for the terminating NUL too
1187 wxCharBuffer buffer(length());
1188 char *dest = buffer.data();
1189
1190 for ( const_iterator i = begin(); i != end(); ++i )
1191 {
1192 wxUniChar c(*i);
1193 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1194 *dest++ = c.IsAscii() ? (char)c : '_';
1195
1196 // the output string can't have embedded NULs anyhow, so we can safely
1197 // stop at first of them even if we do have any
1198 if ( !c )
1199 break;
1200 }
1201
1202 return buffer;
1203}
1204
1205#endif // wxUSE_UNICODE
1206
1207// extract string of length nCount starting at nFirst
1208wxString wxString::Mid(size_t nFirst, size_t nCount) const
1209{
1210 size_t nLen = length();
1211
1212 // default value of nCount is npos and means "till the end"
1213 if ( nCount == npos )
1214 {
1215 nCount = nLen - nFirst;
1216 }
1217
1218 // out-of-bounds requests return sensible things
1219 if ( nFirst + nCount > nLen )
1220 {
1221 nCount = nLen - nFirst;
1222 }
1223
1224 if ( nFirst > nLen )
1225 {
1226 // AllocCopy() will return empty string
1227 return wxEmptyString;
1228 }
1229
1230 wxString dest(*this, nFirst, nCount);
1231 if ( dest.length() != nCount )
1232 {
1233 wxFAIL_MSG( wxT("out of memory in wxString::Mid") );
1234 }
1235
1236 return dest;
1237}
1238
1239// check that the string starts with prefix and return the rest of the string
1240// in the provided pointer if it is not NULL, otherwise return false
1241bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
1242{
1243 if ( compare(0, prefix.length(), prefix) != 0 )
1244 return false;
1245
1246 if ( rest )
1247 {
1248 // put the rest of the string into provided pointer
1249 rest->assign(*this, prefix.length(), npos);
1250 }
1251
1252 return true;
1253}
1254
1255
1256// check that the string ends with suffix and return the rest of it in the
1257// provided pointer if it is not NULL, otherwise return false
1258bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
1259{
1260 int start = length() - suffix.length();
1261
1262 if ( start < 0 || compare(start, npos, suffix) != 0 )
1263 return false;
1264
1265 if ( rest )
1266 {
1267 // put the rest of the string into provided pointer
1268 rest->assign(*this, 0, start);
1269 }
1270
1271 return true;
1272}
1273
1274
1275// extract nCount last (rightmost) characters
1276wxString wxString::Right(size_t nCount) const
1277{
1278 if ( nCount > length() )
1279 nCount = length();
1280
1281 wxString dest(*this, length() - nCount, nCount);
1282 if ( dest.length() != nCount ) {
1283 wxFAIL_MSG( wxT("out of memory in wxString::Right") );
1284 }
1285 return dest;
1286}
1287
1288// get all characters after the last occurrence of ch
1289// (returns the whole string if ch not found)
1290wxString wxString::AfterLast(wxUniChar ch) const
1291{
1292 wxString str;
1293 int iPos = Find(ch, true);
1294 if ( iPos == wxNOT_FOUND )
1295 str = *this;
1296 else
1297 str.assign(*this, iPos + 1, npos);
1298
1299 return str;
1300}
1301
1302// extract nCount first (leftmost) characters
1303wxString wxString::Left(size_t nCount) const
1304{
1305 if ( nCount > length() )
1306 nCount = length();
1307
1308 wxString dest(*this, 0, nCount);
1309 if ( dest.length() != nCount ) {
1310 wxFAIL_MSG( wxT("out of memory in wxString::Left") );
1311 }
1312 return dest;
1313}
1314
1315// get all characters before the first occurrence of ch
1316// (returns the whole string if ch not found)
1317wxString wxString::BeforeFirst(wxUniChar ch) const
1318{
1319 int iPos = Find(ch);
1320 if ( iPos == wxNOT_FOUND )
1321 iPos = length();
1322 return wxString(*this, 0, iPos);
1323}
1324
1325/// get all characters before the last occurrence of ch
1326/// (returns empty string if ch not found)
1327wxString wxString::BeforeLast(wxUniChar ch) const
1328{
1329 wxString str;
1330 int iPos = Find(ch, true);
1331 if ( iPos != wxNOT_FOUND && iPos != 0 )
1332 str = wxString(c_str(), iPos);
1333
1334 return str;
1335}
1336
1337/// get all characters after the first occurrence of ch
1338/// (returns empty string if ch not found)
1339wxString wxString::AfterFirst(wxUniChar ch) const
1340{
1341 wxString str;
1342 int iPos = Find(ch);
1343 if ( iPos != wxNOT_FOUND )
1344 str.assign(*this, iPos + 1, npos);
1345
1346 return str;
1347}
1348
1349// replace first (or all) occurrences of some substring with another one
1350size_t wxString::Replace(const wxString& strOld,
1351 const wxString& strNew, bool bReplaceAll)
1352{
1353 // if we tried to replace an empty string we'd enter an infinite loop below
1354 wxCHECK_MSG( !strOld.empty(), 0,
1355 wxT("wxString::Replace(): invalid parameter") );
1356
1357 wxSTRING_INVALIDATE_CACHE();
1358
1359 size_t uiCount = 0; // count of replacements made
1360
1361 // optimize the special common case: replacement of one character by
1362 // another one (in UTF-8 case we can only do this for ASCII characters)
1363 //
1364 // benchmarks show that this special version is around 3 times faster
1365 // (depending on the proportion of matching characters and UTF-8/wchar_t
1366 // build)
1367 if ( strOld.m_impl.length() == 1 && strNew.m_impl.length() == 1 )
1368 {
1369 const wxStringCharType chOld = strOld.m_impl[0],
1370 chNew = strNew.m_impl[0];
1371
1372 // this loop is the simplified version of the one below
1373 for ( size_t pos = 0; ; )
1374 {
1375 pos = m_impl.find(chOld, pos);
1376 if ( pos == npos )
1377 break;
1378
1379 m_impl[pos++] = chNew;
1380
1381 uiCount++;
1382
1383 if ( !bReplaceAll )
1384 break;
1385 }
1386 }
1387 else if ( !bReplaceAll)
1388 {
1389 size_t pos = m_impl.find(strOld, 0);
1390 if ( pos != npos )
1391 {
1392 m_impl.replace(pos, strOld.m_impl.length(), strNew.m_impl);
1393 uiCount = 1;
1394 }
1395 }
1396 else // replace all occurrences
1397 {
1398 const size_t uiOldLen = strOld.m_impl.length();
1399 const size_t uiNewLen = strNew.m_impl.length();
1400
1401 // first scan the string to find all positions at which the replacement
1402 // should be made
1403 wxVector<size_t> replacePositions;
1404
1405 size_t pos;
1406 for ( pos = m_impl.find(strOld.m_impl, 0);
1407 pos != npos;
1408 pos = m_impl.find(strOld.m_impl, pos + uiOldLen))
1409 {
1410 replacePositions.push_back(pos);
1411 ++uiCount;
1412 }
1413
1414 if ( !uiCount )
1415 return 0;
1416
1417 // allocate enough memory for the whole new string
1418 wxString tmp;
1419 tmp.m_impl.reserve(m_impl.length() + uiCount*(uiNewLen - uiOldLen));
1420
1421 // copy this string to tmp doing replacements on the fly
1422 size_t replNum = 0;
1423 for ( pos = 0; replNum < uiCount; replNum++ )
1424 {
1425 const size_t nextReplPos = replacePositions[replNum];
1426
1427 if ( pos != nextReplPos )
1428 {
1429 tmp.m_impl.append(m_impl, pos, nextReplPos - pos);
1430 }
1431
1432 tmp.m_impl.append(strNew.m_impl);
1433 pos = nextReplPos + uiOldLen;
1434 }
1435
1436 if ( pos != m_impl.length() )
1437 {
1438 // append the rest of the string unchanged
1439 tmp.m_impl.append(m_impl, pos, m_impl.length() - pos);
1440 }
1441
1442 swap(tmp);
1443 }
1444
1445 return uiCount;
1446}
1447
1448bool wxString::IsAscii() const
1449{
1450 for ( const_iterator i = begin(); i != end(); ++i )
1451 {
1452 if ( !(*i).IsAscii() )
1453 return false;
1454 }
1455
1456 return true;
1457}
1458
1459bool wxString::IsWord() const
1460{
1461 for ( const_iterator i = begin(); i != end(); ++i )
1462 {
1463 if ( !wxIsalpha(*i) )
1464 return false;
1465 }
1466
1467 return true;
1468}
1469
1470bool wxString::IsNumber() const
1471{
1472 if ( empty() )
1473 return true;
1474
1475 const_iterator i = begin();
1476
1477 if ( *i == wxT('-') || *i == wxT('+') )
1478 ++i;
1479
1480 for ( ; i != end(); ++i )
1481 {
1482 if ( !wxIsdigit(*i) )
1483 return false;
1484 }
1485
1486 return true;
1487}
1488
1489wxString wxString::Strip(stripType w) const
1490{
1491 wxString s = *this;
1492 if ( w & leading ) s.Trim(false);
1493 if ( w & trailing ) s.Trim(true);
1494 return s;
1495}
1496
1497// ---------------------------------------------------------------------------
1498// case conversion
1499// ---------------------------------------------------------------------------
1500
1501wxString& wxString::MakeUpper()
1502{
1503 for ( iterator it = begin(), en = end(); it != en; ++it )
1504 *it = (wxChar)wxToupper(*it);
1505
1506 return *this;
1507}
1508
1509wxString& wxString::MakeLower()
1510{
1511 for ( iterator it = begin(), en = end(); it != en; ++it )
1512 *it = (wxChar)wxTolower(*it);
1513
1514 return *this;
1515}
1516
1517wxString& wxString::MakeCapitalized()
1518{
1519 const iterator en = end();
1520 iterator it = begin();
1521 if ( it != en )
1522 {
1523 *it = (wxChar)wxToupper(*it);
1524 for ( ++it; it != en; ++it )
1525 *it = (wxChar)wxTolower(*it);
1526 }
1527
1528 return *this;
1529}
1530
1531// ---------------------------------------------------------------------------
1532// trimming and padding
1533// ---------------------------------------------------------------------------
1534
1535// some compilers (VC++ 6.0 not to name them) return true for a call to
1536// isspace('\xEA') in the C locale which seems to be broken to me, but we have
1537// to live with this by checking that the character is a 7 bit one - even if
1538// this may fail to detect some spaces (I don't know if Unicode doesn't have
1539// space-like symbols somewhere except in the first 128 chars), it is arguably
1540// still better than trimming away accented letters
1541inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1542
1543// trims spaces (in the sense of isspace) from left or right side
1544wxString& wxString::Trim(bool bFromRight)
1545{
1546 // first check if we're going to modify the string at all
1547 if ( !empty() &&
1548 (
1549 (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1550 (!bFromRight && wxSafeIsspace(GetChar(0u)))
1551 )
1552 )
1553 {
1554 if ( bFromRight )
1555 {
1556 // find last non-space character
1557 reverse_iterator psz = rbegin();
1558 while ( (psz != rend()) && wxSafeIsspace(*psz) )
1559 ++psz;
1560
1561 // truncate at trailing space start
1562 erase(psz.base(), end());
1563 }
1564 else
1565 {
1566 // find first non-space character
1567 iterator psz = begin();
1568 while ( (psz != end()) && wxSafeIsspace(*psz) )
1569 ++psz;
1570
1571 // fix up data and length
1572 erase(begin(), psz);
1573 }
1574 }
1575
1576 return *this;
1577}
1578
1579// adds nCount characters chPad to the string from either side
1580wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1581{
1582 wxString s(chPad, nCount);
1583
1584 if ( bFromRight )
1585 *this += s;
1586 else
1587 {
1588 s += *this;
1589 swap(s);
1590 }
1591
1592 return *this;
1593}
1594
1595// truncate the string
1596wxString& wxString::Truncate(size_t uiLen)
1597{
1598 if ( uiLen < length() )
1599 {
1600 erase(begin() + uiLen, end());
1601 }
1602 //else: nothing to do, string is already short enough
1603
1604 return *this;
1605}
1606
1607// ---------------------------------------------------------------------------
1608// finding (return wxNOT_FOUND if not found and index otherwise)
1609// ---------------------------------------------------------------------------
1610
1611// find a character
1612int wxString::Find(wxUniChar ch, bool bFromEnd) const
1613{
1614 size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1615
1616 return (idx == npos) ? wxNOT_FOUND : (int)idx;
1617}
1618
1619// ----------------------------------------------------------------------------
1620// conversion to numbers
1621// ----------------------------------------------------------------------------
1622
1623// The implementation of all the functions below is exactly the same so factor
1624// it out. Note that number extraction works correctly on UTF-8 strings, so
1625// we can use wxStringCharType and wx_str() for maximum efficiency.
1626
// DO_IF_NOT_WINCE(x) expands to its argument everywhere except in Windows CE
// builds (__WXWINCE__ defined), where it expands to nothing: it is used
// below to wrap the code using errno, as <errno.h> is not included by this
// file for __WXWINCE__
#ifndef __WXWINCE__
    #define DO_IF_NOT_WINCE(x)      x
#else
    #define DO_IF_NOT_WINCE(x)
#endif

// common prologue of the conversion functions: it checks that the output
// pointer pVal is not NULL (returning false from the function using the
// macro otherwise), resets errno and declares the variables "start" (the
// beginning of the string data) and "end" (to be set by the strtoXXX()
// function called after the macro)
#define WX_STRING_TO_X_TYPE_START                                           \
    wxCHECK_MSG( pVal, false, wxT("NULL output pointer") );                 \
    DO_IF_NOT_WINCE( errno = 0; )                                           \
    const wxStringCharType *start = wx_str();                               \
    wxStringCharType *end;

// common epilogue of the conversion functions, it expects the converted
// value to be in the variable "val"
//
// notice that we return false without modifying the output parameter at all if
// nothing could be parsed but we do modify it and return false then if we did
// parse something successfully but not the entire string
#define WX_STRING_TO_X_TYPE_END                                             \
    if ( end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )                 \
        return false;                                                       \
    *pVal = val;                                                            \
    return !*end;
1647
// Convert the string to a long using wxStrtol() with the given base, which
// must be either 0 or in 2..36 range (this is asserted).
//
// Returns true only if the entire string was converted. *pVal is not
// modified if nothing could be parsed or, except under Windows CE, if errno
// is ERANGE after the conversion; it is set, but false is still returned,
// if only the beginning of the string could be parsed. pVal must not be NULL.
bool wxString::ToLong(long *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );

    WX_STRING_TO_X_TYPE_START
    long val = wxStrtol(start, &end, base);
    WX_STRING_TO_X_TYPE_END
}
1656
// Convert the string to an unsigned long using wxStrtoul() with the given
// base, which must be either 0 or in 2..36 range (this is asserted).
//
// Returns true only if the entire string was converted. *pVal is not
// modified if nothing could be parsed or, except under Windows CE, if errno
// is ERANGE after the conversion; it is set, but false is still returned,
// if only the beginning of the string could be parsed. pVal must not be NULL.
bool wxString::ToULong(unsigned long *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );

    WX_STRING_TO_X_TYPE_START
    unsigned long val = wxStrtoul(start, &end, base);
    WX_STRING_TO_X_TYPE_END
}
1665
// Convert the string to a wxLongLong_t using wxStrtoll() with the given
// base, which must be either 0 or in 2..36 range (this is asserted).
//
// Returns true only if the entire string was converted. *pVal is not
// modified if nothing could be parsed or, except under Windows CE, if errno
// is ERANGE after the conversion; it is set, but false is still returned,
// if only the beginning of the string could be parsed. pVal must not be NULL.
bool wxString::ToLongLong(wxLongLong_t *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );

    WX_STRING_TO_X_TYPE_START
    wxLongLong_t val = wxStrtoll(start, &end, base);
    WX_STRING_TO_X_TYPE_END
}
1674
// Convert the string to a wxULongLong_t using wxStrtoull() with the given
// base, which must be either 0 or in 2..36 range (this is asserted).
//
// Returns true only if the entire string was converted. *pVal is not
// modified if nothing could be parsed or, except under Windows CE, if errno
// is ERANGE after the conversion; it is set, but false is still returned,
// if only the beginning of the string could be parsed. pVal must not be NULL.
bool wxString::ToULongLong(wxULongLong_t *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );

    WX_STRING_TO_X_TYPE_START
    wxULongLong_t val = wxStrtoull(start, &end, base);
    WX_STRING_TO_X_TYPE_END
}
1683
// Convert the string to a double using wxStrtod().
//
// Returns true only if the entire string was converted. *pVal is not
// modified if nothing could be parsed or, except under Windows CE, if errno
// is ERANGE after the conversion; it is set, but false is still returned,
// if only the beginning of the string could be parsed. pVal must not be NULL.
bool wxString::ToDouble(double *pVal) const
{
    WX_STRING_TO_X_TYPE_START
    double val = wxStrtod(start, &end);
    WX_STRING_TO_X_TYPE_END
}
1690
1691#if wxUSE_XLOCALE
1692
// Same as ToLong() but the conversion is done by the function taking an
// explicit locale argument, to which wxCLocale is passed.
//
// The "A" variant of the function is used when the string data consists of
// chars (UTF-8 or non-Unicode build) and wxHAS_XLOCALE_SUPPORT is defined.
bool wxString::ToCLong(long *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );

    WX_STRING_TO_X_TYPE_START
#if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT)
    long val = wxStrtol_lA(start, &end, base, wxCLocale);
#else
    long val = wxStrtol_l(start, &end, base, wxCLocale);
#endif
    WX_STRING_TO_X_TYPE_END
}
1705
// Same as ToULong() but the conversion is done by the function taking an
// explicit locale argument, to which wxCLocale is passed.
//
// The "A" variant of the function is used when the string data consists of
// chars (UTF-8 or non-Unicode build) and wxHAS_XLOCALE_SUPPORT is defined.
bool wxString::ToCULong(unsigned long *pVal, int base) const
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );

    WX_STRING_TO_X_TYPE_START
#if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT)
    unsigned long val = wxStrtoul_lA(start, &end, base, wxCLocale);
#else
    unsigned long val = wxStrtoul_l(start, &end, base, wxCLocale);
#endif
    WX_STRING_TO_X_TYPE_END
}
1718
// Same as ToDouble() but the conversion is done by the function taking an
// explicit locale argument, to which wxCLocale is passed.
//
// The "A" variant of the function is used when the string data consists of
// chars (UTF-8 or non-Unicode build) and wxHAS_XLOCALE_SUPPORT is defined.
bool wxString::ToCDouble(double *pVal) const
{
    WX_STRING_TO_X_TYPE_START
#if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT)
    double val = wxStrtod_lA(start, &end, wxCLocale);
#else
    double val = wxStrtod_l(start, &end, wxCLocale);
#endif
    WX_STRING_TO_X_TYPE_END
}
1729
1730#endif // wxUSE_XLOCALE
1731
1732// ---------------------------------------------------------------------------
1733// formatted output
1734// ---------------------------------------------------------------------------
1735
1736#if !wxUSE_UTF8_LOCALE_ONLY
1737/* static */
1738#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1739wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
1740#else
1741wxString wxString::DoFormatWchar(const wxChar *format, ...)
1742#endif
1743{
1744 va_list argptr;
1745 va_start(argptr, format);
1746
1747 wxString s;
1748 s.PrintfV(format, argptr);
1749
1750 va_end(argptr);
1751
1752 return s;
1753}
1754#endif // !wxUSE_UTF8_LOCALE_ONLY
1755
#if wxUSE_UNICODE_UTF8
// Return a new string containing the result of formatting the variable
// arguments according to the given char format string.
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
1771
1772/* static */
1773wxString wxString::FormatV(const wxString& format, va_list argptr)
1774{
1775 wxString s;
1776 s.PrintfV(format, argptr);
1777 return s;
1778}
1779
1780#if !wxUSE_UTF8_LOCALE_ONLY
1781#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1782int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
1783#else
1784int wxString::DoPrintfWchar(const wxChar *format, ...)
1785#endif
1786{
1787 va_list argptr;
1788 va_start(argptr, format);
1789
1790#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1791 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1792 // because it's the only cast that works safely for downcasting when
1793 // multiple inheritance is used:
1794 wxString *str = static_cast<wxString*>(this);
1795#else
1796 wxString *str = this;
1797#endif
1798
1799 int iLen = str->PrintfV(format, argptr);
1800
1801 va_end(argptr);
1802
1803 return iLen;
1804}
1805#endif // !wxUSE_UTF8_LOCALE_ONLY
1806
#if wxUSE_UNICODE_UTF8
// Replace the contents of the string with the result of formatting the
// variable arguments according to the given char format string and return
// the value returned by PrintfV().
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);
    const int iLen = PrintfV(format, args);
    va_end(args);

    return iLen;
}
#endif // wxUSE_UNICODE_UTF8
1820
1821/*
    Uses wxVsnprintf and places the result into this string.
1823
1824 In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1825 it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in
1826 the ISO C99 (and thus SUSv3) standard the return value for the case of
1827 an undersized buffer is inconsistent. For conforming vsnprintf
1828 implementations the function must return the number of characters that
1829 would have been printed had the buffer been large enough. For conforming
1830 vswprintf implementations the function must return a negative number
1831 and set errno.
1832
1833 What vswprintf sets errno to is undefined but Darwin seems to set it to
1834 EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of
1835 those are defined in the standard and backed up by several conformance
1836 statements. Note that ENOMEM mentioned in the manual page does not
1837 apply to swprintf, only wprintf and fwprintf.
1838
1839 Official manual page:
1840 http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1841
1842 Some conformance statements (AIX, Solaris):
1843 http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1844 http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1845
1846 Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1847 EILSEQ and EINVAL are specifically defined to mean the error is other than
1848 an undersized buffer and no other errno are defined we treat those two
1849 as meaning hard errors and everything else gets the old behavior which
1850 is to keep looping and increasing buffer size until the function succeeds.
1851
1852 In practice it's impossible to determine before compilation which behavior
1853 may be used. The vswprintf function may have vsnprintf-like behavior or
1854 vice-versa. Behavior detected on one release can theoretically change
1855 with an updated release. Not to mention that configure testing for it
1856 would require the test to be run on the host system, not the build system
1857 which makes cross compilation difficult. Therefore, we make no assumptions
1858 about behavior and try our best to handle every known case, including the
1859 case where wxVsnprintf returns a negative number and fails to set errno.
1860
1861 There is yet one more non-standard implementation and that is our own.
1862 Fortunately, that can be detected at compile-time.
1863
1864 On top of all that, ISO C99 explicitly defines snprintf to write a null
    character to the last position of the specified buffer.  That would be
    at the given buffer size minus 1.  It is supposed to do this even if it
1867 turns out that the buffer is sized too small.
1868
1869 Darwin (tested on 10.5) follows the C99 behavior exactly.
1870
1871 Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1872 errno even when it fails. However, it only seems to ever fail due
1873 to an undersized buffer.
1874*/
1875#if wxUSE_UNICODE_UTF8
1876template<typename BufferType>
1877#else
1878// we only need one version in non-UTF8 builds and at least two Windows
1879// compilers have problems with this function template, so use just one
1880// normal function here
1881#endif
1882static int DoStringPrintfV(wxString& str,
1883 const wxString& format, va_list argptr)
1884{
1885 int size = 1024;
1886
1887 for ( ;; )
1888 {
1889#if wxUSE_UNICODE_UTF8
1890 BufferType tmp(str, size + 1);
1891 typename BufferType::CharType *buf = tmp;
1892#else
1893 wxStringBuffer tmp(str, size + 1);
1894 wxChar *buf = tmp;
1895#endif
1896
1897 if ( !buf )
1898 {
1899 // out of memory
1900
1901 // in UTF-8 build, leaving uninitialized junk in the buffer
1902 // could result in invalid non-empty UTF-8 string, so just
1903 // reset the string to empty on failure:
1904 buf[0] = '\0';
1905 return -1;
1906 }
1907
1908 // wxVsnprintf() may modify the original arg pointer, so pass it
1909 // only a copy
1910 va_list argptrcopy;
1911 wxVaCopy(argptrcopy, argptr);
1912
1913#ifndef __WXWINCE__
1914 // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1915 errno = 0;
1916#endif
1917 int len = wxVsnprintf(buf, size, format, argptrcopy);
1918 va_end(argptrcopy);
1919
1920 // some implementations of vsnprintf() don't NUL terminate
1921 // the string if there is not enough space for it so
1922 // always do it manually
1923 // FIXME: This really seems to be the wrong and would be an off-by-one
1924 // bug except the code above allocates an extra character.
1925 buf[size] = wxT('\0');
1926
1927 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1928 // total number of characters which would have been written if the
1929 // buffer were large enough (newer standards such as Unix98)
1930 if ( len < 0 )
1931 {
1932 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1933 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1934 // is true if *both* of them use our own implementation,
1935 // otherwise we can't be sure
1936#if wxUSE_WXVSNPRINTF
1937 // we know that our own implementation of wxVsnprintf() returns -1
1938 // only for a format error - thus there's something wrong with
1939 // the user's format string
1940 buf[0] = '\0';
1941 return -1;
1942#else // possibly using system version
1943 // assume it only returns error if there is not enough space, but
1944 // as we don't know how much we need, double the current size of
1945 // the buffer
1946#ifndef __WXWINCE__
1947 if( (errno == EILSEQ) || (errno == EINVAL) )
1948 // If errno was set to one of the two well-known hard errors
1949 // then fail immediately to avoid an infinite loop.
1950 return -1;
1951 else
1952#endif // __WXWINCE__
1953 // still not enough, as we don't know how much we need, double the
1954 // current size of the buffer
1955 size *= 2;
1956#endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1957 }
1958 else if ( len >= size )
1959 {
1960#if wxUSE_WXVSNPRINTF
1961 // we know that our own implementation of wxVsnprintf() returns
1962 // size+1 when there's not enough space but that's not the size
1963 // of the required buffer!
1964 size *= 2; // so we just double the current size of the buffer
1965#else
1966 // some vsnprintf() implementations NUL-terminate the buffer and
1967 // some don't in len == size case, to be safe always add 1
1968 // FIXME: I don't quite understand this comment. The vsnprintf
1969 // function is specifically defined to return the number of
1970 // characters printed not including the null terminator.
1971 // So OF COURSE you need to add 1 to get the right buffer size.
1972 // The following line is definitely correct, no question.
1973 size = len + 1;
1974#endif
1975 }
1976 else // ok, there was enough space
1977 {
1978 break;
1979 }
1980 }
1981
1982 // we could have overshot
1983 str.Shrink();
1984
1985 return str.length();
1986}
1987
// Replace the contents of this string with the result of formatting the
// arguments in argptr according to format and return the value returned by
// DoStringPrintfV().
//
// In UTF-8 builds DoStringPrintfV() is a template and the buffer type it is
// instantiated with is selected here: the char-based Utf8Buffer if only
// UTF-8 locales are supported or if wxLocaleIsUtf8 is true at run-time and
// the wxChar-based wxStringBuffer otherwise.
int wxString::PrintfV(const wxString& format, va_list argptr)
{
#if wxUSE_UNICODE_UTF8
    // the type of the buffer giving access to the string data as chars
    #if wxUSE_STL_BASED_WXSTRING
        typedef wxStringTypeBuffer<char> Utf8Buffer;
    #else
        typedef wxStringInternalBuffer Utf8Buffer;
    #endif
#endif

#if wxUSE_UTF8_LOCALE_ONLY
    return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
#else
    #if wxUSE_UNICODE_UTF8
    if ( wxLocaleIsUtf8 )
        return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
    else
        // wxChar* version
        return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
    #else
    return DoStringPrintfV(*this, format, argptr);
    #endif // UTF8/WCHAR
#endif
}
2012
2013// ----------------------------------------------------------------------------
2014// misc other operations
2015// ----------------------------------------------------------------------------
2016
// returns true if the string matches the pattern which may contain '*' and
// '?' metacharacters (as usual, '?' matches any character and '*' any number
// of them)
//
// NOTE(review): the loop skipping the metacharacters after a '*' also skips
// any '?' following it, so a '?' directly after a '*' does not seem to
// require a character of its own -- confirm whether this is intended.
bool wxString::Matches(const wxString& mask) const
{
    // I disable this code as it doesn't seem to be faster (in fact, it seems
    // to be much slower) than the old, hand-written code below and using it
    // here requires always linking with libregex even if the user code doesn't
    // use it
#if 0 // wxUSE_REGEX
    // first translate the shell-like mask into a regex
    wxString pattern;
    pattern.reserve(wxStrlen(pszMask));

    pattern += wxT('^');
    while ( *pszMask )
    {
        switch ( *pszMask )
        {
            case wxT('?'):
                pattern += wxT('.');
                break;

            case wxT('*'):
                pattern += wxT(".*");
                break;

            case wxT('^'):
            case wxT('.'):
            case wxT('$'):
            case wxT('('):
            case wxT(')'):
            case wxT('|'):
            case wxT('+'):
            case wxT('\\'):
                // these characters are special in a RE, quote them
                // (however note that we don't quote '[' and ']' to allow
                // using them for Unix shell like matching)
                pattern += wxT('\\');
                // fall through

            default:
                pattern += *pszMask;
        }

        pszMask++;
    }
    pattern += wxT('$');

    // and now use it
    return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
#else // !wxUSE_REGEX
    // TODO: this is, of course, awfully inefficient...

    // FIXME-UTF8: implement using iterators, remove #if
#if wxUSE_UNICODE_UTF8
    // work on wide character copies of both strings, the buffers must remain
    // alive for as long as the pointers into them are used
    const wxScopedWCharBuffer maskBuf = mask.wc_str();
    const wxScopedWCharBuffer txtBuf = wc_str();
    const wxChar *pszMask = maskBuf.data();
    const wxChar *pszTxt = txtBuf.data();
#else
    const wxChar *pszMask = mask.wx_str();
    // the char currently being checked
    const wxChar *pszTxt = wx_str();
#endif

    // the last location where '*' matched
    const wxChar *pszLastStarInText = NULL;
    const wxChar *pszLastStarInMask = NULL;

match:
    // walk the mask and the text in parallel until the end of the mask
    for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
        switch ( *pszMask ) {
            case wxT('?'):
                // any character matches but there must be one
                if ( *pszTxt == wxT('\0') )
                    return false;

                // pszTxt and pszMask will be incremented in the loop statement

                break;

            case wxT('*'):
                {
                    // remember where we started to be able to backtrack later
                    pszLastStarInText = pszTxt;
                    pszLastStarInMask = pszMask;

                    // ignore special chars immediately following this one
                    // (should this be an error?)
                    while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
                        pszMask++;

                    // if there is nothing more, match
                    if ( *pszMask == wxT('\0') )
                        return true;

                    // are there any other metacharacters in the mask?
                    size_t uiLenMask;
                    const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));

                    if ( pEndMask != NULL ) {
                        // we have to match the string between two metachars
                        uiLenMask = pEndMask - pszMask;
                    }
                    else {
                        // we have to match the remainder of the string
                        uiLenMask = wxStrlen(pszMask);
                    }

                    // look for the literal part of the mask following the
                    // '*' in the remaining text
                    wxString strToMatch(pszMask, uiLenMask);
                    const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
                    if ( pMatch == NULL )
                        return false;

                    // -1 to compensate "++" in the loop
                    pszTxt = pMatch + uiLenMask - 1;
                    pszMask += uiLenMask - 1;
                }
                break;

            default:
                // ordinary characters must match exactly
                if ( *pszMask != *pszTxt )
                    return false;
                break;
        }
    }

    // match only if nothing left
    if ( *pszTxt == wxT('\0') )
        return true;

    // if we failed to match, backtrack if we can
    if ( pszLastStarInText ) {
        // retry from the last '*' in the mask, starting one character
        // further in the text than the last time
        pszTxt = pszLastStarInText + 1;
        pszMask = pszLastStarInMask;

        pszLastStarInText = NULL;

        // don't bother resetting pszLastStarInMask, it's unnecessary

        goto match;
    }

    return false;
#endif // wxUSE_REGEX/!wxUSE_REGEX
}
2163
2164// Count the number of chars
2165int wxString::Freq(wxUniChar ch) const
2166{
2167 int count = 0;
2168 for ( const_iterator i = begin(); i != end(); ++i )
2169 {
2170 if ( *i == ch )
2171 count ++;
2172 }
2173 return count;
2174}
2175