]> git.saurik.com Git - wxWidgets.git/blob - src/common/string.cpp
Initial work on virtual file system support for the WebKitGTK+ backend. It now suppor...
[wxWidgets.git] / src / common / string.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
5 // Modified by:
6 // Created: 29/01/98
7 // RCS-ID: $Id$
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
12
13 // ===========================================================================
14 // headers, declarations, constants
15 // ===========================================================================
16
17 // For compilers that support precompilation, includes "wx.h".
18 #include "wx/wxprec.h"
19
20 #ifdef __BORLANDC__
21 #pragma hdrstop
22 #endif
23
24 #ifndef WX_PRECOMP
25 #include "wx/string.h"
26 #include "wx/wxcrtvararg.h"
27 #include "wx/intl.h"
28 #include "wx/log.h"
29 #endif
30
31 #include <ctype.h>
32
33 #ifndef __WXWINCE__
34 #include <errno.h>
35 #endif
36
37 #include <string.h>
38 #include <stdlib.h>
39
40 #include "wx/hashmap.h"
41 #include "wx/vector.h"
42 #include "wx/xlocale.h"
43
44 #ifdef __WXMSW__
45 #include "wx/msw/wrapwin.h"
46 #endif // __WXMSW__
47
48 #if wxUSE_STD_IOSTREAM
49 #include <sstream>
50 #endif
51
52 // string handling functions used by wxString:
53 #if wxUSE_UNICODE_UTF8
54 #define wxStringMemcpy memcpy
55 #define wxStringMemcmp memcmp
56 #define wxStringMemchr memchr
57 #define wxStringStrlen strlen
58 #else
59 #define wxStringMemcpy wxTmemcpy
60 #define wxStringMemcmp wxTmemcmp
61 #define wxStringMemchr wxTmemchr
62 #define wxStringStrlen wxStrlen
63 #endif
64
65 // define a function declared in wx/buffer.h here as we don't have buffer.cpp
66 // and don't want to add it just because of this simple function
67 namespace wxPrivate
68 {
69
70 // wxXXXBuffer classes can be (implicitly) used during global statics
71 // initialization so wrap the status UntypedBufferData variable in a function
72 // to make it safe to access it even before all global statics are initialized
73 UntypedBufferData *GetUntypedNullData()
74 {
75 static UntypedBufferData s_untypedNullData(NULL, 0);
76
77 return &s_untypedNullData;
78 }
79
80 } // namespace wxPrivate
81
82 // ---------------------------------------------------------------------------
83 // static class variables definition
84 // ---------------------------------------------------------------------------
85
86 //According to STL _must_ be a -1 size_t
87 const size_t wxString::npos = (size_t) -1;
88
89 #if wxUSE_STRING_POS_CACHE
90
91 #ifdef wxHAS_COMPILER_TLS
92
93 wxTLS_TYPE(wxString::Cache) wxString::ms_cache;
94
95 #else // !wxHAS_COMPILER_TLS
96
97 struct wxStrCacheInitializer
98 {
99 wxStrCacheInitializer()
100 {
101 // calling this function triggers s_cache initialization in it, and
102 // from now on it becomes safe to call from multiple threads
103 wxString::GetCache();
104 }
105 };
106
107 /*
108 wxString::Cache& wxString::GetCache()
109 {
110 static wxTLS_TYPE(Cache) s_cache;
111
112 return wxTLS_VALUE(s_cache);
113 }
114 */
115
116 static wxStrCacheInitializer gs_stringCacheInit;
117
118 #endif // wxHAS_COMPILER_TLS/!wxHAS_COMPILER_TLS
119
120 // gdb seems to be unable to display thread-local variables correctly, at least
121 // not my 6.4.98 version under amd64, so provide this debugging helper to do it
122 #if wxDEBUG_LEVEL >= 2
123
124 struct wxStrCacheDumper
125 {
126 static void ShowAll()
127 {
128 puts("*** wxString cache dump:");
129 for ( unsigned n = 0; n < wxString::Cache::SIZE; n++ )
130 {
131 const wxString::Cache::Element&
132 c = wxString::GetCacheBegin()[n];
133
134 printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
135 n,
136 n == wxString::LastUsedCacheElement() ? " [*]" : "",
137 c.str,
138 (unsigned long)c.pos,
139 (unsigned long)c.impl,
140 (long)c.len);
141 }
142 }
143 };
144
145 void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
146
147 #endif // wxDEBUG_LEVEL >= 2
148
149 #ifdef wxPROFILE_STRING_CACHE
150
151 wxString::CacheStats wxString::ms_cacheStats;
152
153 struct wxStrCacheStatsDumper
154 {
155 ~wxStrCacheStatsDumper()
156 {
157 const wxString::CacheStats& stats = wxString::ms_cacheStats;
158
159 if ( stats.postot )
160 {
161 puts("*** wxString cache statistics:");
162 printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
163 stats.postot);
164 printf("\tHits %u (of which %u not used) or %.2f%%\n",
165 stats.poshits,
166 stats.mishits,
167 100.*float(stats.poshits - stats.mishits)/stats.postot);
168 printf("\tAverage position requested: %.2f\n",
169 float(stats.sumpos) / stats.postot);
170 printf("\tAverage offset after cached hint: %.2f\n",
171 float(stats.sumofs) / stats.postot);
172 }
173
174 if ( stats.lentot )
175 {
176 printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
177 stats.lentot, 100.*float(stats.lenhits)/stats.lentot);
178 }
179 }
180 };
181
182 static wxStrCacheStatsDumper s_showCacheStats;
183
184 #endif // wxPROFILE_STRING_CACHE
185
186 #endif // wxUSE_STRING_POS_CACHE
187
188 // ----------------------------------------------------------------------------
189 // global functions
190 // ----------------------------------------------------------------------------
191
192 #if wxUSE_STD_IOSTREAM
193
194 #include <iostream>
195
196 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
197 {
198 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
199 const wxScopedCharBuffer buf(str.AsCharBuf());
200 if ( !buf )
201 os.clear(wxSTD ios_base::failbit);
202 else
203 os << buf.data();
204
205 return os;
206 #else
207 return os << str.AsInternal();
208 #endif
209 }
210
211 wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
212 {
213 return os << str.c_str();
214 }
215
216 wxSTD ostream& operator<<(wxSTD ostream& os, const wxScopedCharBuffer& str)
217 {
218 return os << str.data();
219 }
220
221 #ifndef __BORLANDC__
222 wxSTD ostream& operator<<(wxSTD ostream& os, const wxScopedWCharBuffer& str)
223 {
224 return os << str.data();
225 }
226 #endif
227
228 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
229
230 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxString& str)
231 {
232 return wos << str.wc_str();
233 }
234
235 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxCStrData& str)
236 {
237 return wos << str.AsWChar();
238 }
239
240 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxScopedWCharBuffer& str)
241 {
242 return wos << str.data();
243 }
244
245 #endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
246
247 #endif // wxUSE_STD_IOSTREAM
248
249 // ===========================================================================
250 // wxString class core
251 // ===========================================================================
252
253 #if wxUSE_UNICODE_UTF8
254
255 void wxString::PosLenToImpl(size_t pos, size_t len,
256 size_t *implPos, size_t *implLen) const
257 {
258 if ( pos == npos )
259 {
260 *implPos = npos;
261 }
262 else // have valid start position
263 {
264 const const_iterator b = GetIterForNthChar(pos);
265 *implPos = wxStringImpl::const_iterator(b.impl()) - m_impl.begin();
266 if ( len == npos )
267 {
268 *implLen = npos;
269 }
270 else // have valid length too
271 {
272 // we need to handle the case of length specifying a substring
273 // going beyond the end of the string, just as std::string does
274 const const_iterator e(end());
275 const_iterator i(b);
276 while ( len && i <= e )
277 {
278 ++i;
279 --len;
280 }
281
282 *implLen = i.impl() - b.impl();
283 }
284 }
285 }
286
287 #endif // wxUSE_UNICODE_UTF8
288
289 // ----------------------------------------------------------------------------
290 // wxCStrData converted strings caching
291 // ----------------------------------------------------------------------------
292
293 // FIXME-UTF8: temporarily disabled because it doesn't work with global
294 // string objects; re-enable after fixing this bug and benchmarking
295 // performance to see if using a hash is a good idea at all
296 #if 0
297
298 // For backward compatibility reasons, it must be possible to assign the value
299 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
300 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
301 // because the memory would be freed immediately, but it has to be valid as long
302 // as the string is not modified, so that code like this still works:
303 //
304 // const wxChar *s = str.c_str();
305 // while ( s ) { ... }
306
307 // FIXME-UTF8: not thread safe!
308 // FIXME-UTF8: we currently clear the cached conversion only when the string is
309 // destroyed, but we should do it when the string is modified, to
310 // keep memory usage down
311 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
312 // invalidated the cache on every change, we could keep the previous
313 // conversion
314 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
315 // to use mb_str() or wc_str() instead of (const [w]char*)c_str()
316
317 template<typename T>
318 static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
319 {
320 typename T::iterator i = hash.find(wxConstCast(s, wxString));
321 if ( i != hash.end() )
322 {
323 free(i->second);
324 hash.erase(i);
325 }
326 }
327
328 #if wxUSE_UNICODE
329 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
330 // so we have to use wxString* here and const-cast when used
331 WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
332 wxStringCharConversionCache);
333 static wxStringCharConversionCache gs_stringsCharCache;
334
335 const char* wxCStrData::AsChar() const
336 {
337 // remove previously cache value, if any (see FIXMEs above):
338 DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
339
340 // convert the string and keep it:
341 const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
342 m_str->mb_str().release();
343
344 return s + m_offset;
345 }
346 #endif // wxUSE_UNICODE
347
348 #if !wxUSE_UNICODE_WCHAR
349 WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
350 wxStringWCharConversionCache);
351 static wxStringWCharConversionCache gs_stringsWCharCache;
352
353 const wchar_t* wxCStrData::AsWChar() const
354 {
355 // remove previously cache value, if any (see FIXMEs above):
356 DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
357
358 // convert the string and keep it:
359 const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
360 m_str->wc_str().release();
361
362 return s + m_offset;
363 }
364 #endif // !wxUSE_UNICODE_WCHAR
365
366 wxString::~wxString()
367 {
368 #if wxUSE_UNICODE
369 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
370 DeleteStringFromConversionCache(gs_stringsCharCache, this);
371 #endif
372 #if !wxUSE_UNICODE_WCHAR
373 DeleteStringFromConversionCache(gs_stringsWCharCache, this);
374 #endif
375 }
376 #endif
377
378 // ===========================================================================
379 // wxString class core
380 // ===========================================================================
381
382 // ---------------------------------------------------------------------------
383 // construction and conversion
384 // ---------------------------------------------------------------------------
385
386 #if wxUSE_UNICODE_WCHAR
387 /* static */
388 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
389 const wxMBConv& conv)
390 {
391 // anything to do?
392 if ( !psz || nLength == 0 )
393 return SubstrBufFromMB(wxWCharBuffer(L""), 0);
394
395 if ( nLength == npos )
396 nLength = wxNO_LEN;
397
398 size_t wcLen;
399 wxScopedWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
400 if ( !wcLen )
401 return SubstrBufFromMB(wxWCharBuffer(L""), 0);
402 else
403 return SubstrBufFromMB(wcBuf, wcLen);
404 }
405 #endif // wxUSE_UNICODE_WCHAR
406
407 #if wxUSE_UNICODE_UTF8
408 /* static */
409 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
410 const wxMBConv& conv)
411 {
412 // anything to do?
413 if ( !psz || nLength == 0 )
414 return SubstrBufFromMB(wxCharBuffer(""), 0);
415
416 // if psz is already in UTF-8, we don't have to do the roundtrip to
417 // wchar_t* and back:
418 if ( conv.IsUTF8() )
419 {
420 // we need to validate the input because UTF8 iterators assume valid
421 // UTF-8 sequence and psz may be invalid:
422 if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
423 {
424 // we must pass the real string length to SubstrBufFromMB ctor
425 if ( nLength == npos )
426 nLength = psz ? strlen(psz) : 0;
427 return SubstrBufFromMB(wxScopedCharBuffer::CreateNonOwned(psz, nLength),
428 nLength);
429 }
430 // else: do the roundtrip through wchar_t*
431 }
432
433 if ( nLength == npos )
434 nLength = wxNO_LEN;
435
436 // first convert to wide string:
437 size_t wcLen;
438 wxScopedWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
439 if ( !wcLen )
440 return SubstrBufFromMB(wxCharBuffer(""), 0);
441
442 // and then to UTF-8:
443 SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
444 // widechar -> UTF-8 conversion isn't supposed to ever fail:
445 wxASSERT_MSG( buf.data, wxT("conversion to UTF-8 failed") );
446
447 return buf;
448 }
449 #endif // wxUSE_UNICODE_UTF8
450
451 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
452 /* static */
453 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
454 const wxMBConv& conv)
455 {
456 // anything to do?
457 if ( !pwz || nLength == 0 )
458 return SubstrBufFromWC(wxCharBuffer(""), 0);
459
460 if ( nLength == npos )
461 nLength = wxNO_LEN;
462
463 size_t mbLen;
464 wxScopedCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
465 if ( !mbLen )
466 return SubstrBufFromWC(wxCharBuffer(""), 0);
467 else
468 return SubstrBufFromWC(mbBuf, mbLen);
469 }
470 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
471
472 // This std::string::c_str()-like method returns a wide char pointer to string
473 // contents. In wxUSE_UNICODE_WCHAR case it is trivial as it can simply return
474 // a pointer to the internal representation. Otherwise a conversion is required
475 // and it returns a temporary buffer.
476 //
477 // However for compatibility with c_str() and to avoid breaking existing code
478 // doing
479 //
480 // for ( const wchar_t *p = s.wc_str(); *p; p++ )
481 // ... use *p...
482 //
483 // we actually need to ensure that the returned buffer is _not_ temporary and
484 // so we use wxString::m_convertedToWChar to store the returned data
485 #if !wxUSE_UNICODE_WCHAR
486
487 const wchar_t *wxString::AsWChar(const wxMBConv& conv) const
488 {
489 const char * const strMB = m_impl.c_str();
490 const size_t lenMB = m_impl.length();
491
492 // find out the size of the buffer needed
493 const size_t lenWC = conv.ToWChar(NULL, 0, strMB, lenMB);
494 if ( lenWC == wxCONV_FAILED )
495 return NULL;
496
497 // keep the same buffer if the string size didn't change: this is not only
498 // an optimization but also ensure that code which modifies string
499 // character by character (without changing its length) can continue to use
500 // the pointer returned by a previous wc_str() call even after changing the
501 // string
502
503 // TODO-UTF8: we could check for ">" instead of "!=" here as this would
504 // allow to save on buffer reallocations but at the cost of
505 // consuming (even) more memory, we should benchmark this to
506 // determine if it's worth doing
507 if ( !m_convertedToWChar.m_str || lenWC != m_convertedToWChar.m_len )
508 {
509 if ( !const_cast<wxString *>(this)->m_convertedToWChar.Extend(lenWC) )
510 return NULL;
511 }
512
513 // finally do convert
514 m_convertedToWChar.m_str[lenWC] = L'\0';
515 if ( conv.ToWChar(m_convertedToWChar.m_str, lenWC,
516 strMB, lenMB) == wxCONV_FAILED )
517 return NULL;
518
519 return m_convertedToWChar.m_str;
520 }
521
522 #endif // !wxUSE_UNICODE_WCHAR
523
524
525 // Same thing for mb_str() which returns a normal char pointer to string
526 // contents: this always requires converting it to the specified encoding in
527 // non-ANSI build except if we need to convert to UTF-8 and this is what we
528 // already use internally.
529 #if wxUSE_UNICODE
530
531 const char *wxString::AsChar(const wxMBConv& conv) const
532 {
533 #if wxUSE_UNICODE_UTF8
534 if ( conv.IsUTF8() )
535 return m_impl.c_str();
536
537 const wchar_t * const strWC = AsWChar(wxMBConvStrictUTF8());
538 const size_t lenWC = m_convertedToWChar.m_len;
539 #else // wxUSE_UNICODE_WCHAR
540 const wchar_t * const strWC = m_impl.c_str();
541 const size_t lenWC = m_impl.length();
542 #endif // wxUSE_UNICODE_UTF8/wxUSE_UNICODE_WCHAR
543
544 const size_t lenMB = conv.FromWChar(NULL, 0, strWC, lenWC);
545 if ( lenMB == wxCONV_FAILED )
546 return NULL;
547
548 if ( !m_convertedToChar.m_str || lenMB != m_convertedToChar.m_len )
549 {
550 if ( !const_cast<wxString *>(this)->m_convertedToChar.Extend(lenMB) )
551 return NULL;
552 }
553
554 m_convertedToChar.m_str[lenMB] = '\0';
555 if ( conv.FromWChar(m_convertedToChar.m_str, lenMB,
556 strWC, lenWC) == wxCONV_FAILED )
557 return NULL;
558
559 return m_convertedToChar.m_str;
560 }
561
562 #endif // wxUSE_UNICODE
563
564 // shrink to minimal size (releasing extra memory)
565 bool wxString::Shrink()
566 {
567 wxString tmp(begin(), end());
568 swap(tmp);
569 return tmp.length() == length();
570 }
571
572 // deprecated compatibility code:
573 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
574 wxStringCharType *wxString::GetWriteBuf(size_t nLen)
575 {
576 return DoGetWriteBuf(nLen);
577 }
578
579 void wxString::UngetWriteBuf()
580 {
581 DoUngetWriteBuf();
582 }
583
584 void wxString::UngetWriteBuf(size_t nLen)
585 {
586 DoUngetWriteBuf(nLen);
587 }
588 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
589
590
591 // ---------------------------------------------------------------------------
592 // data access
593 // ---------------------------------------------------------------------------
594
595 // all functions are inline in string.h
596
597 // ---------------------------------------------------------------------------
598 // concatenation operators
599 // ---------------------------------------------------------------------------
600
601 /*
602 * concatenation functions come in 5 flavours:
603 * string + string
604 * char + string and string + char
605 * C str + string and string + C str
606 */
607
608 wxString operator+(const wxString& str1, const wxString& str2)
609 {
610 #if !wxUSE_STL_BASED_WXSTRING
611 wxASSERT( str1.IsValid() );
612 wxASSERT( str2.IsValid() );
613 #endif
614
615 wxString s = str1;
616 s += str2;
617
618 return s;
619 }
620
621 wxString operator+(const wxString& str, wxUniChar ch)
622 {
623 #if !wxUSE_STL_BASED_WXSTRING
624 wxASSERT( str.IsValid() );
625 #endif
626
627 wxString s = str;
628 s += ch;
629
630 return s;
631 }
632
633 wxString operator+(wxUniChar ch, const wxString& str)
634 {
635 #if !wxUSE_STL_BASED_WXSTRING
636 wxASSERT( str.IsValid() );
637 #endif
638
639 wxString s = ch;
640 s += str;
641
642 return s;
643 }
644
645 wxString operator+(const wxString& str, const char *psz)
646 {
647 #if !wxUSE_STL_BASED_WXSTRING
648 wxASSERT( str.IsValid() );
649 #endif
650
651 wxString s;
652 if ( !s.Alloc(strlen(psz) + str.length()) ) {
653 wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
654 }
655 s += str;
656 s += psz;
657
658 return s;
659 }
660
661 wxString operator+(const wxString& str, const wchar_t *pwz)
662 {
663 #if !wxUSE_STL_BASED_WXSTRING
664 wxASSERT( str.IsValid() );
665 #endif
666
667 wxString s;
668 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
669 wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
670 }
671 s += str;
672 s += pwz;
673
674 return s;
675 }
676
677 wxString operator+(const char *psz, const wxString& str)
678 {
679 #if !wxUSE_STL_BASED_WXSTRING
680 wxASSERT( str.IsValid() );
681 #endif
682
683 wxString s;
684 if ( !s.Alloc(strlen(psz) + str.length()) ) {
685 wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
686 }
687 s = psz;
688 s += str;
689
690 return s;
691 }
692
693 wxString operator+(const wchar_t *pwz, const wxString& str)
694 {
695 #if !wxUSE_STL_BASED_WXSTRING
696 wxASSERT( str.IsValid() );
697 #endif
698
699 wxString s;
700 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
701 wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
702 }
703 s = pwz;
704 s += str;
705
706 return s;
707 }
708
709 // ---------------------------------------------------------------------------
710 // string comparison
711 // ---------------------------------------------------------------------------
712
713 bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
714 {
715 return (length() == 1) && (compareWithCase ? GetChar(0u) == c
716 : wxToupper(GetChar(0u)) == wxToupper(c));
717 }
718
719 #ifdef HAVE_STD_STRING_COMPARE
720
721 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
722 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
723 // sort strings in characters code point order by sorting the byte sequence
724 // in byte values order (i.e. what strcmp() and memcmp() do).
725
726 int wxString::compare(const wxString& str) const
727 {
728 return m_impl.compare(str.m_impl);
729 }
730
731 int wxString::compare(size_t nStart, size_t nLen,
732 const wxString& str) const
733 {
734 size_t pos, len;
735 PosLenToImpl(nStart, nLen, &pos, &len);
736 return m_impl.compare(pos, len, str.m_impl);
737 }
738
739 int wxString::compare(size_t nStart, size_t nLen,
740 const wxString& str,
741 size_t nStart2, size_t nLen2) const
742 {
743 size_t pos, len;
744 PosLenToImpl(nStart, nLen, &pos, &len);
745
746 size_t pos2, len2;
747 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
748
749 return m_impl.compare(pos, len, str.m_impl, pos2, len2);
750 }
751
752 int wxString::compare(const char* sz) const
753 {
754 return m_impl.compare(ImplStr(sz));
755 }
756
757 int wxString::compare(const wchar_t* sz) const
758 {
759 return m_impl.compare(ImplStr(sz));
760 }
761
762 int wxString::compare(size_t nStart, size_t nLen,
763 const char* sz, size_t nCount) const
764 {
765 size_t pos, len;
766 PosLenToImpl(nStart, nLen, &pos, &len);
767
768 SubstrBufFromMB str(ImplStr(sz, nCount));
769
770 return m_impl.compare(pos, len, str.data, str.len);
771 }
772
773 int wxString::compare(size_t nStart, size_t nLen,
774 const wchar_t* sz, size_t nCount) const
775 {
776 size_t pos, len;
777 PosLenToImpl(nStart, nLen, &pos, &len);
778
779 SubstrBufFromWC str(ImplStr(sz, nCount));
780
781 return m_impl.compare(pos, len, str.data, str.len);
782 }
783
784 #else // !HAVE_STD_STRING_COMPARE
785
786 static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
787 const wxStringCharType* s2, size_t l2)
788 {
789 if( l1 == l2 )
790 return wxStringMemcmp(s1, s2, l1);
791 else if( l1 < l2 )
792 {
793 int ret = wxStringMemcmp(s1, s2, l1);
794 return ret == 0 ? -1 : ret;
795 }
796 else
797 {
798 int ret = wxStringMemcmp(s1, s2, l2);
799 return ret == 0 ? +1 : ret;
800 }
801 }
802
803 int wxString::compare(const wxString& str) const
804 {
805 return ::wxDoCmp(m_impl.data(), m_impl.length(),
806 str.m_impl.data(), str.m_impl.length());
807 }
808
809 int wxString::compare(size_t nStart, size_t nLen,
810 const wxString& str) const
811 {
812 wxASSERT(nStart <= length());
813 size_type strLen = length() - nStart;
814 nLen = strLen < nLen ? strLen : nLen;
815
816 size_t pos, len;
817 PosLenToImpl(nStart, nLen, &pos, &len);
818
819 return ::wxDoCmp(m_impl.data() + pos, len,
820 str.m_impl.data(), str.m_impl.length());
821 }
822
823 int wxString::compare(size_t nStart, size_t nLen,
824 const wxString& str,
825 size_t nStart2, size_t nLen2) const
826 {
827 wxASSERT(nStart <= length());
828 wxASSERT(nStart2 <= str.length());
829 size_type strLen = length() - nStart,
830 strLen2 = str.length() - nStart2;
831 nLen = strLen < nLen ? strLen : nLen;
832 nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
833
834 size_t pos, len;
835 PosLenToImpl(nStart, nLen, &pos, &len);
836 size_t pos2, len2;
837 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
838
839 return ::wxDoCmp(m_impl.data() + pos, len,
840 str.m_impl.data() + pos2, len2);
841 }
842
843 int wxString::compare(const char* sz) const
844 {
845 SubstrBufFromMB str(ImplStr(sz, npos));
846 if ( str.len == npos )
847 str.len = wxStringStrlen(str.data);
848 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
849 }
850
851 int wxString::compare(const wchar_t* sz) const
852 {
853 SubstrBufFromWC str(ImplStr(sz, npos));
854 if ( str.len == npos )
855 str.len = wxStringStrlen(str.data);
856 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
857 }
858
859 int wxString::compare(size_t nStart, size_t nLen,
860 const char* sz, size_t nCount) const
861 {
862 wxASSERT(nStart <= length());
863 size_type strLen = length() - nStart;
864 nLen = strLen < nLen ? strLen : nLen;
865
866 size_t pos, len;
867 PosLenToImpl(nStart, nLen, &pos, &len);
868
869 SubstrBufFromMB str(ImplStr(sz, nCount));
870 if ( str.len == npos )
871 str.len = wxStringStrlen(str.data);
872
873 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
874 }
875
876 int wxString::compare(size_t nStart, size_t nLen,
877 const wchar_t* sz, size_t nCount) const
878 {
879 wxASSERT(nStart <= length());
880 size_type strLen = length() - nStart;
881 nLen = strLen < nLen ? strLen : nLen;
882
883 size_t pos, len;
884 PosLenToImpl(nStart, nLen, &pos, &len);
885
886 SubstrBufFromWC str(ImplStr(sz, nCount));
887 if ( str.len == npos )
888 str.len = wxStringStrlen(str.data);
889
890 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
891 }
892
893 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
894
895
896 // ---------------------------------------------------------------------------
897 // find_{first,last}_[not]_of functions
898 // ---------------------------------------------------------------------------
899
900 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
901
902 // NB: All these functions are implemented with the argument being wxChar*,
903 // i.e. widechar string in any Unicode build, even though native string
904 // representation is char* in the UTF-8 build. This is because we couldn't
905 // use memchr() to determine if a character is in a set encoded as UTF-8.
906
907 size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
908 {
909 return find_first_of(sz, nStart, wxStrlen(sz));
910 }
911
912 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
913 {
914 return find_first_not_of(sz, nStart, wxStrlen(sz));
915 }
916
917 size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
918 {
919 wxASSERT_MSG( nStart <= length(), wxT("invalid index") );
920
921 size_t idx = nStart;
922 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
923 {
924 if ( wxTmemchr(sz, *i, n) )
925 return idx;
926 }
927
928 return npos;
929 }
930
931 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
932 {
933 wxASSERT_MSG( nStart <= length(), wxT("invalid index") );
934
935 size_t idx = nStart;
936 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
937 {
938 if ( !wxTmemchr(sz, *i, n) )
939 return idx;
940 }
941
942 return npos;
943 }
944
945
946 size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
947 {
948 return find_last_of(sz, nStart, wxStrlen(sz));
949 }
950
951 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
952 {
953 return find_last_not_of(sz, nStart, wxStrlen(sz));
954 }
955
956 size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
957 {
958 size_t len = length();
959
960 if ( nStart == npos )
961 {
962 nStart = len - 1;
963 }
964 else
965 {
966 wxASSERT_MSG( nStart <= len, wxT("invalid index") );
967 }
968
969 size_t idx = nStart;
970 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
971 i != rend(); --idx, ++i )
972 {
973 if ( wxTmemchr(sz, *i, n) )
974 return idx;
975 }
976
977 return npos;
978 }
979
980 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
981 {
982 size_t len = length();
983
984 if ( nStart == npos )
985 {
986 nStart = len - 1;
987 }
988 else
989 {
990 wxASSERT_MSG( nStart <= len, wxT("invalid index") );
991 }
992
993 size_t idx = nStart;
994 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
995 i != rend(); --idx, ++i )
996 {
997 if ( !wxTmemchr(sz, *i, n) )
998 return idx;
999 }
1000
1001 return npos;
1002 }
1003
1004 size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
1005 {
1006 wxASSERT_MSG( nStart <= length(), wxT("invalid index") );
1007
1008 size_t idx = nStart;
1009 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
1010 {
1011 if ( *i != ch )
1012 return idx;
1013 }
1014
1015 return npos;
1016 }
1017
1018 size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
1019 {
1020 size_t len = length();
1021
1022 if ( nStart == npos )
1023 {
1024 nStart = len - 1;
1025 }
1026 else
1027 {
1028 wxASSERT_MSG( nStart <= len, wxT("invalid index") );
1029 }
1030
1031 size_t idx = nStart;
1032 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1033 i != rend(); --idx, ++i )
1034 {
1035 if ( *i != ch )
1036 return idx;
1037 }
1038
1039 return npos;
1040 }
1041
1042 // the functions above were implemented for wchar_t* arguments in Unicode
1043 // build and char* in ANSI build; below are implementations for the other
1044 // version:
1045 #if wxUSE_UNICODE
1046 #define wxOtherCharType char
1047 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
1048 #else
1049 #define wxOtherCharType wchar_t
1050 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
1051 #endif
1052
1053 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
1054 { return find_first_of(STRCONV(sz), nStart); }
1055
1056 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
1057 size_t n) const
1058 { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
1059 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
1060 { return find_last_of(STRCONV(sz), nStart); }
1061 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
1062 size_t n) const
1063 { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
1064 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
1065 { return find_first_not_of(STRCONV(sz), nStart); }
1066 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
1067 size_t n) const
1068 { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
1069 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
1070 { return find_last_not_of(STRCONV(sz), nStart); }
1071 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
1072 size_t n) const
1073 { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
1074
1075 #undef wxOtherCharType
1076 #undef STRCONV
1077
1078 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1079
1080 // ===========================================================================
1081 // other common string functions
1082 // ===========================================================================
1083
1084 int wxString::CmpNoCase(const wxString& s) const
1085 {
1086 #if !wxUSE_UNICODE_UTF8
1087 // We compare NUL-delimited chunks of the strings inside the loop. We will
1088 // do as many iterations as there are embedded NULs in the string, i.e.
1089 // usually we will run it just once.
1090
1091 typedef const wxStringImpl::value_type *pchar_type;
1092 const pchar_type thisBegin = m_impl.c_str();
1093 const pchar_type thatBegin = s.m_impl.c_str();
1094
1095 const pchar_type thisEnd = thisBegin + m_impl.length();
1096 const pchar_type thatEnd = thatBegin + s.m_impl.length();
1097
1098 pchar_type thisCur = thisBegin;
1099 pchar_type thatCur = thatBegin;
1100
1101 int rc;
1102 for ( ;; )
1103 {
1104 // Compare until the next NUL, if the strings differ this is the final
1105 // result.
1106 rc = wxStricmp(thisCur, thatCur);
1107 if ( rc )
1108 break;
1109
1110 const size_t lenChunk = wxStrlen(thisCur);
1111 thisCur += lenChunk;
1112 thatCur += lenChunk;
1113
1114 // Skip all the NULs as wxStricmp() doesn't handle them.
1115 for ( ; !*thisCur; thisCur++, thatCur++ )
1116 {
1117 // Check if we exhausted either of the strings.
1118 if ( thisCur == thisEnd )
1119 {
1120 // This one is exhausted, is the other one too?
1121 return thatCur == thatEnd ? 0 : -1;
1122 }
1123
1124 if ( thatCur == thatEnd )
1125 {
1126 // Because of the test above we know that this one is not
1127 // exhausted yet so it's greater than the other one that is.
1128 return 1;
1129 }
1130
1131 if ( *thatCur )
1132 {
1133 // Anything non-NUL is greater than NUL.
1134 return -1;
1135 }
1136 }
1137 }
1138
1139 return rc;
1140 #else // wxUSE_UNICODE_UTF8
1141 // CRT functions can't be used for case-insensitive comparison of UTF-8
1142 // strings so do it in the naive, simple and inefficient way.
1143
1144 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
1145 const_iterator i1 = begin();
1146 const_iterator end1 = end();
1147 const_iterator i2 = s.begin();
1148 const_iterator end2 = s.end();
1149
1150 for ( ; i1 != end1 && i2 != end2; ++i1, ++i2 )
1151 {
1152 wxUniChar lower1 = (wxChar)wxTolower(*i1);
1153 wxUniChar lower2 = (wxChar)wxTolower(*i2);
1154 if ( lower1 != lower2 )
1155 return lower1 < lower2 ? -1 : 1;
1156 }
1157
1158 size_t len1 = length();
1159 size_t len2 = s.length();
1160
1161 if ( len1 < len2 )
1162 return -1;
1163 else if ( len1 > len2 )
1164 return 1;
1165 return 0;
1166 #endif // !wxUSE_UNICODE_UTF8/wxUSE_UNICODE_UTF8
1167 }
1168
1169
1170 #if wxUSE_UNICODE
1171
1172 #ifdef __MWERKS__
1173 #ifndef __SCHAR_MAX__
1174 #define __SCHAR_MAX__ 127
1175 #endif
1176 #endif
1177
1178 wxString wxString::FromAscii(const char *ascii, size_t len)
1179 {
1180 if (!ascii || len == 0)
1181 return wxEmptyString;
1182
1183 wxString res;
1184
1185 {
1186 wxStringInternalBuffer buf(res, len);
1187 wxStringCharType *dest = buf;
1188
1189 for ( ; len > 0; --len )
1190 {
1191 unsigned char c = (unsigned char)*ascii++;
1192 wxASSERT_MSG( c < 0x80,
1193 wxT("Non-ASCII value passed to FromAscii().") );
1194
1195 *dest++ = (wchar_t)c;
1196 }
1197 }
1198
1199 return res;
1200 }
1201
1202 wxString wxString::FromAscii(const char *ascii)
1203 {
1204 return FromAscii(ascii, wxStrlen(ascii));
1205 }
1206
1207 wxString wxString::FromAscii(char ascii)
1208 {
1209 // What do we do with '\0' ?
1210
1211 unsigned char c = (unsigned char)ascii;
1212
1213 wxASSERT_MSG( c < 0x80, wxT("Non-ASCII value passed to FromAscii().") );
1214
1215 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1216 return wxString(wxUniChar((wchar_t)c));
1217 }
1218
1219 const wxScopedCharBuffer wxString::ToAscii() const
1220 {
1221 // this will allocate enough space for the terminating NUL too
1222 wxCharBuffer buffer(length());
1223 char *dest = buffer.data();
1224
1225 for ( const_iterator i = begin(); i != end(); ++i )
1226 {
1227 wxUniChar c(*i);
1228 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1229 *dest++ = c.IsAscii() ? (char)c : '_';
1230
1231 // the output string can't have embedded NULs anyhow, so we can safely
1232 // stop at first of them even if we do have any
1233 if ( !c )
1234 break;
1235 }
1236
1237 return buffer;
1238 }
1239
1240 #endif // wxUSE_UNICODE
1241
1242 // extract string of length nCount starting at nFirst
1243 wxString wxString::Mid(size_t nFirst, size_t nCount) const
1244 {
1245 size_t nLen = length();
1246
1247 // default value of nCount is npos and means "till the end"
1248 if ( nCount == npos )
1249 {
1250 nCount = nLen - nFirst;
1251 }
1252
1253 // out-of-bounds requests return sensible things
1254 if ( nFirst + nCount > nLen )
1255 {
1256 nCount = nLen - nFirst;
1257 }
1258
1259 if ( nFirst > nLen )
1260 {
1261 // AllocCopy() will return empty string
1262 return wxEmptyString;
1263 }
1264
1265 wxString dest(*this, nFirst, nCount);
1266 if ( dest.length() != nCount )
1267 {
1268 wxFAIL_MSG( wxT("out of memory in wxString::Mid") );
1269 }
1270
1271 return dest;
1272 }
1273
1274 // check that the string starts with prefix and return the rest of the string
1275 // in the provided pointer if it is not NULL, otherwise return false
1276 bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
1277 {
1278 if ( compare(0, prefix.length(), prefix) != 0 )
1279 return false;
1280
1281 if ( rest )
1282 {
1283 // put the rest of the string into provided pointer
1284 rest->assign(*this, prefix.length(), npos);
1285 }
1286
1287 return true;
1288 }
1289
1290
1291 // check that the string ends with suffix and return the rest of it in the
1292 // provided pointer if it is not NULL, otherwise return false
1293 bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
1294 {
1295 int start = length() - suffix.length();
1296
1297 if ( start < 0 || compare(start, npos, suffix) != 0 )
1298 return false;
1299
1300 if ( rest )
1301 {
1302 // put the rest of the string into provided pointer
1303 rest->assign(*this, 0, start);
1304 }
1305
1306 return true;
1307 }
1308
1309
1310 // extract nCount last (rightmost) characters
1311 wxString wxString::Right(size_t nCount) const
1312 {
1313 if ( nCount > length() )
1314 nCount = length();
1315
1316 wxString dest(*this, length() - nCount, nCount);
1317 if ( dest.length() != nCount ) {
1318 wxFAIL_MSG( wxT("out of memory in wxString::Right") );
1319 }
1320 return dest;
1321 }
1322
1323 // get all characters after the last occurrence of ch
1324 // (returns the whole string if ch not found)
1325 wxString wxString::AfterLast(wxUniChar ch) const
1326 {
1327 wxString str;
1328 int iPos = Find(ch, true);
1329 if ( iPos == wxNOT_FOUND )
1330 str = *this;
1331 else
1332 str.assign(*this, iPos + 1, npos);
1333
1334 return str;
1335 }
1336
1337 // extract nCount first (leftmost) characters
1338 wxString wxString::Left(size_t nCount) const
1339 {
1340 if ( nCount > length() )
1341 nCount = length();
1342
1343 wxString dest(*this, 0, nCount);
1344 if ( dest.length() != nCount ) {
1345 wxFAIL_MSG( wxT("out of memory in wxString::Left") );
1346 }
1347 return dest;
1348 }
1349
1350 // get all characters before the first occurrence of ch
1351 // (returns the whole string if ch not found)
1352 wxString wxString::BeforeFirst(wxUniChar ch, wxString *rest) const
1353 {
1354 int iPos = Find(ch);
1355 if ( iPos == wxNOT_FOUND )
1356 {
1357 iPos = length();
1358 if ( rest )
1359 rest->clear();
1360 }
1361 else
1362 {
1363 if ( rest )
1364 rest->assign(*this, iPos + 1, npos);
1365 }
1366
1367 return wxString(*this, 0, iPos);
1368 }
1369
1370 /// get all characters before the last occurrence of ch
1371 /// (returns empty string if ch not found)
1372 wxString wxString::BeforeLast(wxUniChar ch, wxString *rest) const
1373 {
1374 wxString str;
1375 int iPos = Find(ch, true);
1376 if ( iPos != wxNOT_FOUND )
1377 {
1378 if ( iPos != 0 )
1379 str.assign(*this, 0, iPos);
1380
1381 if ( rest )
1382 rest->assign(*this, iPos + 1, npos);
1383 }
1384 else
1385 {
1386 if ( rest )
1387 *rest = *this;
1388 }
1389
1390 return str;
1391 }
1392
1393 /// get all characters after the first occurrence of ch
1394 /// (returns empty string if ch not found)
1395 wxString wxString::AfterFirst(wxUniChar ch) const
1396 {
1397 wxString str;
1398 int iPos = Find(ch);
1399 if ( iPos != wxNOT_FOUND )
1400 str.assign(*this, iPos + 1, npos);
1401
1402 return str;
1403 }
1404
1405 // replace first (or all) occurrences of some substring with another one
1406 size_t wxString::Replace(const wxString& strOld,
1407 const wxString& strNew, bool bReplaceAll)
1408 {
1409 // if we tried to replace an empty string we'd enter an infinite loop below
1410 wxCHECK_MSG( !strOld.empty(), 0,
1411 wxT("wxString::Replace(): invalid parameter") );
1412
1413 wxSTRING_INVALIDATE_CACHE();
1414
1415 size_t uiCount = 0; // count of replacements made
1416
1417 // optimize the special common case: replacement of one character by
1418 // another one (in UTF-8 case we can only do this for ASCII characters)
1419 //
1420 // benchmarks show that this special version is around 3 times faster
1421 // (depending on the proportion of matching characters and UTF-8/wchar_t
1422 // build)
1423 if ( strOld.m_impl.length() == 1 && strNew.m_impl.length() == 1 )
1424 {
1425 const wxStringCharType chOld = strOld.m_impl[0],
1426 chNew = strNew.m_impl[0];
1427
1428 // this loop is the simplified version of the one below
1429 for ( size_t pos = 0; ; )
1430 {
1431 pos = m_impl.find(chOld, pos);
1432 if ( pos == npos )
1433 break;
1434
1435 m_impl[pos++] = chNew;
1436
1437 uiCount++;
1438
1439 if ( !bReplaceAll )
1440 break;
1441 }
1442 }
1443 else if ( !bReplaceAll)
1444 {
1445 size_t pos = m_impl.find(strOld, 0);
1446 if ( pos != npos )
1447 {
1448 m_impl.replace(pos, strOld.m_impl.length(), strNew.m_impl);
1449 uiCount = 1;
1450 }
1451 }
1452 else // replace all occurrences
1453 {
1454 const size_t uiOldLen = strOld.m_impl.length();
1455 const size_t uiNewLen = strNew.m_impl.length();
1456
1457 // first scan the string to find all positions at which the replacement
1458 // should be made
1459 wxVector<size_t> replacePositions;
1460
1461 size_t pos;
1462 for ( pos = m_impl.find(strOld.m_impl, 0);
1463 pos != npos;
1464 pos = m_impl.find(strOld.m_impl, pos + uiOldLen))
1465 {
1466 replacePositions.push_back(pos);
1467 ++uiCount;
1468 }
1469
1470 if ( !uiCount )
1471 return 0;
1472
1473 // allocate enough memory for the whole new string
1474 wxString tmp;
1475 tmp.m_impl.reserve(m_impl.length() + uiCount*(uiNewLen - uiOldLen));
1476
1477 // copy this string to tmp doing replacements on the fly
1478 size_t replNum = 0;
1479 for ( pos = 0; replNum < uiCount; replNum++ )
1480 {
1481 const size_t nextReplPos = replacePositions[replNum];
1482
1483 if ( pos != nextReplPos )
1484 {
1485 tmp.m_impl.append(m_impl, pos, nextReplPos - pos);
1486 }
1487
1488 tmp.m_impl.append(strNew.m_impl);
1489 pos = nextReplPos + uiOldLen;
1490 }
1491
1492 if ( pos != m_impl.length() )
1493 {
1494 // append the rest of the string unchanged
1495 tmp.m_impl.append(m_impl, pos, m_impl.length() - pos);
1496 }
1497
1498 swap(tmp);
1499 }
1500
1501 return uiCount;
1502 }
1503
1504 bool wxString::IsAscii() const
1505 {
1506 for ( const_iterator i = begin(); i != end(); ++i )
1507 {
1508 if ( !(*i).IsAscii() )
1509 return false;
1510 }
1511
1512 return true;
1513 }
1514
1515 bool wxString::IsWord() const
1516 {
1517 for ( const_iterator i = begin(); i != end(); ++i )
1518 {
1519 if ( !wxIsalpha(*i) )
1520 return false;
1521 }
1522
1523 return true;
1524 }
1525
1526 bool wxString::IsNumber() const
1527 {
1528 if ( empty() )
1529 return true;
1530
1531 const_iterator i = begin();
1532
1533 if ( *i == wxT('-') || *i == wxT('+') )
1534 ++i;
1535
1536 for ( ; i != end(); ++i )
1537 {
1538 if ( !wxIsdigit(*i) )
1539 return false;
1540 }
1541
1542 return true;
1543 }
1544
1545 wxString wxString::Strip(stripType w) const
1546 {
1547 wxString s = *this;
1548 if ( w & leading ) s.Trim(false);
1549 if ( w & trailing ) s.Trim(true);
1550 return s;
1551 }
1552
1553 // ---------------------------------------------------------------------------
1554 // case conversion
1555 // ---------------------------------------------------------------------------
1556
1557 wxString& wxString::MakeUpper()
1558 {
1559 for ( iterator it = begin(), en = end(); it != en; ++it )
1560 *it = (wxChar)wxToupper(*it);
1561
1562 return *this;
1563 }
1564
1565 wxString& wxString::MakeLower()
1566 {
1567 for ( iterator it = begin(), en = end(); it != en; ++it )
1568 *it = (wxChar)wxTolower(*it);
1569
1570 return *this;
1571 }
1572
1573 wxString& wxString::MakeCapitalized()
1574 {
1575 const iterator en = end();
1576 iterator it = begin();
1577 if ( it != en )
1578 {
1579 *it = (wxChar)wxToupper(*it);
1580 for ( ++it; it != en; ++it )
1581 *it = (wxChar)wxTolower(*it);
1582 }
1583
1584 return *this;
1585 }
1586
1587 // ---------------------------------------------------------------------------
1588 // trimming and padding
1589 // ---------------------------------------------------------------------------
1590
1591 // some compilers (VC++ 6.0 not to name them) return true for a call to
1592 // isspace('\xEA') in the C locale which seems to be broken to me, but we have
1593 // to live with this by checking that the character is a 7 bit one - even if
1594 // this may fail to detect some spaces (I don't know if Unicode doesn't have
1595 // space-like symbols somewhere except in the first 128 chars), it is arguably
1596 // still better than trimming away accented letters
1597 inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1598
1599 // trims spaces (in the sense of isspace) from left or right side
1600 wxString& wxString::Trim(bool bFromRight)
1601 {
1602 // first check if we're going to modify the string at all
1603 if ( !empty() &&
1604 (
1605 (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1606 (!bFromRight && wxSafeIsspace(GetChar(0u)))
1607 )
1608 )
1609 {
1610 if ( bFromRight )
1611 {
1612 // find last non-space character
1613 reverse_iterator psz = rbegin();
1614 while ( (psz != rend()) && wxSafeIsspace(*psz) )
1615 ++psz;
1616
1617 // truncate at trailing space start
1618 erase(psz.base(), end());
1619 }
1620 else
1621 {
1622 // find first non-space character
1623 iterator psz = begin();
1624 while ( (psz != end()) && wxSafeIsspace(*psz) )
1625 ++psz;
1626
1627 // fix up data and length
1628 erase(begin(), psz);
1629 }
1630 }
1631
1632 return *this;
1633 }
1634
1635 // adds nCount characters chPad to the string from either side
1636 wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1637 {
1638 wxString s(chPad, nCount);
1639
1640 if ( bFromRight )
1641 *this += s;
1642 else
1643 {
1644 s += *this;
1645 swap(s);
1646 }
1647
1648 return *this;
1649 }
1650
1651 // truncate the string
1652 wxString& wxString::Truncate(size_t uiLen)
1653 {
1654 if ( uiLen < length() )
1655 {
1656 erase(begin() + uiLen, end());
1657 }
1658 //else: nothing to do, string is already short enough
1659
1660 return *this;
1661 }
1662
1663 // ---------------------------------------------------------------------------
1664 // finding (return wxNOT_FOUND if not found and index otherwise)
1665 // ---------------------------------------------------------------------------
1666
1667 // find a character
1668 int wxString::Find(wxUniChar ch, bool bFromEnd) const
1669 {
1670 size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1671
1672 return (idx == npos) ? wxNOT_FOUND : (int)idx;
1673 }
1674
1675 // ----------------------------------------------------------------------------
1676 // conversion to numbers
1677 // ----------------------------------------------------------------------------
1678
1679 // The implementation of all the functions below is exactly the same so factor
1680 // it out. Note that number extraction works correctly on UTF-8 strings, so
1681 // we can use wxStringCharType and wx_str() for maximum efficiency.
1682
1683 #ifndef __WXWINCE__
1684 #define DO_IF_NOT_WINCE(x) x
1685 #else
1686 #define DO_IF_NOT_WINCE(x)
1687 #endif
1688
1689 #define WX_STRING_TO_X_TYPE_START \
1690 wxCHECK_MSG( pVal, false, wxT("NULL output pointer") ); \
1691 DO_IF_NOT_WINCE( errno = 0; ) \
1692 const wxStringCharType *start = wx_str(); \
1693 wxStringCharType *end;
1694
1695 // notice that we return false without modifying the output parameter at all if
1696 // nothing could be parsed but we do modify it and return false then if we did
1697 // parse something successfully but not the entire string
1698 #define WX_STRING_TO_X_TYPE_END \
1699 if ( end == start DO_IF_NOT_WINCE(|| errno == ERANGE) ) \
1700 return false; \
1701 *pVal = val; \
1702 return !*end;
1703
1704 bool wxString::ToLong(long *pVal, int base) const
1705 {
1706 wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );
1707
1708 WX_STRING_TO_X_TYPE_START
1709 long val = wxStrtol(start, &end, base);
1710 WX_STRING_TO_X_TYPE_END
1711 }
1712
1713 bool wxString::ToULong(unsigned long *pVal, int base) const
1714 {
1715 wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );
1716
1717 WX_STRING_TO_X_TYPE_START
1718 unsigned long val = wxStrtoul(start, &end, base);
1719 WX_STRING_TO_X_TYPE_END
1720 }
1721
1722 bool wxString::ToLongLong(wxLongLong_t *pVal, int base) const
1723 {
1724 wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );
1725
1726 WX_STRING_TO_X_TYPE_START
1727 wxLongLong_t val = wxStrtoll(start, &end, base);
1728 WX_STRING_TO_X_TYPE_END
1729 }
1730
1731 bool wxString::ToULongLong(wxULongLong_t *pVal, int base) const
1732 {
1733 wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );
1734
1735 WX_STRING_TO_X_TYPE_START
1736 wxULongLong_t val = wxStrtoull(start, &end, base);
1737 WX_STRING_TO_X_TYPE_END
1738 }
1739
1740 bool wxString::ToDouble(double *pVal) const
1741 {
1742 WX_STRING_TO_X_TYPE_START
1743 double val = wxStrtod(start, &end);
1744 WX_STRING_TO_X_TYPE_END
1745 }
1746
1747 #if wxUSE_XLOCALE
1748
1749 bool wxString::ToCLong(long *pVal, int base) const
1750 {
1751 wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );
1752
1753 WX_STRING_TO_X_TYPE_START
1754 #if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT)
1755 long val = wxStrtol_lA(start, &end, base, wxCLocale);
1756 #else
1757 long val = wxStrtol_l(start, &end, base, wxCLocale);
1758 #endif
1759 WX_STRING_TO_X_TYPE_END
1760 }
1761
1762 bool wxString::ToCULong(unsigned long *pVal, int base) const
1763 {
1764 wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );
1765
1766 WX_STRING_TO_X_TYPE_START
1767 #if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT)
1768 unsigned long val = wxStrtoul_lA(start, &end, base, wxCLocale);
1769 #else
1770 unsigned long val = wxStrtoul_l(start, &end, base, wxCLocale);
1771 #endif
1772 WX_STRING_TO_X_TYPE_END
1773 }
1774
1775 bool wxString::ToCDouble(double *pVal) const
1776 {
1777 WX_STRING_TO_X_TYPE_START
1778 #if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT)
1779 double val = wxStrtod_lA(start, &end, wxCLocale);
1780 #else
1781 double val = wxStrtod_l(start, &end, wxCLocale);
1782 #endif
1783 WX_STRING_TO_X_TYPE_END
1784 }
1785
1786 #else // wxUSE_XLOCALE
1787
1788 // Provide implementation of these functions even when wxUSE_XLOCALE is
1789 // disabled, we still need them in wxWidgets internal code.
1790
1791 // For integers we just assume the current locale uses the same number
1792 // representation as the C one as there is nothing else we can do.
1793 bool wxString::ToCLong(long *pVal, int base) const
1794 {
1795 return ToLong(pVal, base);
1796 }
1797
1798 bool wxString::ToCULong(unsigned long *pVal, int base) const
1799 {
1800 return ToULong(pVal, base);
1801 }
1802
1803 // For floating point numbers we have to handle the problem of the decimal
1804 // point which is different in different locales.
1805 bool wxString::ToCDouble(double *pVal) const
1806 {
1807 // Create a copy of this string using the decimal point instead of whatever
1808 // separator the current locale uses.
1809 #if wxUSE_INTL
1810 wxString sep = wxLocale::GetInfo(wxLOCALE_DECIMAL_POINT,
1811 wxLOCALE_CAT_NUMBER);
1812 if ( sep == "." )
1813 {
1814 // We can avoid an unnecessary string copy in this case.
1815 return ToDouble(pVal);
1816 }
1817 #else // !wxUSE_INTL
1818 // We don't know what the current separator is so it might even be a point
1819 // already, try to parse the string as a double:
1820 if ( ToDouble(pVal) )
1821 {
1822 // It must have been the point, nothing else to do.
1823 return true;
1824 }
1825
1826 // Try to guess the separator, using the most common alternative value.
1827 wxString sep(",");
1828 #endif // wxUSE_INTL/!wxUSE_INTL
1829 wxString cstr(*this);
1830 cstr.Replace(".", sep);
1831
1832 return cstr.ToDouble(pVal);
1833 }
1834
1835 #endif // wxUSE_XLOCALE/!wxUSE_XLOCALE
1836
1837 // ----------------------------------------------------------------------------
1838 // number to string conversion
1839 // ----------------------------------------------------------------------------
1840
1841 /* static */
1842 wxString wxString::FromDouble(double val, int precision)
1843 {
1844 wxCHECK_MSG( precision >= -1, wxString(), "Invalid negative precision" );
1845
1846 wxString format;
1847 if ( precision == -1 )
1848 {
1849 format = "%g";
1850 }
1851 else // Use fixed precision.
1852 {
1853 format.Printf("%%.%df", precision);
1854 }
1855
1856 return wxString::Format(format, val);
1857 }
1858
1859 /* static */
1860 wxString wxString::FromCDouble(double val, int precision)
1861 {
1862 wxCHECK_MSG( precision >= -1, wxString(), "Invalid negative precision" );
1863
1864 #if wxUSE_STD_IOSTREAM && wxUSE_STD_STRING
1865 // We assume that we can use the ostream and not wstream for numbers.
1866 wxSTD ostringstream os;
1867 if ( precision != -1 )
1868 {
1869 os.precision(precision);
1870 os.setf(std::ios::fixed, std::ios::floatfield);
1871 }
1872
1873 os << val;
1874 return os.str();
1875 #else // !wxUSE_STD_IOSTREAM
1876 // Can't use iostream locale support, fall back to the manual method
1877 // instead.
1878 wxString s = FromDouble(val, precision);
1879 #if wxUSE_INTL
1880 wxString sep = wxLocale::GetInfo(wxLOCALE_DECIMAL_POINT,
1881 wxLOCALE_CAT_NUMBER);
1882 #else // !wxUSE_INTL
1883 // As above, this is the most common alternative value. Notice that here it
1884 // doesn't matter if we guess wrongly and the current separator is already
1885 // ".": we'll just waste a call to Replace() in this case.
1886 wxString sep(",");
1887 #endif // wxUSE_INTL/!wxUSE_INTL
1888
1889 s.Replace(sep, ".");
1890 return s;
1891 #endif // wxUSE_STD_IOSTREAM/!wxUSE_STD_IOSTREAM
1892 }
1893
1894 // ---------------------------------------------------------------------------
1895 // formatted output
1896 // ---------------------------------------------------------------------------
1897
1898 #if !wxUSE_UTF8_LOCALE_ONLY
1899 /* static */
1900 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1901 wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
1902 #else
1903 wxString wxString::DoFormatWchar(const wxChar *format, ...)
1904 #endif
1905 {
1906 va_list argptr;
1907 va_start(argptr, format);
1908
1909 wxString s;
1910 s.PrintfV(format, argptr);
1911
1912 va_end(argptr);
1913
1914 return s;
1915 }
1916 #endif // !wxUSE_UTF8_LOCALE_ONLY
1917
#if wxUSE_UNICODE_UTF8
// Return a new string formatted using the given printf()-like char format
// string and the variable arguments following it.
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
1933
1934 /* static */
1935 wxString wxString::FormatV(const wxString& format, va_list argptr)
1936 {
1937 wxString s;
1938 s.PrintfV(format, argptr);
1939 return s;
1940 }
1941
1942 #if !wxUSE_UTF8_LOCALE_ONLY
1943 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1944 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
1945 #else
1946 int wxString::DoPrintfWchar(const wxChar *format, ...)
1947 #endif
1948 {
1949 va_list argptr;
1950 va_start(argptr, format);
1951
1952 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1953 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1954 // because it's the only cast that works safely for downcasting when
1955 // multiple inheritance is used:
1956 wxString *str = static_cast<wxString*>(this);
1957 #else
1958 wxString *str = this;
1959 #endif
1960
1961 int iLen = str->PrintfV(format, argptr);
1962
1963 va_end(argptr);
1964
1965 return iLen;
1966 }
1967 #endif // !wxUSE_UTF8_LOCALE_ONLY
1968
#if wxUSE_UNICODE_UTF8
// Replace the contents of the string with the result of formatting the
// variable arguments using the given printf()-like char format string.
// Returns the value returned by PrintfV().
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);

    const int written = PrintfV(format, args);

    va_end(args);

    return written;
}
#endif // wxUSE_UNICODE_UTF8
1982
1983 /*
1984 Uses wxVsnprintf and places the result into the this string.
1985
1986 In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1987 it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in
1988 the ISO C99 (and thus SUSv3) standard the return value for the case of
1989 an undersized buffer is inconsistent. For conforming vsnprintf
1990 implementations the function must return the number of characters that
1991 would have been printed had the buffer been large enough. For conforming
1992 vswprintf implementations the function must return a negative number
1993 and set errno.
1994
1995 What vswprintf sets errno to is undefined but Darwin seems to set it to
1996 EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of
1997 those are defined in the standard and backed up by several conformance
1998 statements. Note that ENOMEM mentioned in the manual page does not
1999 apply to swprintf, only wprintf and fwprintf.
2000
2001 Official manual page:
2002 http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
2003
2004 Some conformance statements (AIX, Solaris):
2005 http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
2006 http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
2007
2008 Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
2009 EILSEQ and EINVAL are specifically defined to mean the error is other than
2010 an undersized buffer and no other errno are defined we treat those two
2011 as meaning hard errors and everything else gets the old behaviour which
2012 is to keep looping and increasing buffer size until the function succeeds.
2013
2014 In practice it's impossible to determine before compilation which behaviour
2015 may be used. The vswprintf function may have vsnprintf-like behaviour or
2016 vice-versa. Behaviour detected on one release can theoretically change
2017 with an updated release. Not to mention that configure testing for it
2018 would require the test to be run on the host system, not the build system
2019 which makes cross compilation difficult. Therefore, we make no assumptions
2020 about behaviour and try our best to handle every known case, including the
2021 case where wxVsnprintf returns a negative number and fails to set errno.
2022
2023 There is yet one more non-standard implementation and that is our own.
2024 Fortunately, that can be detected at compile-time.
2025
2026 On top of all that, ISO C99 explicitly defines snprintf to write a null
2027 character to the last position of the specified buffer. That would be
2028 at the given buffer size minus 1. It is supposed to do this even if it
2029 turns out that the buffer is sized too small.
2030
2031 Darwin (tested on 10.5) follows the C99 behaviour exactly.
2032
2033 Glibc 2.6 almost follows the C99 behaviour except vswprintf never sets
2034 errno even when it fails. However, it only seems to ever fail due
2035 to an undersized buffer.
2036 */
2037 #if wxUSE_UNICODE_UTF8
2038 template<typename BufferType>
2039 #else
2040 // we only need one version in non-UTF8 builds and at least two Windows
2041 // compilers have problems with this function template, so use just one
2042 // normal function here
2043 #endif
2044 static int DoStringPrintfV(wxString& str,
2045 const wxString& format, va_list argptr)
2046 {
2047 int size = 1024;
2048
2049 for ( ;; )
2050 {
2051 #if wxUSE_UNICODE_UTF8
2052 BufferType tmp(str, size + 1);
2053 typename BufferType::CharType *buf = tmp;
2054 #else
2055 wxStringBuffer tmp(str, size + 1);
2056 wxChar *buf = tmp;
2057 #endif
2058
2059 if ( !buf )
2060 {
2061 // out of memory
2062 return -1;
2063 }
2064
2065 // wxVsnprintf() may modify the original arg pointer, so pass it
2066 // only a copy
2067 va_list argptrcopy;
2068 wxVaCopy(argptrcopy, argptr);
2069
2070 #ifndef __WXWINCE__
2071 // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
2072 errno = 0;
2073 #endif
2074 int len = wxVsnprintf(buf, size, format, argptrcopy);
2075 va_end(argptrcopy);
2076
2077 // some implementations of vsnprintf() don't NUL terminate
2078 // the string if there is not enough space for it so
2079 // always do it manually
2080 // FIXME: This really seems to be the wrong and would be an off-by-one
2081 // bug except the code above allocates an extra character.
2082 buf[size] = wxT('\0');
2083
2084 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
2085 // total number of characters which would have been written if the
2086 // buffer were large enough (newer standards such as Unix98)
2087 if ( len < 0 )
2088 {
2089 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
2090 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
2091 // is true if *both* of them use our own implementation,
2092 // otherwise we can't be sure
2093 #if wxUSE_WXVSNPRINTF
2094 // we know that our own implementation of wxVsnprintf() returns -1
2095 // only for a format error - thus there's something wrong with
2096 // the user's format string
2097 buf[0] = '\0';
2098 return -1;
2099 #else // possibly using system version
2100 // assume it only returns error if there is not enough space, but
2101 // as we don't know how much we need, double the current size of
2102 // the buffer
2103 #ifndef __WXWINCE__
2104 if( (errno == EILSEQ) || (errno == EINVAL) )
2105 // If errno was set to one of the two well-known hard errors
2106 // then fail immediately to avoid an infinite loop.
2107 return -1;
2108 else
2109 #endif // __WXWINCE__
2110 // still not enough, as we don't know how much we need, double the
2111 // current size of the buffer
2112 size *= 2;
2113 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
2114 }
2115 else if ( len >= size )
2116 {
2117 #if wxUSE_WXVSNPRINTF
2118 // we know that our own implementation of wxVsnprintf() returns
2119 // size+1 when there's not enough space but that's not the size
2120 // of the required buffer!
2121 size *= 2; // so we just double the current size of the buffer
2122 #else
2123 // some vsnprintf() implementations NUL-terminate the buffer and
2124 // some don't in len == size case, to be safe always add 1
2125 // FIXME: I don't quite understand this comment. The vsnprintf
2126 // function is specifically defined to return the number of
2127 // characters printed not including the null terminator.
2128 // So OF COURSE you need to add 1 to get the right buffer size.
2129 // The following line is definitely correct, no question.
2130 size = len + 1;
2131 #endif
2132 }
2133 else // ok, there was enough space
2134 {
2135 break;
2136 }
2137 }
2138
2139 // we could have overshot
2140 str.Shrink();
2141
2142 return str.length();
2143 }
2144
2145 int wxString::PrintfV(const wxString& format, va_list argptr)
2146 {
2147 #if wxUSE_UNICODE_UTF8
2148 #if wxUSE_STL_BASED_WXSTRING
2149 typedef wxStringTypeBuffer<char> Utf8Buffer;
2150 #else
2151 typedef wxStringInternalBuffer Utf8Buffer;
2152 #endif
2153 #endif
2154
2155 #if wxUSE_UTF8_LOCALE_ONLY
2156 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
2157 #else
2158 #if wxUSE_UNICODE_UTF8
2159 if ( wxLocaleIsUtf8 )
2160 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
2161 else
2162 // wxChar* version
2163 return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
2164 #else
2165 return DoStringPrintfV(*this, format, argptr);
2166 #endif // UTF8/WCHAR
2167 #endif
2168 }
2169
2170 // ----------------------------------------------------------------------------
2171 // misc other operations
2172 // ----------------------------------------------------------------------------
2173
2174 // returns true if the string matches the pattern which may contain '*' and
2175 // '?' metacharacters (as usual, '?' matches any character and '*' any number
2176 // of them)
2177 bool wxString::Matches(const wxString& mask) const
2178 {
2179 // I disable this code as it doesn't seem to be faster (in fact, it seems
2180 // to be much slower) than the old, hand-written code below and using it
2181 // here requires always linking with libregex even if the user code doesn't
2182 // use it
2183 #if 0 // wxUSE_REGEX
2184 // first translate the shell-like mask into a regex
2185 wxString pattern;
2186 pattern.reserve(wxStrlen(pszMask));
2187
2188 pattern += wxT('^');
2189 while ( *pszMask )
2190 {
2191 switch ( *pszMask )
2192 {
2193 case wxT('?'):
2194 pattern += wxT('.');
2195 break;
2196
2197 case wxT('*'):
2198 pattern += wxT(".*");
2199 break;
2200
2201 case wxT('^'):
2202 case wxT('.'):
2203 case wxT('$'):
2204 case wxT('('):
2205 case wxT(')'):
2206 case wxT('|'):
2207 case wxT('+'):
2208 case wxT('\\'):
2209 // these characters are special in a RE, quote them
2210 // (however note that we don't quote '[' and ']' to allow
2211 // using them for Unix shell like matching)
2212 pattern += wxT('\\');
2213 // fall through
2214
2215 default:
2216 pattern += *pszMask;
2217 }
2218
2219 pszMask++;
2220 }
2221 pattern += wxT('$');
2222
2223 // and now use it
2224 return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
2225 #else // !wxUSE_REGEX
2226 // TODO: this is, of course, awfully inefficient...
2227
2228 // FIXME-UTF8: implement using iterators, remove #if
2229 #if wxUSE_UNICODE_UTF8
2230 const wxScopedWCharBuffer maskBuf = mask.wc_str();
2231 const wxScopedWCharBuffer txtBuf = wc_str();
2232 const wxChar *pszMask = maskBuf.data();
2233 const wxChar *pszTxt = txtBuf.data();
2234 #else
2235 const wxChar *pszMask = mask.wx_str();
2236 // the char currently being checked
2237 const wxChar *pszTxt = wx_str();
2238 #endif
2239
2240 // the last location where '*' matched
2241 const wxChar *pszLastStarInText = NULL;
2242 const wxChar *pszLastStarInMask = NULL;
2243
2244 match:
2245 for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
2246 switch ( *pszMask ) {
2247 case wxT('?'):
2248 if ( *pszTxt == wxT('\0') )
2249 return false;
2250
2251 // pszTxt and pszMask will be incremented in the loop statement
2252
2253 break;
2254
2255 case wxT('*'):
2256 {
2257 // remember where we started to be able to backtrack later
2258 pszLastStarInText = pszTxt;
2259 pszLastStarInMask = pszMask;
2260
2261 // ignore special chars immediately following this one
2262 // (should this be an error?)
2263 while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
2264 pszMask++;
2265
2266 // if there is nothing more, match
2267 if ( *pszMask == wxT('\0') )
2268 return true;
2269
2270 // are there any other metacharacters in the mask?
2271 size_t uiLenMask;
2272 const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));
2273
2274 if ( pEndMask != NULL ) {
2275 // we have to match the string between two metachars
2276 uiLenMask = pEndMask - pszMask;
2277 }
2278 else {
2279 // we have to match the remainder of the string
2280 uiLenMask = wxStrlen(pszMask);
2281 }
2282
2283 wxString strToMatch(pszMask, uiLenMask);
2284 const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
2285 if ( pMatch == NULL )
2286 return false;
2287
2288 // -1 to compensate "++" in the loop
2289 pszTxt = pMatch + uiLenMask - 1;
2290 pszMask += uiLenMask - 1;
2291 }
2292 break;
2293
2294 default:
2295 if ( *pszMask != *pszTxt )
2296 return false;
2297 break;
2298 }
2299 }
2300
2301 // match only if nothing left
2302 if ( *pszTxt == wxT('\0') )
2303 return true;
2304
2305 // if we failed to match, backtrack if we can
2306 if ( pszLastStarInText ) {
2307 pszTxt = pszLastStarInText + 1;
2308 pszMask = pszLastStarInMask;
2309
2310 pszLastStarInText = NULL;
2311
2312 // don't bother resetting pszLastStarInMask, it's unnecessary
2313
2314 goto match;
2315 }
2316
2317 return false;
2318 #endif // wxUSE_REGEX/!wxUSE_REGEX
2319 }
2320
2321 // Count the number of chars
2322 int wxString::Freq(wxUniChar ch) const
2323 {
2324 int count = 0;
2325 for ( const_iterator i = begin(); i != end(); ++i )
2326 {
2327 if ( *i == ch )
2328 count ++;
2329 }
2330 return count;
2331 }
2332