Globally replace _T() with wxT().
[wxWidgets.git] / src / common / string.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
5 // Modified by:
6 // Created: 29/01/98
7 // RCS-ID: $Id$
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
12
13 // ===========================================================================
14 // headers, declarations, constants
15 // ===========================================================================
16
17 // For compilers that support precompilation, includes "wx.h".
18 #include "wx/wxprec.h"
19
20 #ifdef __BORLANDC__
21 #pragma hdrstop
22 #endif
23
24 #ifndef WX_PRECOMP
25 #include "wx/string.h"
26 #include "wx/wxcrtvararg.h"
27 #include "wx/log.h"
28 #endif
29
30 #include <ctype.h>
31
32 #ifndef __WXWINCE__
33 #include <errno.h>
34 #endif
35
36 #include <string.h>
37 #include <stdlib.h>
38
39 #include "wx/hashmap.h"
40 #include "wx/vector.h"
41 #include "wx/xlocale.h"
42
43 #ifdef __WXMSW__
44 #include "wx/msw/wrapwin.h"
45 #endif // __WXMSW__
46
47 // string handling functions used by wxString:
48 #if wxUSE_UNICODE_UTF8
49 #define wxStringMemcpy memcpy
50 #define wxStringMemcmp memcmp
51 #define wxStringMemchr memchr
52 #define wxStringStrlen strlen
53 #else
54 #define wxStringMemcpy wxTmemcpy
55 #define wxStringMemcmp wxTmemcmp
56 #define wxStringMemchr wxTmemchr
57 #define wxStringStrlen wxStrlen
58 #endif
59
60 // ----------------------------------------------------------------------------
61 // global variables
62 // ----------------------------------------------------------------------------
63
64 namespace wxPrivate
65 {
66
67 static UntypedBufferData s_untypedNullData(NULL, 0);
68
69 UntypedBufferData * const untypedNullDataPtr = &s_untypedNullData;
70
71 } // namespace wxPrivate
72
73 // ---------------------------------------------------------------------------
74 // static class variables definition
75 // ---------------------------------------------------------------------------
76
// According to the STL, npos _must_ be (size_t)-1
78 const size_t wxString::npos = (size_t) -1;
79
80 #if wxUSE_STRING_POS_CACHE
81
82 #ifdef wxHAS_COMPILER_TLS
83
84 wxTLS_TYPE(wxString::Cache) wxString::ms_cache;
85
86 #else // !wxHAS_COMPILER_TLS
87
88 struct wxStrCacheInitializer
89 {
90 wxStrCacheInitializer()
91 {
92 // calling this function triggers s_cache initialization in it, and
93 // from now on it becomes safe to call from multiple threads
94 wxString::GetCache();
95 }
96 };
97
98 /*
99 wxString::Cache& wxString::GetCache()
100 {
101 static wxTLS_TYPE(Cache) s_cache;
102
103 return wxTLS_VALUE(s_cache);
104 }
105 */
106
107 static wxStrCacheInitializer gs_stringCacheInit;
108
109 #endif // wxHAS_COMPILER_TLS/!wxHAS_COMPILER_TLS
110
111 // gdb seems to be unable to display thread-local variables correctly, at least
112 // not my 6.4.98 version under amd64, so provide this debugging helper to do it
113 #if wxDEBUG_LEVEL >= 2
114
115 struct wxStrCacheDumper
116 {
117 static void ShowAll()
118 {
119 puts("*** wxString cache dump:");
120 for ( unsigned n = 0; n < wxString::Cache::SIZE; n++ )
121 {
122 const wxString::Cache::Element&
123 c = wxString::GetCacheBegin()[n];
124
125 printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
126 n,
127 n == wxString::LastUsedCacheElement() ? " [*]" : "",
128 c.str,
129 (unsigned long)c.pos,
130 (unsigned long)c.impl,
131 (long)c.len);
132 }
133 }
134 };
135
136 void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
137
138 #endif // wxDEBUG_LEVEL >= 2
139
140 #ifdef wxPROFILE_STRING_CACHE
141
142 wxString::CacheStats wxString::ms_cacheStats;
143
144 struct wxStrCacheStatsDumper
145 {
146 ~wxStrCacheStatsDumper()
147 {
148 const wxString::CacheStats& stats = wxString::ms_cacheStats;
149
150 if ( stats.postot )
151 {
152 puts("*** wxString cache statistics:");
153 printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
154 stats.postot);
155 printf("\tHits %u (of which %u not used) or %.2f%%\n",
156 stats.poshits,
157 stats.mishits,
158 100.*float(stats.poshits - stats.mishits)/stats.postot);
159 printf("\tAverage position requested: %.2f\n",
160 float(stats.sumpos) / stats.postot);
161 printf("\tAverage offset after cached hint: %.2f\n",
162 float(stats.sumofs) / stats.postot);
163 }
164
165 if ( stats.lentot )
166 {
167 printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
168 stats.lentot, 100.*float(stats.lenhits)/stats.lentot);
169 }
170 }
171 };
172
173 static wxStrCacheStatsDumper s_showCacheStats;
174
175 #endif // wxPROFILE_STRING_CACHE
176
177 #endif // wxUSE_STRING_POS_CACHE
178
179 // ----------------------------------------------------------------------------
180 // global functions
181 // ----------------------------------------------------------------------------
182
183 #if wxUSE_STD_IOSTREAM
184
185 #include <iostream>
186
187 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
188 {
189 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
190 const wxScopedCharBuffer buf(str.AsCharBuf());
191 if ( !buf )
192 os.clear(wxSTD ios_base::failbit);
193 else
194 os << buf.data();
195
196 return os;
197 #else
198 return os << str.AsInternal();
199 #endif
200 }
201
202 wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
203 {
204 return os << str.c_str();
205 }
206
207 wxSTD ostream& operator<<(wxSTD ostream& os, const wxScopedCharBuffer& str)
208 {
209 return os << str.data();
210 }
211
212 #ifndef __BORLANDC__
213 wxSTD ostream& operator<<(wxSTD ostream& os, const wxScopedWCharBuffer& str)
214 {
215 return os << str.data();
216 }
217 #endif
218
219 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
220
221 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxString& str)
222 {
223 return wos << str.wc_str();
224 }
225
226 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxCStrData& str)
227 {
228 return wos << str.AsWChar();
229 }
230
231 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxScopedWCharBuffer& str)
232 {
233 return wos << str.data();
234 }
235
236 #endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
237
238 #endif // wxUSE_STD_IOSTREAM
239
240 // ===========================================================================
241 // wxString class core
242 // ===========================================================================
243
244 #if wxUSE_UNICODE_UTF8
245
246 void wxString::PosLenToImpl(size_t pos, size_t len,
247 size_t *implPos, size_t *implLen) const
248 {
249 if ( pos == npos )
250 {
251 *implPos = npos;
252 }
253 else // have valid start position
254 {
255 const const_iterator b = GetIterForNthChar(pos);
256 *implPos = wxStringImpl::const_iterator(b.impl()) - m_impl.begin();
257 if ( len == npos )
258 {
259 *implLen = npos;
260 }
261 else // have valid length too
262 {
263 // we need to handle the case of length specifying a substring
264 // going beyond the end of the string, just as std::string does
265 const const_iterator e(end());
266 const_iterator i(b);
267 while ( len && i <= e )
268 {
269 ++i;
270 --len;
271 }
272
273 *implLen = i.impl() - b.impl();
274 }
275 }
276 }
277
278 #endif // wxUSE_UNICODE_UTF8
279
280 // ----------------------------------------------------------------------------
281 // wxCStrData converted strings caching
282 // ----------------------------------------------------------------------------
283
284 // FIXME-UTF8: temporarily disabled because it doesn't work with global
285 // string objects; re-enable after fixing this bug and benchmarking
286 // performance to see if using a hash is a good idea at all
287 #if 0
288
289 // For backward compatibility reasons, it must be possible to assign the value
290 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
291 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
292 // because the memory would be freed immediately, but it has to be valid as long
293 // as the string is not modified, so that code like this still works:
294 //
295 // const wxChar *s = str.c_str();
296 // while ( s ) { ... }
297
298 // FIXME-UTF8: not thread safe!
299 // FIXME-UTF8: we currently clear the cached conversion only when the string is
300 // destroyed, but we should do it when the string is modified, to
301 // keep memory usage down
302 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
303 // invalidated the cache on every change, we could keep the previous
304 // conversion
305 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
306 // to use mb_str() or wc_str() instead of (const [w]char*)c_str()
307
308 template<typename T>
309 static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
310 {
311 typename T::iterator i = hash.find(wxConstCast(s, wxString));
312 if ( i != hash.end() )
313 {
314 free(i->second);
315 hash.erase(i);
316 }
317 }
318
319 #if wxUSE_UNICODE
320 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
321 // so we have to use wxString* here and const-cast when used
322 WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
323 wxStringCharConversionCache);
324 static wxStringCharConversionCache gs_stringsCharCache;
325
326 const char* wxCStrData::AsChar() const
327 {
328 // remove previously cache value, if any (see FIXMEs above):
329 DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
330
331 // convert the string and keep it:
332 const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
333 m_str->mb_str().release();
334
335 return s + m_offset;
336 }
337 #endif // wxUSE_UNICODE
338
339 #if !wxUSE_UNICODE_WCHAR
340 WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
341 wxStringWCharConversionCache);
342 static wxStringWCharConversionCache gs_stringsWCharCache;
343
344 const wchar_t* wxCStrData::AsWChar() const
345 {
346 // remove previously cache value, if any (see FIXMEs above):
347 DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
348
349 // convert the string and keep it:
350 const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
351 m_str->wc_str().release();
352
353 return s + m_offset;
354 }
355 #endif // !wxUSE_UNICODE_WCHAR
356
357 wxString::~wxString()
358 {
359 #if wxUSE_UNICODE
360 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
361 DeleteStringFromConversionCache(gs_stringsCharCache, this);
362 #endif
363 #if !wxUSE_UNICODE_WCHAR
364 DeleteStringFromConversionCache(gs_stringsWCharCache, this);
365 #endif
366 }
367 #endif
368
369 // ===========================================================================
370 // wxString class core
371 // ===========================================================================
372
373 // ---------------------------------------------------------------------------
374 // construction and conversion
375 // ---------------------------------------------------------------------------
376
377 #if wxUSE_UNICODE_WCHAR
378 /* static */
379 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
380 const wxMBConv& conv)
381 {
382 // anything to do?
383 if ( !psz || nLength == 0 )
384 return SubstrBufFromMB(wxWCharBuffer(L""), 0);
385
386 if ( nLength == npos )
387 nLength = wxNO_LEN;
388
389 size_t wcLen;
390 wxScopedWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
391 if ( !wcLen )
392 return SubstrBufFromMB(wxWCharBuffer(L""), 0);
393 else
394 return SubstrBufFromMB(wcBuf, wcLen);
395 }
396 #endif // wxUSE_UNICODE_WCHAR
397
398 #if wxUSE_UNICODE_UTF8
399 /* static */
400 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
401 const wxMBConv& conv)
402 {
403 // anything to do?
404 if ( !psz || nLength == 0 )
405 return SubstrBufFromMB(wxCharBuffer(""), 0);
406
407 // if psz is already in UTF-8, we don't have to do the roundtrip to
408 // wchar_t* and back:
409 if ( conv.IsUTF8() )
410 {
411 // we need to validate the input because UTF8 iterators assume valid
412 // UTF-8 sequence and psz may be invalid:
413 if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
414 {
415 // we must pass the real string length to SubstrBufFromMB ctor
416 if ( nLength == npos )
417 nLength = psz ? strlen(psz) : 0;
418 return SubstrBufFromMB(wxScopedCharBuffer::CreateNonOwned(psz, nLength),
419 nLength);
420 }
421 // else: do the roundtrip through wchar_t*
422 }
423
424 if ( nLength == npos )
425 nLength = wxNO_LEN;
426
427 // first convert to wide string:
428 size_t wcLen;
429 wxScopedWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
430 if ( !wcLen )
431 return SubstrBufFromMB(wxCharBuffer(""), 0);
432
433 // and then to UTF-8:
434 SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
435 // widechar -> UTF-8 conversion isn't supposed to ever fail:
436 wxASSERT_MSG( buf.data, wxT("conversion to UTF-8 failed") );
437
438 return buf;
439 }
440 #endif // wxUSE_UNICODE_UTF8
441
442 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
443 /* static */
444 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
445 const wxMBConv& conv)
446 {
447 // anything to do?
448 if ( !pwz || nLength == 0 )
449 return SubstrBufFromWC(wxCharBuffer(""), 0);
450
451 if ( nLength == npos )
452 nLength = wxNO_LEN;
453
454 size_t mbLen;
455 wxScopedCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
456 if ( !mbLen )
457 return SubstrBufFromWC(wxCharBuffer(""), 0);
458 else
459 return SubstrBufFromWC(mbBuf, mbLen);
460 }
461 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
462
463 // This std::string::c_str()-like method returns a wide char pointer to string
464 // contents. In wxUSE_UNICODE_WCHAR case it is trivial as it can simply return
465 // a pointer to the internal representation. Otherwise a conversion is required
466 // and it returns a temporary buffer.
467 //
468 // However for compatibility with c_str() and to avoid breaking existing code
469 // doing
470 //
471 // for ( const wchar_t *p = s.wc_str(); *p; p++ )
472 // ... use *p...
473 //
474 // we actually need to ensure that the returned buffer is _not_ temporary and
475 // so we use wxString::m_convertedToWChar to store the returned data
476 #if !wxUSE_UNICODE_WCHAR
477
478 const wchar_t *wxString::AsWChar(const wxMBConv& conv) const
479 {
480 const char * const strMB = m_impl.c_str();
481 const size_t lenMB = m_impl.length();
482
483 // find out the size of the buffer needed
484 const size_t lenWC = conv.ToWChar(NULL, 0, strMB, lenMB);
485 if ( lenWC == wxCONV_FAILED )
486 return NULL;
487
488 // keep the same buffer if the string size didn't change: this is not only
489 // an optimization but also ensure that code which modifies string
490 // character by character (without changing its length) can continue to use
491 // the pointer returned by a previous wc_str() call even after changing the
492 // string
493
494 // TODO-UTF8: we could check for ">" instead of "!=" here as this would
495 // allow to save on buffer reallocations but at the cost of
496 // consuming (even) more memory, we should benchmark this to
497 // determine if it's worth doing
498 if ( !m_convertedToWChar.m_str || lenWC != m_convertedToWChar.m_len )
499 {
500 if ( !const_cast<wxString *>(this)->m_convertedToWChar.Extend(lenWC) )
501 return NULL;
502 }
503
504 // finally do convert
505 m_convertedToWChar.m_str[lenWC] = L'\0';
506 if ( conv.ToWChar(m_convertedToWChar.m_str, lenWC,
507 strMB, lenMB) == wxCONV_FAILED )
508 return NULL;
509
510 return m_convertedToWChar.m_str;
511 }
512
513 #endif // !wxUSE_UNICODE_WCHAR
514
515
516 // Same thing for mb_str() which returns a normal char pointer to string
517 // contents: this always requires converting it to the specified encoding in
518 // non-ANSI build except if we need to convert to UTF-8 and this is what we
519 // already use internally.
520 #if wxUSE_UNICODE
521
522 const char *wxString::AsChar(const wxMBConv& conv) const
523 {
524 #if wxUSE_UNICODE_UTF8
525 if ( conv.IsUTF8() )
526 return m_impl.c_str();
527
528 const wchar_t * const strWC = AsWChar(wxMBConvStrictUTF8());
529 const size_t lenWC = m_convertedToWChar.m_len;
530 #else // wxUSE_UNICODE_WCHAR
531 const wchar_t * const strWC = m_impl.c_str();
532 const size_t lenWC = m_impl.length();
533 #endif // wxUSE_UNICODE_UTF8/wxUSE_UNICODE_WCHAR
534
535 const size_t lenMB = conv.FromWChar(NULL, 0, strWC, lenWC);
536 if ( lenMB == wxCONV_FAILED )
537 return NULL;
538
539 if ( !m_convertedToChar.m_str || lenMB != m_convertedToChar.m_len )
540 {
541 if ( !const_cast<wxString *>(this)->m_convertedToChar.Extend(lenMB) )
542 return NULL;
543 }
544
545 m_convertedToChar.m_str[lenMB] = '\0';
546 if ( conv.FromWChar(m_convertedToChar.m_str, lenMB,
547 strWC, lenWC) == wxCONV_FAILED )
548 return NULL;
549
550 return m_convertedToChar.m_str;
551 }
552
553 #endif // wxUSE_UNICODE
554
555 // shrink to minimal size (releasing extra memory)
556 bool wxString::Shrink()
557 {
558 wxString tmp(begin(), end());
559 swap(tmp);
560 return tmp.length() == length();
561 }
562
563 // deprecated compatibility code:
564 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
565 wxStringCharType *wxString::GetWriteBuf(size_t nLen)
566 {
567 return DoGetWriteBuf(nLen);
568 }
569
570 void wxString::UngetWriteBuf()
571 {
572 DoUngetWriteBuf();
573 }
574
575 void wxString::UngetWriteBuf(size_t nLen)
576 {
577 DoUngetWriteBuf(nLen);
578 }
579 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
580
581
582 // ---------------------------------------------------------------------------
583 // data access
584 // ---------------------------------------------------------------------------
585
586 // all functions are inline in string.h
587
588 // ---------------------------------------------------------------------------
589 // concatenation operators
590 // ---------------------------------------------------------------------------
591
592 /*
593 * concatenation functions come in 5 flavours:
594 * string + string
595 * char + string and string + char
596 * C str + string and string + C str
597 */
598
599 wxString operator+(const wxString& str1, const wxString& str2)
600 {
601 #if !wxUSE_STL_BASED_WXSTRING
602 wxASSERT( str1.IsValid() );
603 wxASSERT( str2.IsValid() );
604 #endif
605
606 wxString s = str1;
607 s += str2;
608
609 return s;
610 }
611
612 wxString operator+(const wxString& str, wxUniChar ch)
613 {
614 #if !wxUSE_STL_BASED_WXSTRING
615 wxASSERT( str.IsValid() );
616 #endif
617
618 wxString s = str;
619 s += ch;
620
621 return s;
622 }
623
624 wxString operator+(wxUniChar ch, const wxString& str)
625 {
626 #if !wxUSE_STL_BASED_WXSTRING
627 wxASSERT( str.IsValid() );
628 #endif
629
630 wxString s = ch;
631 s += str;
632
633 return s;
634 }
635
636 wxString operator+(const wxString& str, const char *psz)
637 {
638 #if !wxUSE_STL_BASED_WXSTRING
639 wxASSERT( str.IsValid() );
640 #endif
641
642 wxString s;
643 if ( !s.Alloc(strlen(psz) + str.length()) ) {
644 wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
645 }
646 s += str;
647 s += psz;
648
649 return s;
650 }
651
652 wxString operator+(const wxString& str, const wchar_t *pwz)
653 {
654 #if !wxUSE_STL_BASED_WXSTRING
655 wxASSERT( str.IsValid() );
656 #endif
657
658 wxString s;
659 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
660 wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
661 }
662 s += str;
663 s += pwz;
664
665 return s;
666 }
667
668 wxString operator+(const char *psz, const wxString& str)
669 {
670 #if !wxUSE_STL_BASED_WXSTRING
671 wxASSERT( str.IsValid() );
672 #endif
673
674 wxString s;
675 if ( !s.Alloc(strlen(psz) + str.length()) ) {
676 wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
677 }
678 s = psz;
679 s += str;
680
681 return s;
682 }
683
684 wxString operator+(const wchar_t *pwz, const wxString& str)
685 {
686 #if !wxUSE_STL_BASED_WXSTRING
687 wxASSERT( str.IsValid() );
688 #endif
689
690 wxString s;
691 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
692 wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
693 }
694 s = pwz;
695 s += str;
696
697 return s;
698 }
699
700 // ---------------------------------------------------------------------------
701 // string comparison
702 // ---------------------------------------------------------------------------
703
704 bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
705 {
706 return (length() == 1) && (compareWithCase ? GetChar(0u) == c
707 : wxToupper(GetChar(0u)) == wxToupper(c));
708 }
709
710 #ifdef HAVE_STD_STRING_COMPARE
711
712 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
713 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
714 // sort strings in characters code point order by sorting the byte sequence
715 // in byte values order (i.e. what strcmp() and memcmp() do).
716
717 int wxString::compare(const wxString& str) const
718 {
719 return m_impl.compare(str.m_impl);
720 }
721
722 int wxString::compare(size_t nStart, size_t nLen,
723 const wxString& str) const
724 {
725 size_t pos, len;
726 PosLenToImpl(nStart, nLen, &pos, &len);
727 return m_impl.compare(pos, len, str.m_impl);
728 }
729
730 int wxString::compare(size_t nStart, size_t nLen,
731 const wxString& str,
732 size_t nStart2, size_t nLen2) const
733 {
734 size_t pos, len;
735 PosLenToImpl(nStart, nLen, &pos, &len);
736
737 size_t pos2, len2;
738 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
739
740 return m_impl.compare(pos, len, str.m_impl, pos2, len2);
741 }
742
743 int wxString::compare(const char* sz) const
744 {
745 return m_impl.compare(ImplStr(sz));
746 }
747
748 int wxString::compare(const wchar_t* sz) const
749 {
750 return m_impl.compare(ImplStr(sz));
751 }
752
753 int wxString::compare(size_t nStart, size_t nLen,
754 const char* sz, size_t nCount) const
755 {
756 size_t pos, len;
757 PosLenToImpl(nStart, nLen, &pos, &len);
758
759 SubstrBufFromMB str(ImplStr(sz, nCount));
760
761 return m_impl.compare(pos, len, str.data, str.len);
762 }
763
764 int wxString::compare(size_t nStart, size_t nLen,
765 const wchar_t* sz, size_t nCount) const
766 {
767 size_t pos, len;
768 PosLenToImpl(nStart, nLen, &pos, &len);
769
770 SubstrBufFromWC str(ImplStr(sz, nCount));
771
772 return m_impl.compare(pos, len, str.data, str.len);
773 }
774
775 #else // !HAVE_STD_STRING_COMPARE
776
777 static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
778 const wxStringCharType* s2, size_t l2)
779 {
780 if( l1 == l2 )
781 return wxStringMemcmp(s1, s2, l1);
782 else if( l1 < l2 )
783 {
784 int ret = wxStringMemcmp(s1, s2, l1);
785 return ret == 0 ? -1 : ret;
786 }
787 else
788 {
789 int ret = wxStringMemcmp(s1, s2, l2);
790 return ret == 0 ? +1 : ret;
791 }
792 }
793
794 int wxString::compare(const wxString& str) const
795 {
796 return ::wxDoCmp(m_impl.data(), m_impl.length(),
797 str.m_impl.data(), str.m_impl.length());
798 }
799
800 int wxString::compare(size_t nStart, size_t nLen,
801 const wxString& str) const
802 {
803 wxASSERT(nStart <= length());
804 size_type strLen = length() - nStart;
805 nLen = strLen < nLen ? strLen : nLen;
806
807 size_t pos, len;
808 PosLenToImpl(nStart, nLen, &pos, &len);
809
810 return ::wxDoCmp(m_impl.data() + pos, len,
811 str.m_impl.data(), str.m_impl.length());
812 }
813
814 int wxString::compare(size_t nStart, size_t nLen,
815 const wxString& str,
816 size_t nStart2, size_t nLen2) const
817 {
818 wxASSERT(nStart <= length());
819 wxASSERT(nStart2 <= str.length());
820 size_type strLen = length() - nStart,
821 strLen2 = str.length() - nStart2;
822 nLen = strLen < nLen ? strLen : nLen;
823 nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
824
825 size_t pos, len;
826 PosLenToImpl(nStart, nLen, &pos, &len);
827 size_t pos2, len2;
828 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
829
830 return ::wxDoCmp(m_impl.data() + pos, len,
831 str.m_impl.data() + pos2, len2);
832 }
833
834 int wxString::compare(const char* sz) const
835 {
836 SubstrBufFromMB str(ImplStr(sz, npos));
837 if ( str.len == npos )
838 str.len = wxStringStrlen(str.data);
839 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
840 }
841
842 int wxString::compare(const wchar_t* sz) const
843 {
844 SubstrBufFromWC str(ImplStr(sz, npos));
845 if ( str.len == npos )
846 str.len = wxStringStrlen(str.data);
847 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
848 }
849
850 int wxString::compare(size_t nStart, size_t nLen,
851 const char* sz, size_t nCount) const
852 {
853 wxASSERT(nStart <= length());
854 size_type strLen = length() - nStart;
855 nLen = strLen < nLen ? strLen : nLen;
856
857 size_t pos, len;
858 PosLenToImpl(nStart, nLen, &pos, &len);
859
860 SubstrBufFromMB str(ImplStr(sz, nCount));
861 if ( str.len == npos )
862 str.len = wxStringStrlen(str.data);
863
864 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
865 }
866
867 int wxString::compare(size_t nStart, size_t nLen,
868 const wchar_t* sz, size_t nCount) const
869 {
870 wxASSERT(nStart <= length());
871 size_type strLen = length() - nStart;
872 nLen = strLen < nLen ? strLen : nLen;
873
874 size_t pos, len;
875 PosLenToImpl(nStart, nLen, &pos, &len);
876
877 SubstrBufFromWC str(ImplStr(sz, nCount));
878 if ( str.len == npos )
879 str.len = wxStringStrlen(str.data);
880
881 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
882 }
883
884 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
885
886
887 // ---------------------------------------------------------------------------
888 // find_{first,last}_[not]_of functions
889 // ---------------------------------------------------------------------------
890
891 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
892
893 // NB: All these functions are implemented with the argument being wxChar*,
894 // i.e. widechar string in any Unicode build, even though native string
895 // representation is char* in the UTF-8 build. This is because we couldn't
896 // use memchr() to determine if a character is in a set encoded as UTF-8.
897
898 size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
899 {
900 return find_first_of(sz, nStart, wxStrlen(sz));
901 }
902
903 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
904 {
905 return find_first_not_of(sz, nStart, wxStrlen(sz));
906 }
907
908 size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
909 {
910 wxASSERT_MSG( nStart <= length(), wxT("invalid index") );
911
912 size_t idx = nStart;
913 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
914 {
915 if ( wxTmemchr(sz, *i, n) )
916 return idx;
917 }
918
919 return npos;
920 }
921
922 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
923 {
924 wxASSERT_MSG( nStart <= length(), wxT("invalid index") );
925
926 size_t idx = nStart;
927 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
928 {
929 if ( !wxTmemchr(sz, *i, n) )
930 return idx;
931 }
932
933 return npos;
934 }
935
936
937 size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
938 {
939 return find_last_of(sz, nStart, wxStrlen(sz));
940 }
941
942 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
943 {
944 return find_last_not_of(sz, nStart, wxStrlen(sz));
945 }
946
947 size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
948 {
949 size_t len = length();
950
951 if ( nStart == npos )
952 {
953 nStart = len - 1;
954 }
955 else
956 {
957 wxASSERT_MSG( nStart <= len, wxT("invalid index") );
958 }
959
960 size_t idx = nStart;
961 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
962 i != rend(); --idx, ++i )
963 {
964 if ( wxTmemchr(sz, *i, n) )
965 return idx;
966 }
967
968 return npos;
969 }
970
971 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
972 {
973 size_t len = length();
974
975 if ( nStart == npos )
976 {
977 nStart = len - 1;
978 }
979 else
980 {
981 wxASSERT_MSG( nStart <= len, wxT("invalid index") );
982 }
983
984 size_t idx = nStart;
985 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
986 i != rend(); --idx, ++i )
987 {
988 if ( !wxTmemchr(sz, *i, n) )
989 return idx;
990 }
991
992 return npos;
993 }
994
995 size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
996 {
997 wxASSERT_MSG( nStart <= length(), wxT("invalid index") );
998
999 size_t idx = nStart;
1000 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
1001 {
1002 if ( *i != ch )
1003 return idx;
1004 }
1005
1006 return npos;
1007 }
1008
1009 size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
1010 {
1011 size_t len = length();
1012
1013 if ( nStart == npos )
1014 {
1015 nStart = len - 1;
1016 }
1017 else
1018 {
1019 wxASSERT_MSG( nStart <= len, wxT("invalid index") );
1020 }
1021
1022 size_t idx = nStart;
1023 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1024 i != rend(); --idx, ++i )
1025 {
1026 if ( *i != ch )
1027 return idx;
1028 }
1029
1030 return npos;
1031 }
1032
1033 // the functions above were implemented for wchar_t* arguments in Unicode
1034 // build and char* in ANSI build; below are implementations for the other
1035 // version:
1036 #if wxUSE_UNICODE
1037 #define wxOtherCharType char
1038 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
1039 #else
1040 #define wxOtherCharType wchar_t
1041 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
1042 #endif
1043
1044 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
1045 { return find_first_of(STRCONV(sz), nStart); }
1046
1047 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
1048 size_t n) const
1049 { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
1050 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
1051 { return find_last_of(STRCONV(sz), nStart); }
1052 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
1053 size_t n) const
1054 { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
1055 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
1056 { return find_first_not_of(STRCONV(sz), nStart); }
1057 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
1058 size_t n) const
1059 { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
1060 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
1061 { return find_last_not_of(STRCONV(sz), nStart); }
1062 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
1063 size_t n) const
1064 { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
1065
1066 #undef wxOtherCharType
1067 #undef STRCONV
1068
1069 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1070
1071 // ===========================================================================
1072 // other common string functions
1073 // ===========================================================================
1074
1075 int wxString::CmpNoCase(const wxString& s) const
1076 {
1077 #if defined(__WXMSW__) && !wxUSE_UNICODE_UTF8
1078 // prefer to use CompareString() if available as it's more efficient than
1079 // doing it manual or even using wxStricmp() (see #10375)
1080 switch ( ::CompareString(LOCALE_USER_DEFAULT, NORM_IGNORECASE,
1081 m_impl.c_str(), m_impl.length(),
1082 s.m_impl.c_str(), s.m_impl.length()) )
1083 {
1084 case CSTR_LESS_THAN:
1085 return -1;
1086
1087 case CSTR_EQUAL:
1088 return 0;
1089
1090 case CSTR_GREATER_THAN:
1091 return 1;
1092
1093 default:
1094 wxFAIL_MSG( "unexpected CompareString() return value" );
1095 // fall through
1096
1097 case 0:
1098 wxLogLastError("CompareString");
1099 // use generic code below
1100 }
1101 #endif // __WXMSW__ && !wxUSE_UNICODE_UTF8
1102
1103 // do the comparison manually: notice that we can't use wxStricmp() as it
1104 // doesn't handle embedded NULs
1105
1106 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
1107 const_iterator i1 = begin();
1108 const_iterator end1 = end();
1109 const_iterator i2 = s.begin();
1110 const_iterator end2 = s.end();
1111
1112 for ( ; i1 != end1 && i2 != end2; ++i1, ++i2 )
1113 {
1114 wxUniChar lower1 = (wxChar)wxTolower(*i1);
1115 wxUniChar lower2 = (wxChar)wxTolower(*i2);
1116 if ( lower1 != lower2 )
1117 return lower1 < lower2 ? -1 : 1;
1118 }
1119
1120 size_t len1 = length();
1121 size_t len2 = s.length();
1122
1123 if ( len1 < len2 )
1124 return -1;
1125 else if ( len1 > len2 )
1126 return 1;
1127 return 0;
1128 }
1129
1130
1131 #if wxUSE_UNICODE
1132
1133 #ifdef __MWERKS__
1134 #ifndef __SCHAR_MAX__
1135 #define __SCHAR_MAX__ 127
1136 #endif
1137 #endif
1138
1139 wxString wxString::FromAscii(const char *ascii, size_t len)
1140 {
1141 if (!ascii || len == 0)
1142 return wxEmptyString;
1143
1144 wxString res;
1145
1146 {
1147 wxStringInternalBuffer buf(res, len);
1148 wxStringCharType *dest = buf;
1149
1150 for ( ; len > 0; --len )
1151 {
1152 unsigned char c = (unsigned char)*ascii++;
1153 wxASSERT_MSG( c < 0x80,
1154 wxT("Non-ASCII value passed to FromAscii().") );
1155
1156 *dest++ = (wchar_t)c;
1157 }
1158 }
1159
1160 return res;
1161 }
1162
1163 wxString wxString::FromAscii(const char *ascii)
1164 {
1165 return FromAscii(ascii, wxStrlen(ascii));
1166 }
1167
1168 wxString wxString::FromAscii(char ascii)
1169 {
1170 // What do we do with '\0' ?
1171
1172 unsigned char c = (unsigned char)ascii;
1173
1174 wxASSERT_MSG( c < 0x80, wxT("Non-ASCII value passed to FromAscii().") );
1175
1176 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1177 return wxString(wxUniChar((wchar_t)c));
1178 }
1179
1180 const wxScopedCharBuffer wxString::ToAscii() const
1181 {
1182 // this will allocate enough space for the terminating NUL too
1183 wxCharBuffer buffer(length());
1184 char *dest = buffer.data();
1185
1186 for ( const_iterator i = begin(); i != end(); ++i )
1187 {
1188 wxUniChar c(*i);
1189 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1190 *dest++ = c.IsAscii() ? (char)c : '_';
1191
1192 // the output string can't have embedded NULs anyhow, so we can safely
1193 // stop at first of them even if we do have any
1194 if ( !c )
1195 break;
1196 }
1197
1198 return buffer;
1199 }
1200
1201 #endif // wxUSE_UNICODE
1202
1203 // extract string of length nCount starting at nFirst
1204 wxString wxString::Mid(size_t nFirst, size_t nCount) const
1205 {
1206 size_t nLen = length();
1207
1208 // default value of nCount is npos and means "till the end"
1209 if ( nCount == npos )
1210 {
1211 nCount = nLen - nFirst;
1212 }
1213
1214 // out-of-bounds requests return sensible things
1215 if ( nFirst + nCount > nLen )
1216 {
1217 nCount = nLen - nFirst;
1218 }
1219
1220 if ( nFirst > nLen )
1221 {
1222 // AllocCopy() will return empty string
1223 return wxEmptyString;
1224 }
1225
1226 wxString dest(*this, nFirst, nCount);
1227 if ( dest.length() != nCount )
1228 {
1229 wxFAIL_MSG( wxT("out of memory in wxString::Mid") );
1230 }
1231
1232 return dest;
1233 }
1234
1235 // check that the string starts with prefix and return the rest of the string
1236 // in the provided pointer if it is not NULL, otherwise return false
1237 bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
1238 {
1239 if ( compare(0, prefix.length(), prefix) != 0 )
1240 return false;
1241
1242 if ( rest )
1243 {
1244 // put the rest of the string into provided pointer
1245 rest->assign(*this, prefix.length(), npos);
1246 }
1247
1248 return true;
1249 }
1250
1251
1252 // check that the string ends with suffix and return the rest of it in the
1253 // provided pointer if it is not NULL, otherwise return false
1254 bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
1255 {
1256 int start = length() - suffix.length();
1257
1258 if ( start < 0 || compare(start, npos, suffix) != 0 )
1259 return false;
1260
1261 if ( rest )
1262 {
1263 // put the rest of the string into provided pointer
1264 rest->assign(*this, 0, start);
1265 }
1266
1267 return true;
1268 }
1269
1270
1271 // extract nCount last (rightmost) characters
1272 wxString wxString::Right(size_t nCount) const
1273 {
1274 if ( nCount > length() )
1275 nCount = length();
1276
1277 wxString dest(*this, length() - nCount, nCount);
1278 if ( dest.length() != nCount ) {
1279 wxFAIL_MSG( wxT("out of memory in wxString::Right") );
1280 }
1281 return dest;
1282 }
1283
1284 // get all characters after the last occurrence of ch
1285 // (returns the whole string if ch not found)
1286 wxString wxString::AfterLast(wxUniChar ch) const
1287 {
1288 wxString str;
1289 int iPos = Find(ch, true);
1290 if ( iPos == wxNOT_FOUND )
1291 str = *this;
1292 else
1293 str.assign(*this, iPos + 1, npos);
1294
1295 return str;
1296 }
1297
1298 // extract nCount first (leftmost) characters
1299 wxString wxString::Left(size_t nCount) const
1300 {
1301 if ( nCount > length() )
1302 nCount = length();
1303
1304 wxString dest(*this, 0, nCount);
1305 if ( dest.length() != nCount ) {
1306 wxFAIL_MSG( wxT("out of memory in wxString::Left") );
1307 }
1308 return dest;
1309 }
1310
1311 // get all characters before the first occurrence of ch
1312 // (returns the whole string if ch not found)
1313 wxString wxString::BeforeFirst(wxUniChar ch) const
1314 {
1315 int iPos = Find(ch);
1316 if ( iPos == wxNOT_FOUND )
1317 iPos = length();
1318 return wxString(*this, 0, iPos);
1319 }
1320
1321 /// get all characters before the last occurrence of ch
1322 /// (returns empty string if ch not found)
1323 wxString wxString::BeforeLast(wxUniChar ch) const
1324 {
1325 wxString str;
1326 int iPos = Find(ch, true);
1327 if ( iPos != wxNOT_FOUND && iPos != 0 )
1328 str = wxString(c_str(), iPos);
1329
1330 return str;
1331 }
1332
1333 /// get all characters after the first occurrence of ch
1334 /// (returns empty string if ch not found)
1335 wxString wxString::AfterFirst(wxUniChar ch) const
1336 {
1337 wxString str;
1338 int iPos = Find(ch);
1339 if ( iPos != wxNOT_FOUND )
1340 str.assign(*this, iPos + 1, npos);
1341
1342 return str;
1343 }
1344
1345 // replace first (or all) occurrences of some substring with another one
1346 size_t wxString::Replace(const wxString& strOld,
1347 const wxString& strNew, bool bReplaceAll)
1348 {
1349 // if we tried to replace an empty string we'd enter an infinite loop below
1350 wxCHECK_MSG( !strOld.empty(), 0,
1351 wxT("wxString::Replace(): invalid parameter") );
1352
1353 wxSTRING_INVALIDATE_CACHE();
1354
1355 size_t uiCount = 0; // count of replacements made
1356
1357 // optimize the special common case: replacement of one character by
1358 // another one (in UTF-8 case we can only do this for ASCII characters)
1359 //
1360 // benchmarks show that this special version is around 3 times faster
1361 // (depending on the proportion of matching characters and UTF-8/wchar_t
1362 // build)
1363 if ( strOld.m_impl.length() == 1 && strNew.m_impl.length() == 1 )
1364 {
1365 const wxStringCharType chOld = strOld.m_impl[0],
1366 chNew = strNew.m_impl[0];
1367
1368 // this loop is the simplified version of the one below
1369 for ( size_t pos = 0; ; )
1370 {
1371 pos = m_impl.find(chOld, pos);
1372 if ( pos == npos )
1373 break;
1374
1375 m_impl[pos++] = chNew;
1376
1377 uiCount++;
1378
1379 if ( !bReplaceAll )
1380 break;
1381 }
1382 }
1383 else if ( !bReplaceAll)
1384 {
1385 size_t pos = m_impl.find(strOld, 0);
1386 if ( pos != npos )
1387 {
1388 m_impl.replace(pos, strOld.m_impl.length(), strNew.m_impl);
1389 uiCount = 1;
1390 }
1391 }
1392 else // replace all occurrences
1393 {
1394 const size_t uiOldLen = strOld.m_impl.length();
1395 const size_t uiNewLen = strNew.m_impl.length();
1396
1397 // first scan the string to find all positions at which the replacement
1398 // should be made
1399 wxVector<size_t> replacePositions;
1400
1401 size_t pos;
1402 for ( pos = m_impl.find(strOld.m_impl, 0);
1403 pos != npos;
1404 pos = m_impl.find(strOld.m_impl, pos + uiOldLen))
1405 {
1406 replacePositions.push_back(pos);
1407 ++uiCount;
1408 }
1409
1410 if ( !uiCount )
1411 return 0;
1412
1413 // allocate enough memory for the whole new string
1414 wxString tmp;
1415 tmp.m_impl.reserve(m_impl.length() + uiCount*(uiNewLen - uiOldLen));
1416
1417 // copy this string to tmp doing replacements on the fly
1418 size_t replNum = 0;
1419 for ( pos = 0; replNum < uiCount; replNum++ )
1420 {
1421 const size_t nextReplPos = replacePositions[replNum];
1422
1423 if ( pos != nextReplPos )
1424 {
1425 tmp.m_impl.append(m_impl, pos, nextReplPos - pos);
1426 }
1427
1428 tmp.m_impl.append(strNew.m_impl);
1429 pos = nextReplPos + uiOldLen;
1430 }
1431
1432 if ( pos != m_impl.length() )
1433 {
1434 // append the rest of the string unchanged
1435 tmp.m_impl.append(m_impl, pos, m_impl.length() - pos);
1436 }
1437
1438 swap(tmp);
1439 }
1440
1441 return uiCount;
1442 }
1443
1444 bool wxString::IsAscii() const
1445 {
1446 for ( const_iterator i = begin(); i != end(); ++i )
1447 {
1448 if ( !(*i).IsAscii() )
1449 return false;
1450 }
1451
1452 return true;
1453 }
1454
1455 bool wxString::IsWord() const
1456 {
1457 for ( const_iterator i = begin(); i != end(); ++i )
1458 {
1459 if ( !wxIsalpha(*i) )
1460 return false;
1461 }
1462
1463 return true;
1464 }
1465
1466 bool wxString::IsNumber() const
1467 {
1468 if ( empty() )
1469 return true;
1470
1471 const_iterator i = begin();
1472
1473 if ( *i == wxT('-') || *i == wxT('+') )
1474 ++i;
1475
1476 for ( ; i != end(); ++i )
1477 {
1478 if ( !wxIsdigit(*i) )
1479 return false;
1480 }
1481
1482 return true;
1483 }
1484
1485 wxString wxString::Strip(stripType w) const
1486 {
1487 wxString s = *this;
1488 if ( w & leading ) s.Trim(false);
1489 if ( w & trailing ) s.Trim(true);
1490 return s;
1491 }
1492
1493 // ---------------------------------------------------------------------------
1494 // case conversion
1495 // ---------------------------------------------------------------------------
1496
1497 wxString& wxString::MakeUpper()
1498 {
1499 for ( iterator it = begin(), en = end(); it != en; ++it )
1500 *it = (wxChar)wxToupper(*it);
1501
1502 return *this;
1503 }
1504
1505 wxString& wxString::MakeLower()
1506 {
1507 for ( iterator it = begin(), en = end(); it != en; ++it )
1508 *it = (wxChar)wxTolower(*it);
1509
1510 return *this;
1511 }
1512
1513 wxString& wxString::MakeCapitalized()
1514 {
1515 const iterator en = end();
1516 iterator it = begin();
1517 if ( it != en )
1518 {
1519 *it = (wxChar)wxToupper(*it);
1520 for ( ++it; it != en; ++it )
1521 *it = (wxChar)wxTolower(*it);
1522 }
1523
1524 return *this;
1525 }
1526
1527 // ---------------------------------------------------------------------------
1528 // trimming and padding
1529 // ---------------------------------------------------------------------------
1530
1531 // some compilers (VC++ 6.0 not to name them) return true for a call to
1532 // isspace('\xEA') in the C locale which seems to be broken to me, but we have
1533 // to live with this by checking that the character is a 7 bit one - even if
1534 // this may fail to detect some spaces (I don't know if Unicode doesn't have
1535 // space-like symbols somewhere except in the first 128 chars), it is arguably
1536 // still better than trimming away accented letters
1537 inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1538
1539 // trims spaces (in the sense of isspace) from left or right side
1540 wxString& wxString::Trim(bool bFromRight)
1541 {
1542 // first check if we're going to modify the string at all
1543 if ( !empty() &&
1544 (
1545 (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1546 (!bFromRight && wxSafeIsspace(GetChar(0u)))
1547 )
1548 )
1549 {
1550 if ( bFromRight )
1551 {
1552 // find last non-space character
1553 reverse_iterator psz = rbegin();
1554 while ( (psz != rend()) && wxSafeIsspace(*psz) )
1555 ++psz;
1556
1557 // truncate at trailing space start
1558 erase(psz.base(), end());
1559 }
1560 else
1561 {
1562 // find first non-space character
1563 iterator psz = begin();
1564 while ( (psz != end()) && wxSafeIsspace(*psz) )
1565 ++psz;
1566
1567 // fix up data and length
1568 erase(begin(), psz);
1569 }
1570 }
1571
1572 return *this;
1573 }
1574
1575 // adds nCount characters chPad to the string from either side
1576 wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1577 {
1578 wxString s(chPad, nCount);
1579
1580 if ( bFromRight )
1581 *this += s;
1582 else
1583 {
1584 s += *this;
1585 swap(s);
1586 }
1587
1588 return *this;
1589 }
1590
1591 // truncate the string
1592 wxString& wxString::Truncate(size_t uiLen)
1593 {
1594 if ( uiLen < length() )
1595 {
1596 erase(begin() + uiLen, end());
1597 }
1598 //else: nothing to do, string is already short enough
1599
1600 return *this;
1601 }
1602
1603 // ---------------------------------------------------------------------------
1604 // finding (return wxNOT_FOUND if not found and index otherwise)
1605 // ---------------------------------------------------------------------------
1606
1607 // find a character
1608 int wxString::Find(wxUniChar ch, bool bFromEnd) const
1609 {
1610 size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1611
1612 return (idx == npos) ? wxNOT_FOUND : (int)idx;
1613 }
1614
1615 // ----------------------------------------------------------------------------
1616 // conversion to numbers
1617 // ----------------------------------------------------------------------------
1618
// All the string to number conversion functions below are implemented in
// exactly the same way, so the common parts are factored out in the macros
// defined here. Note that number extraction works correctly on UTF-8
// strings, so we can use wxStringCharType and wx_str() for maximum
// efficiency.

// DO_IF_NOT_WINCE(x) expands to x in all builds except the Windows CE ones
// in which it expands to nothing: it is used for the code using errno
#ifndef __WXWINCE__
    #define DO_IF_NOT_WINCE(x) x
#else
    #define DO_IF_NOT_WINCE(x)
#endif

// common prologue: checks the output pointer "pVal", resets errno and
// declares the "start" and "end" pointers used by the conversion itself
#define WX_STRING_TO_X_TYPE_START                                           \
    wxCHECK_MSG( pVal, false, wxT("NULL output pointer") );                 \
    DO_IF_NOT_WINCE( errno = 0; )                                           \
    const wxStringCharType *start = wx_str();                               \
    wxStringCharType *end;

// common epilogue: expects the result of the conversion in "val" and stores
// it in "*pVal" and returns true on success or just returns false on failure
#define WX_STRING_TO_X_TYPE_END                                             \
    /* return true only if scan was stopped by the terminating NUL and */   \
    /* if the string was not empty to start with and no under/overflow */   \
    /* occurred: */                                                         \
    if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )         \
        return false;                                                       \
    *pVal = val;                                                            \
    return true;
1643
1644 bool wxString::ToLong(long *pVal, int base) const
1645 {
1646 wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );
1647
1648 WX_STRING_TO_X_TYPE_START
1649 long val = wxStrtol(start, &end, base);
1650 WX_STRING_TO_X_TYPE_END
1651 }
1652
1653 bool wxString::ToULong(unsigned long *pVal, int base) const
1654 {
1655 wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );
1656
1657 WX_STRING_TO_X_TYPE_START
1658 unsigned long val = wxStrtoul(start, &end, base);
1659 WX_STRING_TO_X_TYPE_END
1660 }
1661
1662 bool wxString::ToLongLong(wxLongLong_t *pVal, int base) const
1663 {
1664 wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );
1665
1666 WX_STRING_TO_X_TYPE_START
1667 wxLongLong_t val = wxStrtoll(start, &end, base);
1668 WX_STRING_TO_X_TYPE_END
1669 }
1670
1671 bool wxString::ToULongLong(wxULongLong_t *pVal, int base) const
1672 {
1673 wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );
1674
1675 WX_STRING_TO_X_TYPE_START
1676 wxULongLong_t val = wxStrtoull(start, &end, base);
1677 WX_STRING_TO_X_TYPE_END
1678 }
1679
1680 bool wxString::ToDouble(double *pVal) const
1681 {
1682 WX_STRING_TO_X_TYPE_START
1683 double val = wxStrtod(start, &end);
1684 WX_STRING_TO_X_TYPE_END
1685 }
1686
1687 #if wxUSE_XLOCALE
1688
1689 bool wxString::ToCLong(long *pVal, int base) const
1690 {
1691 wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );
1692
1693 WX_STRING_TO_X_TYPE_START
1694 #if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT)
1695 long val = wxStrtol_lA(start, &end, base, wxCLocale);
1696 #else
1697 long val = wxStrtol_l(start, &end, base, wxCLocale);
1698 #endif
1699 WX_STRING_TO_X_TYPE_END
1700 }
1701
1702 bool wxString::ToCULong(unsigned long *pVal, int base) const
1703 {
1704 wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );
1705
1706 WX_STRING_TO_X_TYPE_START
1707 #if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT)
1708 unsigned long val = wxStrtoul_lA(start, &end, base, wxCLocale);
1709 #else
1710 unsigned long val = wxStrtoul_l(start, &end, base, wxCLocale);
1711 #endif
1712 WX_STRING_TO_X_TYPE_END
1713 }
1714
1715 bool wxString::ToCDouble(double *pVal) const
1716 {
1717 WX_STRING_TO_X_TYPE_START
1718 #if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT)
1719 double val = wxStrtod_lA(start, &end, wxCLocale);
1720 #else
1721 double val = wxStrtod_l(start, &end, wxCLocale);
1722 #endif
1723 WX_STRING_TO_X_TYPE_END
1724 }
1725
1726 #endif // wxUSE_XLOCALE
1727
1728 // ---------------------------------------------------------------------------
1729 // formatted output
1730 // ---------------------------------------------------------------------------
1731
1732 #if !wxUSE_UTF8_LOCALE_ONLY
1733 /* static */
1734 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1735 wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
1736 #else
1737 wxString wxString::DoFormatWchar(const wxChar *format, ...)
1738 #endif
1739 {
1740 va_list argptr;
1741 va_start(argptr, format);
1742
1743 wxString s;
1744 s.PrintfV(format, argptr);
1745
1746 va_end(argptr);
1747
1748 return s;
1749 }
1750 #endif // !wxUSE_UTF8_LOCALE_ONLY
1751
#if wxUSE_UNICODE_UTF8
// Return a new string formatted, printf()-like, according to the char format
// string and the variable arguments following it.
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);

    result.PrintfV(format, args);

    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
1767
1768 /* static */
1769 wxString wxString::FormatV(const wxString& format, va_list argptr)
1770 {
1771 wxString s;
1772 s.PrintfV(format, argptr);
1773 return s;
1774 }
1775
1776 #if !wxUSE_UTF8_LOCALE_ONLY
1777 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1778 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
1779 #else
1780 int wxString::DoPrintfWchar(const wxChar *format, ...)
1781 #endif
1782 {
1783 va_list argptr;
1784 va_start(argptr, format);
1785
1786 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1787 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1788 // because it's the only cast that works safely for downcasting when
1789 // multiple inheritance is used:
1790 wxString *str = static_cast<wxString*>(this);
1791 #else
1792 wxString *str = this;
1793 #endif
1794
1795 int iLen = str->PrintfV(format, argptr);
1796
1797 va_end(argptr);
1798
1799 return iLen;
1800 }
1801 #endif // !wxUSE_UTF8_LOCALE_ONLY
1802
#if wxUSE_UNICODE_UTF8
// Replace the contents of the string with the result of formatting,
// printf()-like, the variable arguments according to the char format string
// and return the value returned by PrintfV().
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);

    const int iLen = PrintfV(format, args);

    va_end(args);

    return iLen;
}
#endif // wxUSE_UNICODE_UTF8
1816
1817 /*
    Uses wxVsnprintf and places the result into this string.
1819
1820 In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1821 it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in
1822 the ISO C99 (and thus SUSv3) standard the return value for the case of
1823 an undersized buffer is inconsistent. For conforming vsnprintf
1824 implementations the function must return the number of characters that
1825 would have been printed had the buffer been large enough. For conforming
1826 vswprintf implementations the function must return a negative number
1827 and set errno.
1828
1829 What vswprintf sets errno to is undefined but Darwin seems to set it to
1830 EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of
1831 those are defined in the standard and backed up by several conformance
1832 statements. Note that ENOMEM mentioned in the manual page does not
1833 apply to swprintf, only wprintf and fwprintf.
1834
1835 Official manual page:
1836 http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1837
1838 Some conformance statements (AIX, Solaris):
1839 http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1840 http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1841
1842 Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1843 EILSEQ and EINVAL are specifically defined to mean the error is other than
1844 an undersized buffer and no other errno are defined we treat those two
1845 as meaning hard errors and everything else gets the old behavior which
1846 is to keep looping and increasing buffer size until the function succeeds.
1847
1848 In practice it's impossible to determine before compilation which behavior
1849 may be used. The vswprintf function may have vsnprintf-like behavior or
1850 vice-versa. Behavior detected on one release can theoretically change
1851 with an updated release. Not to mention that configure testing for it
1852 would require the test to be run on the host system, not the build system
1853 which makes cross compilation difficult. Therefore, we make no assumptions
1854 about behavior and try our best to handle every known case, including the
1855 case where wxVsnprintf returns a negative number and fails to set errno.
1856
1857 There is yet one more non-standard implementation and that is our own.
1858 Fortunately, that can be detected at compile-time.
1859
1860 On top of all that, ISO C99 explicitly defines snprintf to write a null
    character to the last position of the specified buffer. That would be at
    the given buffer size minus 1. It is supposed to do this even if it
1863 turns out that the buffer is sized too small.
1864
1865 Darwin (tested on 10.5) follows the C99 behavior exactly.
1866
1867 Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1868 errno even when it fails. However, it only seems to ever fail due
1869 to an undersized buffer.
1870 */
1871 #if wxUSE_UNICODE_UTF8
1872 template<typename BufferType>
1873 #else
1874 // we only need one version in non-UTF8 builds and at least two Windows
1875 // compilers have problems with this function template, so use just one
1876 // normal function here
1877 #endif
1878 static int DoStringPrintfV(wxString& str,
1879 const wxString& format, va_list argptr)
1880 {
1881 int size = 1024;
1882
1883 for ( ;; )
1884 {
1885 #if wxUSE_UNICODE_UTF8
1886 BufferType tmp(str, size + 1);
1887 typename BufferType::CharType *buf = tmp;
1888 #else
1889 wxStringBuffer tmp(str, size + 1);
1890 wxChar *buf = tmp;
1891 #endif
1892
1893 if ( !buf )
1894 {
1895 // out of memory
1896
1897 // in UTF-8 build, leaving uninitialized junk in the buffer
1898 // could result in invalid non-empty UTF-8 string, so just
1899 // reset the string to empty on failure:
1900 buf[0] = '\0';
1901 return -1;
1902 }
1903
1904 // wxVsnprintf() may modify the original arg pointer, so pass it
1905 // only a copy
1906 va_list argptrcopy;
1907 wxVaCopy(argptrcopy, argptr);
1908
1909 #ifndef __WXWINCE__
1910 // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1911 errno = 0;
1912 #endif
1913 int len = wxVsnprintf(buf, size, format, argptrcopy);
1914 va_end(argptrcopy);
1915
1916 // some implementations of vsnprintf() don't NUL terminate
1917 // the string if there is not enough space for it so
1918 // always do it manually
1919 // FIXME: This really seems to be the wrong and would be an off-by-one
1920 // bug except the code above allocates an extra character.
1921 buf[size] = wxT('\0');
1922
1923 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1924 // total number of characters which would have been written if the
1925 // buffer were large enough (newer standards such as Unix98)
1926 if ( len < 0 )
1927 {
1928 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1929 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1930 // is true if *both* of them use our own implementation,
1931 // otherwise we can't be sure
1932 #if wxUSE_WXVSNPRINTF
1933 // we know that our own implementation of wxVsnprintf() returns -1
1934 // only for a format error - thus there's something wrong with
1935 // the user's format string
1936 buf[0] = '\0';
1937 return -1;
1938 #else // possibly using system version
1939 // assume it only returns error if there is not enough space, but
1940 // as we don't know how much we need, double the current size of
1941 // the buffer
1942 #ifndef __WXWINCE__
1943 if( (errno == EILSEQ) || (errno == EINVAL) )
1944 // If errno was set to one of the two well-known hard errors
1945 // then fail immediately to avoid an infinite loop.
1946 return -1;
1947 else
1948 #endif // __WXWINCE__
1949 // still not enough, as we don't know how much we need, double the
1950 // current size of the buffer
1951 size *= 2;
1952 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1953 }
1954 else if ( len >= size )
1955 {
1956 #if wxUSE_WXVSNPRINTF
1957 // we know that our own implementation of wxVsnprintf() returns
1958 // size+1 when there's not enough space but that's not the size
1959 // of the required buffer!
1960 size *= 2; // so we just double the current size of the buffer
1961 #else
1962 // some vsnprintf() implementations NUL-terminate the buffer and
1963 // some don't in len == size case, to be safe always add 1
1964 // FIXME: I don't quite understand this comment. The vsnprintf
1965 // function is specifically defined to return the number of
1966 // characters printed not including the null terminator.
1967 // So OF COURSE you need to add 1 to get the right buffer size.
1968 // The following line is definitely correct, no question.
1969 size = len + 1;
1970 #endif
1971 }
1972 else // ok, there was enough space
1973 {
1974 break;
1975 }
1976 }
1977
1978 // we could have overshot
1979 str.Shrink();
1980
1981 return str.length();
1982 }
1983
1984 int wxString::PrintfV(const wxString& format, va_list argptr)
1985 {
1986 #if wxUSE_UNICODE_UTF8
1987 #if wxUSE_STL_BASED_WXSTRING
1988 typedef wxStringTypeBuffer<char> Utf8Buffer;
1989 #else
1990 typedef wxStringInternalBuffer Utf8Buffer;
1991 #endif
1992 #endif
1993
1994 #if wxUSE_UTF8_LOCALE_ONLY
1995 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
1996 #else
1997 #if wxUSE_UNICODE_UTF8
1998 if ( wxLocaleIsUtf8 )
1999 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
2000 else
2001 // wxChar* version
2002 return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
2003 #else
2004 return DoStringPrintfV(*this, format, argptr);
2005 #endif // UTF8/WCHAR
2006 #endif
2007 }
2008
2009 // ----------------------------------------------------------------------------
2010 // misc other operations
2011 // ----------------------------------------------------------------------------
2012
2013 // returns true if the string matches the pattern which may contain '*' and
2014 // '?' metacharacters (as usual, '?' matches any character and '*' any number
2015 // of them)
2016 bool wxString::Matches(const wxString& mask) const
2017 {
2018 // I disable this code as it doesn't seem to be faster (in fact, it seems
2019 // to be much slower) than the old, hand-written code below and using it
2020 // here requires always linking with libregex even if the user code doesn't
2021 // use it
2022 #if 0 // wxUSE_REGEX
2023 // first translate the shell-like mask into a regex
2024 wxString pattern;
2025 pattern.reserve(wxStrlen(pszMask));
2026
2027 pattern += wxT('^');
2028 while ( *pszMask )
2029 {
2030 switch ( *pszMask )
2031 {
2032 case wxT('?'):
2033 pattern += wxT('.');
2034 break;
2035
2036 case wxT('*'):
2037 pattern += wxT(".*");
2038 break;
2039
2040 case wxT('^'):
2041 case wxT('.'):
2042 case wxT('$'):
2043 case wxT('('):
2044 case wxT(')'):
2045 case wxT('|'):
2046 case wxT('+'):
2047 case wxT('\\'):
2048 // these characters are special in a RE, quote them
2049 // (however note that we don't quote '[' and ']' to allow
2050 // using them for Unix shell like matching)
2051 pattern += wxT('\\');
2052 // fall through
2053
2054 default:
2055 pattern += *pszMask;
2056 }
2057
2058 pszMask++;
2059 }
2060 pattern += wxT('$');
2061
2062 // and now use it
2063 return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
2064 #else // !wxUSE_REGEX
2065 // TODO: this is, of course, awfully inefficient...
2066
2067 // FIXME-UTF8: implement using iterators, remove #if
2068 #if wxUSE_UNICODE_UTF8
2069 const wxScopedWCharBuffer maskBuf = mask.wc_str();
2070 const wxScopedWCharBuffer txtBuf = wc_str();
2071 const wxChar *pszMask = maskBuf.data();
2072 const wxChar *pszTxt = txtBuf.data();
2073 #else
2074 const wxChar *pszMask = mask.wx_str();
2075 // the char currently being checked
2076 const wxChar *pszTxt = wx_str();
2077 #endif
2078
2079 // the last location where '*' matched
2080 const wxChar *pszLastStarInText = NULL;
2081 const wxChar *pszLastStarInMask = NULL;
2082
2083 match:
2084 for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
2085 switch ( *pszMask ) {
2086 case wxT('?'):
2087 if ( *pszTxt == wxT('\0') )
2088 return false;
2089
2090 // pszTxt and pszMask will be incremented in the loop statement
2091
2092 break;
2093
2094 case wxT('*'):
2095 {
2096 // remember where we started to be able to backtrack later
2097 pszLastStarInText = pszTxt;
2098 pszLastStarInMask = pszMask;
2099
2100 // ignore special chars immediately following this one
2101 // (should this be an error?)
2102 while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
2103 pszMask++;
2104
2105 // if there is nothing more, match
2106 if ( *pszMask == wxT('\0') )
2107 return true;
2108
2109 // are there any other metacharacters in the mask?
2110 size_t uiLenMask;
2111 const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));
2112
2113 if ( pEndMask != NULL ) {
2114 // we have to match the string between two metachars
2115 uiLenMask = pEndMask - pszMask;
2116 }
2117 else {
2118 // we have to match the remainder of the string
2119 uiLenMask = wxStrlen(pszMask);
2120 }
2121
2122 wxString strToMatch(pszMask, uiLenMask);
2123 const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
2124 if ( pMatch == NULL )
2125 return false;
2126
2127 // -1 to compensate "++" in the loop
2128 pszTxt = pMatch + uiLenMask - 1;
2129 pszMask += uiLenMask - 1;
2130 }
2131 break;
2132
2133 default:
2134 if ( *pszMask != *pszTxt )
2135 return false;
2136 break;
2137 }
2138 }
2139
2140 // match only if nothing left
2141 if ( *pszTxt == wxT('\0') )
2142 return true;
2143
2144 // if we failed to match, backtrack if we can
2145 if ( pszLastStarInText ) {
2146 pszTxt = pszLastStarInText + 1;
2147 pszMask = pszLastStarInMask;
2148
2149 pszLastStarInText = NULL;
2150
2151 // don't bother resetting pszLastStarInMask, it's unnecessary
2152
2153 goto match;
2154 }
2155
2156 return false;
2157 #endif // wxUSE_REGEX/!wxUSE_REGEX
2158 }
2159
2160 // Count the number of chars
2161 int wxString::Freq(wxUniChar ch) const
2162 {
2163 int count = 0;
2164 for ( const_iterator i = begin(); i != end(); ++i )
2165 {
2166 if ( *i == ch )
2167 count ++;
2168 }
2169 return count;
2170 }
2171