made wxString::Replace, Matches and Find work with any form of string argument
[wxWidgets.git] / src / common / string.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
5 // Modified by:
6 // Created: 29/01/98
7 // RCS-ID: $Id$
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
12
13 /*
14 * About ref counting:
15 * 1) all empty strings use g_strEmpty, nRefs = -1 (set in Init())
16 * 2) AllocBuffer() sets nRefs to 1, Lock() increments it by one
17 * 3) Unlock() decrements nRefs and frees memory if it goes to 0
18 */
19
20 // ===========================================================================
21 // headers, declarations, constants
22 // ===========================================================================
23
24 // For compilers that support precompilation, includes "wx.h".
25 #include "wx/wxprec.h"
26
27 #ifdef __BORLANDC__
28 #pragma hdrstop
29 #endif
30
31 #ifndef WX_PRECOMP
32 #include "wx/string.h"
33 #endif
34
35 #include <ctype.h>
36
37 #ifndef __WXWINCE__
38 #include <errno.h>
39 #endif
40
41 #include <string.h>
42 #include <stdlib.h>
43
44 #ifdef __SALFORDC__
45 #include <clib.h>
46 #endif
47
48
49 // string handling functions used by wxString:
50 #if wxUSE_UNICODE_UTF8
51 #define wxStringMemcpy memcpy
52 #define wxStringMemcmp memcmp
53 #define wxStringMemchr memchr
54 #define wxStringStrlen strlen
55 #else
56 #define wxStringMemcpy wxTmemcpy
57 #define wxStringMemcmp wxTmemcmp
58 #define wxStringMemchr wxTmemchr
59 #define wxStringStrlen wxStrlen
60 #endif
61
62
63 // ---------------------------------------------------------------------------
64 // static class variables definition
65 // ---------------------------------------------------------------------------
66
67 //According to STL _must_ be a -1 size_t
68 const size_t wxString::npos = (size_t) -1;
69
70 // ----------------------------------------------------------------------------
71 // global functions
72 // ----------------------------------------------------------------------------
73
74 #if wxUSE_STD_IOSTREAM
75
76 #include <iostream>
77
78 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
79 {
80 // FIXME-UTF8: always, not only if wxUSE_UNICODE
81 #if wxUSE_UNICODE && !defined(__BORLANDC__)
82 return os << str.AsWChar();
83 #else
84 return os << str.AsChar();
85 #endif
86 }
87
88 wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
89 {
90 return os << str.c_str();
91 }
92
93 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str)
94 {
95 return os << str.data();
96 }
97
98 #ifndef __BORLANDC__
99 wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)
100 {
101 return os << str.data();
102 }
103 #endif
104
105 #endif // wxUSE_STD_IOSTREAM
106
107 // ===========================================================================
108 // wxString class core
109 // ===========================================================================
110
111 // ---------------------------------------------------------------------------
112 // construction and conversion
113 // ---------------------------------------------------------------------------
114
115 #if wxUSE_UNICODE
116 /* static */
117 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
118 const wxMBConv& conv)
119 {
120 // anything to do?
121 if ( !psz || nLength == 0 )
122 return SubstrBufFromMB();
123
124 if ( nLength == npos )
125 nLength = wxNO_LEN;
126
127 size_t wcLen;
128 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
129 if ( !wcLen )
130 return SubstrBufFromMB();
131 else
132 return SubstrBufFromMB(wcBuf, wcLen);
133 }
134 #else
135 /* static */
136 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
137 const wxMBConv& conv)
138 {
139 // anything to do?
140 if ( !pwz || nLength == 0 )
141 return SubstrBufFromWC();
142
143 if ( nLength == npos )
144 nLength = wxNO_LEN;
145
146 size_t mbLen;
147 wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
148 if ( !mbLen )
149 return SubstrBufFromWC();
150 else
151 return SubstrBufFromWC(mbBuf, mbLen);
152 }
153 #endif
154
155
156 #if wxUSE_UNICODE
157
158 //Convert wxString in Unicode mode to a multi-byte string
159 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
160 {
161 return conv.cWC2MB(c_str(), length() + 1 /* size, not length */, NULL);
162 }
163
164 #else // ANSI
165
166 #if wxUSE_WCHAR_T
167
168 //Converts this string to a wide character string if unicode
169 //mode is not enabled and wxUSE_WCHAR_T is enabled
170 const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
171 {
172 return conv.cMB2WC(c_str(), length() + 1 /* size, not length */, NULL);
173 }
174
175 #endif // wxUSE_WCHAR_T
176
177 #endif // Unicode/ANSI
178
179 // shrink to minimal size (releasing extra memory)
180 bool wxString::Shrink()
181 {
182 wxString tmp(begin(), end());
183 swap(tmp);
184 return tmp.length() == length();
185 }
186
187 // deprecated compatibility code:
188 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
189 wxChar *wxString::GetWriteBuf(size_t nLen)
190 {
191 return DoGetWriteBuf(nLen);
192 }
193
194 void wxString::UngetWriteBuf()
195 {
196 DoUngetWriteBuf();
197 }
198
199 void wxString::UngetWriteBuf(size_t nLen)
200 {
201 DoUngetWriteBuf(nLen);
202 }
203 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
204
205
206 // ---------------------------------------------------------------------------
207 // data access
208 // ---------------------------------------------------------------------------
209
210 // all functions are inline in string.h
211
212 // ---------------------------------------------------------------------------
213 // concatenation operators
214 // ---------------------------------------------------------------------------
215
216 /*
217 * concatenation functions come in 5 flavours:
218 * string + string
219 * char + string and string + char
220 * C str + string and string + C str
221 */
222
223 wxString operator+(const wxString& str1, const wxString& str2)
224 {
225 #if !wxUSE_STL_BASED_WXSTRING
226 wxASSERT( str1.IsValid() );
227 wxASSERT( str2.IsValid() );
228 #endif
229
230 wxString s = str1;
231 s += str2;
232
233 return s;
234 }
235
236 wxString operator+(const wxString& str, wxUniChar ch)
237 {
238 #if !wxUSE_STL_BASED_WXSTRING
239 wxASSERT( str.IsValid() );
240 #endif
241
242 wxString s = str;
243 s += ch;
244
245 return s;
246 }
247
248 wxString operator+(wxUniChar ch, const wxString& str)
249 {
250 #if !wxUSE_STL_BASED_WXSTRING
251 wxASSERT( str.IsValid() );
252 #endif
253
254 wxString s = ch;
255 s += str;
256
257 return s;
258 }
259
260 wxString operator+(const wxString& str, const char *psz)
261 {
262 #if !wxUSE_STL_BASED_WXSTRING
263 wxASSERT( str.IsValid() );
264 #endif
265
266 wxString s;
267 if ( !s.Alloc(strlen(psz) + str.length()) ) {
268 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
269 }
270 s += str;
271 s += psz;
272
273 return s;
274 }
275
276 wxString operator+(const wxString& str, const wchar_t *pwz)
277 {
278 #if !wxUSE_STL_BASED_WXSTRING
279 wxASSERT( str.IsValid() );
280 #endif
281
282 wxString s;
283 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
284 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
285 }
286 s += str;
287 s += pwz;
288
289 return s;
290 }
291
292 wxString operator+(const char *psz, const wxString& str)
293 {
294 #if !wxUSE_STL_BASED_WXSTRING
295 wxASSERT( str.IsValid() );
296 #endif
297
298 wxString s;
299 if ( !s.Alloc(strlen(psz) + str.length()) ) {
300 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
301 }
302 s = psz;
303 s += str;
304
305 return s;
306 }
307
308 wxString operator+(const wchar_t *pwz, const wxString& str)
309 {
310 #if !wxUSE_STL_BASED_WXSTRING
311 wxASSERT( str.IsValid() );
312 #endif
313
314 wxString s;
315 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
316 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
317 }
318 s = pwz;
319 s += str;
320
321 return s;
322 }
323
324 // ---------------------------------------------------------------------------
325 // string comparison
326 // ---------------------------------------------------------------------------
327
328 #ifdef HAVE_STD_STRING_COMPARE
329
330 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
331 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
332 // sort strings in characters code point order by sorting the byte sequence
333 // in byte values order (i.e. what strcmp() and memcmp() do).
334
335 int wxString::compare(const wxString& str) const
336 {
337 return m_impl.compare(str.m_impl);
338 }
339
340 int wxString::compare(size_t nStart, size_t nLen,
341 const wxString& str) const
342 {
343 size_t pos, len;
344 PosLenToImpl(nStart, nLen, &pos, &len);
345 return m_impl.compare(pos, len, str.m_impl);
346 }
347
348 int wxString::compare(size_t nStart, size_t nLen,
349 const wxString& str,
350 size_t nStart2, size_t nLen2) const
351 {
352 size_t pos, len;
353 PosLenToImpl(nStart, nLen, &pos, &len);
354
355 size_t pos2, len2;
356 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
357
358 return m_impl.compare(pos, len, str.m_impl, pos2, len2);
359 }
360
361 int wxString::compare(const char* sz) const
362 {
363 return m_impl.compare(ImplStr(sz));
364 }
365
366 int wxString::compare(const wchar_t* sz) const
367 {
368 return m_impl.compare(ImplStr(sz));
369 }
370
371 int wxString::compare(size_t nStart, size_t nLen,
372 const char* sz, size_t nCount) const
373 {
374 size_t pos, len;
375 PosLenToImpl(nStart, nLen, &pos, &len);
376
377 SubstrBufFromMB str(ImplStr(sz, nCount));
378
379 return m_impl.compare(pos, len, str.data, str.len);
380 }
381
382 int wxString::compare(size_t nStart, size_t nLen,
383 const wchar_t* sz, size_t nCount) const
384 {
385 size_t pos, len;
386 PosLenToImpl(nStart, nLen, &pos, &len);
387
388 SubstrBufFromWC str(ImplStr(sz, nCount));
389
390 return m_impl.compare(pos, len, str.data, str.len);
391 }
392
393 #else // !HAVE_STD_STRING_COMPARE
394
395 static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
396 const wxStringCharType* s2, size_t l2)
397 {
398 if( l1 == l2 )
399 return wxStringMemcmp(s1, s2, l1);
400 else if( l1 < l2 )
401 {
402 int ret = wxStringMemcmp(s1, s2, l1);
403 return ret == 0 ? -1 : ret;
404 }
405 else
406 {
407 int ret = wxStringMemcmp(s1, s2, l2);
408 return ret == 0 ? +1 : ret;
409 }
410 }
411
412 int wxString::compare(const wxString& str) const
413 {
414 return ::wxDoCmp(m_impl.data(), m_impl.length(),
415 str.m_impl.data(), str.m_impl.length());
416 }
417
418 int wxString::compare(size_t nStart, size_t nLen,
419 const wxString& str) const
420 {
421 wxASSERT(nStart <= length());
422 size_type strLen = length() - nStart;
423 nLen = strLen < nLen ? strLen : nLen;
424
425 size_t pos, len;
426 PosLenToImpl(nStart, nLen, &pos, &len);
427
428 return ::wxDoCmp(m_impl.data() + pos, len,
429 str.m_impl.data(), str.m_impl.length());
430 }
431
432 int wxString::compare(size_t nStart, size_t nLen,
433 const wxString& str,
434 size_t nStart2, size_t nLen2) const
435 {
436 wxASSERT(nStart <= length());
437 wxASSERT(nStart2 <= str.length());
438 size_type strLen = length() - nStart,
439 strLen2 = str.length() - nStart2;
440 nLen = strLen < nLen ? strLen : nLen;
441 nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
442
443 size_t pos, len;
444 PosLenToImpl(nStart, nLen, &pos, &len);
445 size_t pos2, len2;
446 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
447
448 return ::wxDoCmp(m_impl.data() + pos, len,
449 str.m_impl.data() + pos2, len2);
450 }
451
452 int wxString::compare(const char* sz) const
453 {
454 SubstrBufFromMB str(ImplStr(sz, npos));
455 if ( str.len == npos )
456 str.len = wxStringStrlen(str.data);
457 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
458 }
459
460 int wxString::compare(const wchar_t* sz) const
461 {
462 SubstrBufFromWC str(ImplStr(sz, npos));
463 if ( str.len == npos )
464 str.len = wxStringStrlen(str.data);
465 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
466 }
467
468 int wxString::compare(size_t nStart, size_t nLen,
469 const char* sz, size_t nCount) const
470 {
471 wxASSERT(nStart <= length());
472 size_type strLen = length() - nStart;
473 nLen = strLen < nLen ? strLen : nLen;
474
475 size_t pos, len;
476 PosLenToImpl(nStart, nLen, &pos, &len);
477
478 SubstrBufFromMB str(ImplStr(sz, nCount));
479 if ( str.len == npos )
480 str.len = wxStringStrlen(str.data);
481
482 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
483 }
484
485 int wxString::compare(size_t nStart, size_t nLen,
486 const wchar_t* sz, size_t nCount) const
487 {
488 wxASSERT(nStart <= length());
489 size_type strLen = length() - nStart;
490 nLen = strLen < nLen ? strLen : nLen;
491
492 size_t pos, len;
493 PosLenToImpl(nStart, nLen, &pos, &len);
494
495 SubstrBufFromWC str(ImplStr(sz, nCount));
496 if ( str.len == npos )
497 str.len = wxStringStrlen(str.data);
498
499 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
500 }
501
502 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
503
504
505 // ---------------------------------------------------------------------------
506 // find_{first,last}_[not]_of functions
507 // ---------------------------------------------------------------------------
508
509 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
510
511 // NB: All these functions are implemented with the argument being wxChar*,
512 // i.e. widechar string in any Unicode build, even though native string
513 // representation is char* in the UTF-8 build. This is because we couldn't
514 // use memchr() to determine if a character is in a set encoded as UTF-8.
515
516 size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
517 {
518 return find_first_of(sz, nStart, wxStrlen(sz));
519 }
520
521 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
522 {
523 return find_first_not_of(sz, nStart, wxStrlen(sz));
524 }
525
526 size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
527 {
528 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
529
530 size_t idx = nStart;
531 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
532 {
533 if ( wxTmemchr(sz, *i, n) )
534 return idx;
535 }
536
537 return npos;
538 }
539
540 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
541 {
542 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
543
544 size_t idx = nStart;
545 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
546 {
547 if ( !wxTmemchr(sz, *i, n) )
548 return idx;
549 }
550
551 return npos;
552 }
553
554
555 size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
556 {
557 return find_last_of(sz, nStart, wxStrlen(sz));
558 }
559
560 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
561 {
562 return find_last_not_of(sz, nStart, wxStrlen(sz));
563 }
564
565 size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
566 {
567 size_t len = length();
568
569 if ( nStart == npos )
570 {
571 nStart = len - 1;
572 }
573 else
574 {
575 wxASSERT_MSG( nStart <= len, _T("invalid index") );
576 }
577
578 size_t idx = nStart;
579 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
580 i != rend(); --idx, ++i )
581 {
582 if ( wxTmemchr(sz, *i, n) )
583 return idx;
584 }
585
586 return npos;
587 }
588
589 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
590 {
591 size_t len = length();
592
593 if ( nStart == npos )
594 {
595 nStart = len - 1;
596 }
597 else
598 {
599 wxASSERT_MSG( nStart <= len, _T("invalid index") );
600 }
601
602 size_t idx = nStart;
603 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
604 i != rend(); --idx, ++i )
605 {
606 if ( !wxTmemchr(sz, *i, n) )
607 return idx;
608 }
609
610 return npos;
611 }
612
613 size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
614 {
615 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
616
617 size_t idx = nStart;
618 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
619 {
620 if ( *i != ch )
621 return idx;
622 }
623
624 return npos;
625 }
626
627 size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
628 {
629 size_t len = length();
630
631 if ( nStart == npos )
632 {
633 nStart = len - 1;
634 }
635 else
636 {
637 wxASSERT_MSG( nStart <= len, _T("invalid index") );
638 }
639
640 size_t idx = nStart;
641 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
642 i != rend(); --idx, ++i )
643 {
644 if ( *i != ch )
645 return idx;
646 }
647
648 return npos;
649 }
650
651 // the functions above were implemented for wchar_t* arguments in Unicode
652 // build and char* in ANSI build; below are implementations for the other
653 // version:
654 #if wxUSE_UNICODE
655 #define wxOtherCharType char
656 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
657 #else
658 #define wxOtherCharType wchar_t
659 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
660 #endif
661
662 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
663 { return find_first_of(STRCONV(sz), nStart); }
664
665 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
666 size_t n) const
667 { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
668 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
669 { return find_last_of(STRCONV(sz), nStart); }
670 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
671 size_t n) const
672 { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
673 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
674 { return find_first_not_of(STRCONV(sz), nStart); }
675 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
676 size_t n) const
677 { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
678 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
679 { return find_last_not_of(STRCONV(sz), nStart); }
680 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
681 size_t n) const
682 { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
683
684 #undef wxOtherCharType
685 #undef STRCONV
686
687 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
688
689 // ===========================================================================
690 // other common string functions
691 // ===========================================================================
692
693 int wxString::CmpNoCase(const wxString& s) const
694 {
695 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
696
697 size_t idx = 0;
698 const_iterator i1 = begin();
699 const_iterator end1 = end();
700 const_iterator i2 = s.begin();
701 const_iterator end2 = s.end();
702
703 for ( ; i1 != end1 && i2 != end2; ++idx, ++i1, ++i2 )
704 {
705 wxUniChar lower1 = (wxChar)wxTolower(*i1);
706 wxUniChar lower2 = (wxChar)wxTolower(*i2);
707 if ( lower1 != lower2 )
708 return lower1 < lower2 ? -1 : 1;
709 }
710
711 size_t len1 = length();
712 size_t len2 = s.length();
713
714 if ( len1 < len2 )
715 return -1;
716 else if ( len1 > len2 )
717 return 1;
718 return 0;
719 }
720
721
722 #if wxUSE_UNICODE
723
724 #ifdef __MWERKS__
725 #ifndef __SCHAR_MAX__
726 #define __SCHAR_MAX__ 127
727 #endif
728 #endif
729
730 wxString wxString::FromAscii(const char *ascii)
731 {
732 if (!ascii)
733 return wxEmptyString;
734
735 size_t len = strlen( ascii );
736 wxString res;
737
738 if ( len )
739 {
740 wxStringBuffer buf(res, len);
741
742 wchar_t *dest = buf;
743
744 for ( ;; )
745 {
746 if ( (*dest++ = (wchar_t)(unsigned char)*ascii++) == L'\0' )
747 break;
748 }
749 }
750
751 return res;
752 }
753
754 wxString wxString::FromAscii(const char ascii)
755 {
756 // What do we do with '\0' ?
757
758 wxString res;
759 res += (wchar_t)(unsigned char) ascii;
760
761 return res;
762 }
763
764 const wxCharBuffer wxString::ToAscii() const
765 {
766 // this will allocate enough space for the terminating NUL too
767 wxCharBuffer buffer(length());
768
769
770 char *dest = buffer.data();
771
772 const wchar_t *pwc = c_str();
773 for ( ;; )
774 {
775 *dest++ = (char)(*pwc > SCHAR_MAX ? wxT('_') : *pwc);
776
777 // the output string can't have embedded NULs anyhow, so we can safely
778 // stop at first of them even if we do have any
779 if ( !*pwc++ )
780 break;
781 }
782
783 return buffer;
784 }
785
786 #endif // Unicode
787
788 // extract string of length nCount starting at nFirst
789 wxString wxString::Mid(size_t nFirst, size_t nCount) const
790 {
791 size_t nLen = length();
792
793 // default value of nCount is npos and means "till the end"
794 if ( nCount == npos )
795 {
796 nCount = nLen - nFirst;
797 }
798
799 // out-of-bounds requests return sensible things
800 if ( nFirst + nCount > nLen )
801 {
802 nCount = nLen - nFirst;
803 }
804
805 if ( nFirst > nLen )
806 {
807 // AllocCopy() will return empty string
808 return wxEmptyString;
809 }
810
811 wxString dest(*this, nFirst, nCount);
812 if ( dest.length() != nCount )
813 {
814 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
815 }
816
817 return dest;
818 }
819
820 // check that the string starts with prefix and return the rest of the string
821 // in the provided pointer if it is not NULL, otherwise return false
822 bool wxString::StartsWith(const wxChar *prefix, wxString *rest) const
823 {
824 wxASSERT_MSG( prefix, _T("invalid parameter in wxString::StartsWith") );
825
826 // first check if the beginning of the string matches the prefix: note
827 // that we don't have to check that we don't run out of this string as
828 // when we reach the terminating NUL, either prefix string ends too (and
829 // then it's ok) or we break out of the loop because there is no match
830 const wxChar *p = c_str();
831 while ( *prefix )
832 {
833 if ( *prefix++ != *p++ )
834 {
835 // no match
836 return false;
837 }
838 }
839
840 if ( rest )
841 {
842 // put the rest of the string into provided pointer
843 *rest = p;
844 }
845
846 return true;
847 }
848
849
850 // check that the string ends with suffix and return the rest of it in the
851 // provided pointer if it is not NULL, otherwise return false
852 bool wxString::EndsWith(const wxChar *suffix, wxString *rest) const
853 {
854 wxASSERT_MSG( suffix, _T("invalid parameter in wxString::EndssWith") );
855
856 int start = length() - wxStrlen(suffix);
857 if ( start < 0 || wxStrcmp(wx_str() + start, suffix) != 0 )
858 return false;
859
860 if ( rest )
861 {
862 // put the rest of the string into provided pointer
863 rest->assign(*this, 0, start);
864 }
865
866 return true;
867 }
868
869
870 // extract nCount last (rightmost) characters
871 wxString wxString::Right(size_t nCount) const
872 {
873 if ( nCount > length() )
874 nCount = length();
875
876 wxString dest(*this, length() - nCount, nCount);
877 if ( dest.length() != nCount ) {
878 wxFAIL_MSG( _T("out of memory in wxString::Right") );
879 }
880 return dest;
881 }
882
883 // get all characters after the last occurence of ch
884 // (returns the whole string if ch not found)
885 wxString wxString::AfterLast(wxUniChar ch) const
886 {
887 wxString str;
888 int iPos = Find(ch, true);
889 if ( iPos == wxNOT_FOUND )
890 str = *this;
891 else
892 str = wx_str() + iPos + 1;
893
894 return str;
895 }
896
897 // extract nCount first (leftmost) characters
898 wxString wxString::Left(size_t nCount) const
899 {
900 if ( nCount > length() )
901 nCount = length();
902
903 wxString dest(*this, 0, nCount);
904 if ( dest.length() != nCount ) {
905 wxFAIL_MSG( _T("out of memory in wxString::Left") );
906 }
907 return dest;
908 }
909
910 // get all characters before the first occurence of ch
911 // (returns the whole string if ch not found)
912 wxString wxString::BeforeFirst(wxUniChar ch) const
913 {
914 int iPos = Find(ch);
915 if ( iPos == wxNOT_FOUND ) iPos = length();
916 return wxString(*this, 0, iPos);
917 }
918
919 /// get all characters before the last occurence of ch
920 /// (returns empty string if ch not found)
921 wxString wxString::BeforeLast(wxUniChar ch) const
922 {
923 wxString str;
924 int iPos = Find(ch, true);
925 if ( iPos != wxNOT_FOUND && iPos != 0 )
926 str = wxString(c_str(), iPos);
927
928 return str;
929 }
930
931 /// get all characters after the first occurence of ch
932 /// (returns empty string if ch not found)
933 wxString wxString::AfterFirst(wxUniChar ch) const
934 {
935 wxString str;
936 int iPos = Find(ch);
937 if ( iPos != wxNOT_FOUND )
938 str = wx_str() + iPos + 1;
939
940 return str;
941 }
942
943 // replace first (or all) occurences of some substring with another one
944 size_t wxString::Replace(const wxString& strOld,
945 const wxString& strNew, bool bReplaceAll)
946 {
947 // if we tried to replace an empty string we'd enter an infinite loop below
948 wxCHECK_MSG( !strOld.empty(), 0,
949 _T("wxString::Replace(): invalid parameter") );
950
951 size_t uiCount = 0; // count of replacements made
952
953 size_t uiOldLen = strOld.length();
954 size_t uiNewLen = strNew.length();
955
956 size_t dwPos = 0;
957
958 while ( (*this)[dwPos] != wxT('\0') )
959 {
960 //DO NOT USE STRSTR HERE
961 //this string can contain embedded null characters,
962 //so strstr will function incorrectly
963 dwPos = find(strOld, dwPos);
964 if ( dwPos == npos )
965 break; // exit the loop
966 else
967 {
968 //replace this occurance of the old string with the new one
969 replace(dwPos, uiOldLen, strNew, uiNewLen);
970
971 //move up pos past the string that was replaced
972 dwPos += uiNewLen;
973
974 //increase replace count
975 ++uiCount;
976
977 // stop now?
978 if ( !bReplaceAll )
979 break; // exit the loop
980 }
981 }
982
983 return uiCount;
984 }
985
986 bool wxString::IsAscii() const
987 {
988 const wxChar *s = (const wxChar*) *this;
989 while(*s){
990 if(!isascii(*s)) return(false);
991 s++;
992 }
993 return(true);
994 }
995
996 bool wxString::IsWord() const
997 {
998 const wxChar *s = (const wxChar*) *this;
999 while(*s){
1000 if(!wxIsalpha(*s)) return(false);
1001 s++;
1002 }
1003 return(true);
1004 }
1005
1006 bool wxString::IsNumber() const
1007 {
1008 const wxChar *s = (const wxChar*) *this;
1009 if (wxStrlen(s))
1010 if ((s[0] == wxT('-')) || (s[0] == wxT('+'))) s++;
1011 while(*s){
1012 if(!wxIsdigit(*s)) return(false);
1013 s++;
1014 }
1015 return(true);
1016 }
1017
1018 wxString wxString::Strip(stripType w) const
1019 {
1020 wxString s = *this;
1021 if ( w & leading ) s.Trim(false);
1022 if ( w & trailing ) s.Trim(true);
1023 return s;
1024 }
1025
1026 // ---------------------------------------------------------------------------
1027 // case conversion
1028 // ---------------------------------------------------------------------------
1029
1030 wxString& wxString::MakeUpper()
1031 {
1032 for ( iterator it = begin(), en = end(); it != en; ++it )
1033 *it = (wxChar)wxToupper(*it);
1034
1035 return *this;
1036 }
1037
1038 wxString& wxString::MakeLower()
1039 {
1040 for ( iterator it = begin(), en = end(); it != en; ++it )
1041 *it = (wxChar)wxTolower(*it);
1042
1043 return *this;
1044 }
1045
1046 // ---------------------------------------------------------------------------
1047 // trimming and padding
1048 // ---------------------------------------------------------------------------
1049
1050 // some compilers (VC++ 6.0 not to name them) return true for a call to
1051 // isspace('ê') in the C locale which seems to be broken to me, but we have to
1052 // live with this by checking that the character is a 7 bit one - even if this
1053 // may fail to detect some spaces (I don't know if Unicode doesn't have
1054 // space-like symbols somewhere except in the first 128 chars), it is arguably
1055 // still better than trimming away accented letters
1056 inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1057
1058 // trims spaces (in the sense of isspace) from left or right side
1059 wxString& wxString::Trim(bool bFromRight)
1060 {
1061 // first check if we're going to modify the string at all
1062 if ( !empty() &&
1063 (
1064 (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1065 (!bFromRight && wxSafeIsspace(GetChar(0u)))
1066 )
1067 )
1068 {
1069 if ( bFromRight )
1070 {
1071 // find last non-space character
1072 reverse_iterator psz = rbegin();
1073 while ( (psz != rend()) && wxSafeIsspace(*psz) )
1074 psz++;
1075
1076 // truncate at trailing space start
1077 erase(psz.base(), end());
1078 }
1079 else
1080 {
1081 // find first non-space character
1082 iterator psz = begin();
1083 while ( (psz != end()) && wxSafeIsspace(*psz) )
1084 psz++;
1085
1086 // fix up data and length
1087 erase(begin(), psz);
1088 }
1089 }
1090
1091 return *this;
1092 }
1093
1094 // adds nCount characters chPad to the string from either side
1095 wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1096 {
1097 wxString s(chPad, nCount);
1098
1099 if ( bFromRight )
1100 *this += s;
1101 else
1102 {
1103 s += *this;
1104 swap(s);
1105 }
1106
1107 return *this;
1108 }
1109
1110 // truncate the string
1111 wxString& wxString::Truncate(size_t uiLen)
1112 {
1113 if ( uiLen < length() )
1114 {
1115 erase(begin() + uiLen, end());
1116 }
1117 //else: nothing to do, string is already short enough
1118
1119 return *this;
1120 }
1121
1122 // ---------------------------------------------------------------------------
1123 // finding (return wxNOT_FOUND if not found and index otherwise)
1124 // ---------------------------------------------------------------------------
1125
1126 // find a character
1127 int wxString::Find(wxUniChar ch, bool bFromEnd) const
1128 {
1129 size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1130
1131 return (idx == npos) ? wxNOT_FOUND : (int)idx;
1132 }
1133
1134 // ----------------------------------------------------------------------------
1135 // conversion to numbers
1136 // ----------------------------------------------------------------------------
1137
1138 // the implementation of all the functions below is exactly the same so factor
1139 // it out
1140
1141 template <typename T, typename F>
1142 bool wxStringToIntType(const wxChar *start,
1143 T *val,
1144 int base,
1145 F func)
1146 {
1147 wxCHECK_MSG( val, false, _T("NULL output pointer") );
1148 wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );
1149
1150 #ifndef __WXWINCE__
1151 errno = 0;
1152 #endif
1153
1154 wxChar *end;
1155 *val = (*func)(start, &end, base);
1156
1157 // return true only if scan was stopped by the terminating NUL and if the
1158 // string was not empty to start with and no under/overflow occurred
1159 return !*end && (end != start)
1160 #ifndef __WXWINCE__
1161 && (errno != ERANGE)
1162 #endif
1163 ;
1164 }
1165
1166 bool wxString::ToLong(long *val, int base) const
1167 {
1168 return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtol);
1169 }
1170
1171 bool wxString::ToULong(unsigned long *val, int base) const
1172 {
1173 return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtoul);
1174 }
1175
1176 bool wxString::ToLongLong(wxLongLong_t *val, int base) const
1177 {
1178 #ifdef wxHAS_STRTOLL
1179 return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtoll);
1180 #else
1181 // TODO: implement this ourselves
1182 wxUnusedVar(val);
1183 wxUnusedVar(base);
1184 return false;
1185 #endif // wxHAS_STRTOLL
1186 }
1187
1188 bool wxString::ToULongLong(wxULongLong_t *val, int base) const
1189 {
1190 #ifdef wxHAS_STRTOLL
1191 return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtoull);
1192 #else
1193 // TODO: implement this ourselves
1194 wxUnusedVar(val);
1195 wxUnusedVar(base);
1196 return false;
1197 #endif
1198 }
1199
1200 bool wxString::ToDouble(double *val) const
1201 {
1202 wxCHECK_MSG( val, false, _T("NULL pointer in wxString::ToDouble") );
1203
1204 #ifndef __WXWINCE__
1205 errno = 0;
1206 #endif
1207
1208 const wxChar *start = c_str();
1209 wxChar *end;
1210 *val = wxStrtod(start, &end);
1211
1212 // return true only if scan was stopped by the terminating NUL and if the
1213 // string was not empty to start with and no under/overflow occurred
1214 return !*end && (end != start)
1215 #ifndef __WXWINCE__
1216 && (errno != ERANGE)
1217 #endif
1218 ;
1219 }
1220
1221 // ---------------------------------------------------------------------------
1222 // formatted output
1223 // ---------------------------------------------------------------------------
1224
1225 /* static */
1226 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1227 wxString wxStringPrintfMixinBase::DoFormat(const wxChar *format, ...)
1228 #else
1229 wxString wxString::DoFormat(const wxChar *format, ...)
1230 #endif
1231 {
1232 va_list argptr;
1233 va_start(argptr, format);
1234
1235 wxString s;
1236 s.PrintfV(format, argptr);
1237
1238 va_end(argptr);
1239
1240 return s;
1241 }
1242
1243 /* static */
1244 wxString wxString::FormatV(const wxString& format, va_list argptr)
1245 {
1246 wxString s;
1247 s.PrintfV(format, argptr);
1248 return s;
1249 }
1250
1251 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1252 int wxStringPrintfMixinBase::DoPrintf(const wxChar *format, ...)
1253 #else
1254 int wxString::DoPrintf(const wxChar *format, ...)
1255 #endif
1256 {
1257 va_list argptr;
1258 va_start(argptr, format);
1259
1260 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1261 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1262 // because it's the only cast that works safely for downcasting when
1263 // multiple inheritance is used:
1264 wxString *str = static_cast<wxString*>(this);
1265 #else
1266 wxString *str = this;
1267 #endif
1268
1269 int iLen = str->PrintfV(format, argptr);
1270
1271 va_end(argptr);
1272
1273 return iLen;
1274 }
1275
1276 int wxString::PrintfV(const wxString& format, va_list argptr)
1277 {
1278 int size = 1024;
1279
1280 for ( ;; )
1281 {
1282 wxStringBuffer tmp(*this, size + 1);
1283 wxChar *buf = tmp;
1284
1285 if ( !buf )
1286 {
1287 // out of memory
1288 return -1;
1289 }
1290
1291 // wxVsnprintf() may modify the original arg pointer, so pass it
1292 // only a copy
1293 va_list argptrcopy;
1294 wxVaCopy(argptrcopy, argptr);
1295 int len = wxVsnprintf(buf, size, format, argptrcopy);
1296 va_end(argptrcopy);
1297
1298 // some implementations of vsnprintf() don't NUL terminate
1299 // the string if there is not enough space for it so
1300 // always do it manually
1301 buf[size] = _T('\0');
1302
1303 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1304 // total number of characters which would have been written if the
1305 // buffer were large enough (newer standards such as Unix98)
1306 if ( len < 0 )
1307 {
1308 #if wxUSE_WXVSNPRINTF
1309 // we know that our own implementation of wxVsnprintf() returns -1
1310 // only for a format error - thus there's something wrong with
1311 // the user's format string
1312 return -1;
1313 #else // assume that system version only returns error if not enough space
1314 // still not enough, as we don't know how much we need, double the
1315 // current size of the buffer
1316 size *= 2;
1317 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1318 }
1319 else if ( len >= size )
1320 {
1321 #if wxUSE_WXVSNPRINTF
1322 // we know that our own implementation of wxVsnprintf() returns
1323 // size+1 when there's not enough space but that's not the size
1324 // of the required buffer!
1325 size *= 2; // so we just double the current size of the buffer
1326 #else
1327 // some vsnprintf() implementations NUL-terminate the buffer and
1328 // some don't in len == size case, to be safe always add 1
1329 size = len + 1;
1330 #endif
1331 }
1332 else // ok, there was enough space
1333 {
1334 break;
1335 }
1336 }
1337
1338 // we could have overshot
1339 Shrink();
1340
1341 return length();
1342 }
1343
1344 // ----------------------------------------------------------------------------
1345 // misc other operations
1346 // ----------------------------------------------------------------------------
1347
1348 // returns true if the string matches the pattern which may contain '*' and
1349 // '?' metacharacters (as usual, '?' matches any character and '*' any number
1350 // of them)
1351 bool wxString::Matches(const wxString& mask) const
1352 {
1353 // I disable this code as it doesn't seem to be faster (in fact, it seems
1354 // to be much slower) than the old, hand-written code below and using it
1355 // here requires always linking with libregex even if the user code doesn't
1356 // use it
1357 #if 0 // wxUSE_REGEX
1358 // first translate the shell-like mask into a regex
1359 wxString pattern;
1360 pattern.reserve(wxStrlen(pszMask));
1361
1362 pattern += _T('^');
1363 while ( *pszMask )
1364 {
1365 switch ( *pszMask )
1366 {
1367 case _T('?'):
1368 pattern += _T('.');
1369 break;
1370
1371 case _T('*'):
1372 pattern += _T(".*");
1373 break;
1374
1375 case _T('^'):
1376 case _T('.'):
1377 case _T('$'):
1378 case _T('('):
1379 case _T(')'):
1380 case _T('|'):
1381 case _T('+'):
1382 case _T('\\'):
1383 // these characters are special in a RE, quote them
1384 // (however note that we don't quote '[' and ']' to allow
1385 // using them for Unix shell like matching)
1386 pattern += _T('\\');
1387 // fall through
1388
1389 default:
1390 pattern += *pszMask;
1391 }
1392
1393 pszMask++;
1394 }
1395 pattern += _T('$');
1396
1397 // and now use it
1398 return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
1399 #else // !wxUSE_REGEX
1400 // TODO: this is, of course, awfully inefficient...
1401
1402 // FIXME-UTF8: implement using iterators, remove #if
1403 #if wxUSE_UNICODE_UTF8
1404 wxWCharBuffer maskBuf = mask.wc_str();
1405 wxWCharBuffer txtBuf = wc_str();
1406 const wxChar *pszMask = maskBuf.data();
1407 const wxChar *pszTxt = txtBuf.data();
1408 #else
1409 const wxChar *pszMask = mask.wx_str();
1410 // the char currently being checked
1411 const wxChar *pszTxt = wx_str();
1412 #endif
1413
1414 // the last location where '*' matched
1415 const wxChar *pszLastStarInText = NULL;
1416 const wxChar *pszLastStarInMask = NULL;
1417
1418 match:
1419 for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
1420 switch ( *pszMask ) {
1421 case wxT('?'):
1422 if ( *pszTxt == wxT('\0') )
1423 return false;
1424
1425 // pszTxt and pszMask will be incremented in the loop statement
1426
1427 break;
1428
1429 case wxT('*'):
1430 {
1431 // remember where we started to be able to backtrack later
1432 pszLastStarInText = pszTxt;
1433 pszLastStarInMask = pszMask;
1434
1435 // ignore special chars immediately following this one
1436 // (should this be an error?)
1437 while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
1438 pszMask++;
1439
1440 // if there is nothing more, match
1441 if ( *pszMask == wxT('\0') )
1442 return true;
1443
1444 // are there any other metacharacters in the mask?
1445 size_t uiLenMask;
1446 const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));
1447
1448 if ( pEndMask != NULL ) {
1449 // we have to match the string between two metachars
1450 uiLenMask = pEndMask - pszMask;
1451 }
1452 else {
1453 // we have to match the remainder of the string
1454 uiLenMask = wxStrlen(pszMask);
1455 }
1456
1457 wxString strToMatch(pszMask, uiLenMask);
1458 const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
1459 if ( pMatch == NULL )
1460 return false;
1461
1462 // -1 to compensate "++" in the loop
1463 pszTxt = pMatch + uiLenMask - 1;
1464 pszMask += uiLenMask - 1;
1465 }
1466 break;
1467
1468 default:
1469 if ( *pszMask != *pszTxt )
1470 return false;
1471 break;
1472 }
1473 }
1474
1475 // match only if nothing left
1476 if ( *pszTxt == wxT('\0') )
1477 return true;
1478
1479 // if we failed to match, backtrack if we can
1480 if ( pszLastStarInText ) {
1481 pszTxt = pszLastStarInText + 1;
1482 pszMask = pszLastStarInMask;
1483
1484 pszLastStarInText = NULL;
1485
1486 // don't bother resetting pszLastStarInMask, it's unnecessary
1487
1488 goto match;
1489 }
1490
1491 return false;
1492 #endif // wxUSE_REGEX/!wxUSE_REGEX
1493 }
1494
1495 // Count the number of chars
1496 int wxString::Freq(wxUniChar ch) const
1497 {
1498 int count = 0;
1499 for ( const_iterator i = begin(); i != end(); ++i )
1500 {
1501 if ( *i == ch )
1502 count ++;
1503 }
1504 return count;
1505 }
1506
1507 // convert to upper case, return the copy of the string
1508 wxString wxString::Upper() const
1509 { wxString s(*this); return s.MakeUpper(); }
1510
1511 // convert to lower case, return the copy of the string
1512 wxString wxString::Lower() const { wxString s(*this); return s.MakeLower(); }
1513