cleanup of wxString ctors and operator=: made both char* and wchar_t* versions availa...
[wxWidgets.git] / src / common / string.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
5 // Modified by:
6 // Created: 29/01/98
7 // RCS-ID: $Id$
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
12
13 /*
14 * About ref counting:
15 * 1) all empty strings use g_strEmpty, nRefs = -1 (set in Init())
16 * 2) AllocBuffer() sets nRefs to 1, Lock() increments it by one
17 * 3) Unlock() decrements nRefs and frees memory if it goes to 0
18 */
19
20 // ===========================================================================
21 // headers, declarations, constants
22 // ===========================================================================
23
24 // For compilers that support precompilation, includes "wx.h".
25 #include "wx/wxprec.h"
26
27 #ifdef __BORLANDC__
28 #pragma hdrstop
29 #endif
30
31 #ifndef WX_PRECOMP
32 #include "wx/string.h"
33 #endif
34
35 #include <ctype.h>
36
37 #ifndef __WXWINCE__
38 #include <errno.h>
39 #endif
40
41 #include <string.h>
42 #include <stdlib.h>
43
44 #ifdef __SALFORDC__
45 #include <clib.h>
46 #endif
47
48
49 // string handling functions used by wxString:
50 #if wxUSE_UNICODE_UTF8
51 #define wxStringMemcpy memcpy
52 #define wxStringMemcmp memcmp
53 #define wxStringMemchr memchr
54 #define wxStringStrlen strlen
55 #else
56 #define wxStringMemcpy wxTmemcpy
57 #define wxStringMemcmp wxTmemcmp
58 #define wxStringMemchr wxTmemchr
59 #define wxStringStrlen wxStrlen
60 #endif
61
62
63 // ---------------------------------------------------------------------------
64 // static class variables definition
65 // ---------------------------------------------------------------------------
66
67 //According to STL _must_ be a -1 size_t
68 const size_t wxString::npos = (size_t) -1;
69
70 // ----------------------------------------------------------------------------
71 // global functions
72 // ----------------------------------------------------------------------------
73
74 #if wxUSE_STD_IOSTREAM
75
76 #include <iostream>
77
78 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
79 {
80 // FIXME-UTF8: always, not only if wxUSE_UNICODE
81 #if wxUSE_UNICODE && !defined(__BORLANDC__)
82 return os << str.AsWChar();
83 #else
84 return os << str.AsChar();
85 #endif
86 }
87
88 wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
89 {
90 return os << str.c_str();
91 }
92
93 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str)
94 {
95 return os << str.data();
96 }
97
98 #ifndef __BORLANDC__
99 wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)
100 {
101 return os << str.data();
102 }
103 #endif
104
105 #endif // wxUSE_STD_IOSTREAM
106
107 // ===========================================================================
108 // wxString class core
109 // ===========================================================================
110
111 // ---------------------------------------------------------------------------
112 // construction and conversion
113 // ---------------------------------------------------------------------------
114
115 #if wxUSE_UNICODE
116 /* static */
117 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
118 const wxMBConv& conv)
119 {
120 // anything to do?
121 if ( !psz || nLength == 0 )
122 return SubstrBufFromMB();
123
124 if ( nLength == npos )
125 nLength = wxNO_LEN;
126
127 size_t wcLen;
128 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
129 if ( !wcLen )
130 return SubstrBufFromMB();
131 else
132 return SubstrBufFromMB(wcBuf, wcLen);
133 }
134 #else
135 /* static */
136 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
137 const wxMBConv& conv)
138 {
139 // anything to do?
140 if ( !pwz || nLength == 0 )
141 return SubstrBufFromWC();
142
143 if ( nLength == npos )
144 nLength = wxNO_LEN;
145
146 size_t mbLen;
147 wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
148 if ( !mbLen )
149 return SubstrBufFromWC();
150 else
151 return SubstrBufFromWC(mbBuf, mbLen);
152 }
153 #endif
154
155
156 #if wxUSE_UNICODE
157
158 //Convert wxString in Unicode mode to a multi-byte string
159 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
160 {
161 return conv.cWC2MB(c_str(), length() + 1 /* size, not length */, NULL);
162 }
163
164 #else // ANSI
165
166 #if wxUSE_WCHAR_T
167
168 //Converts this string to a wide character string if unicode
169 //mode is not enabled and wxUSE_WCHAR_T is enabled
170 const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
171 {
172 return conv.cMB2WC(c_str(), length() + 1 /* size, not length */, NULL);
173 }
174
175 #endif // wxUSE_WCHAR_T
176
177 #endif // Unicode/ANSI
178
179 // shrink to minimal size (releasing extra memory)
180 bool wxString::Shrink()
181 {
182 wxString tmp(begin(), end());
183 swap(tmp);
184 return tmp.length() == length();
185 }
186
187 // deprecated compatibility code:
188 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
189 wxChar *wxString::GetWriteBuf(size_t nLen)
190 {
191 return DoGetWriteBuf(nLen);
192 }
193
194 void wxString::UngetWriteBuf()
195 {
196 DoUngetWriteBuf();
197 }
198
199 void wxString::UngetWriteBuf(size_t nLen)
200 {
201 DoUngetWriteBuf(nLen);
202 }
203 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
204
205
206 // ---------------------------------------------------------------------------
207 // data access
208 // ---------------------------------------------------------------------------
209
210 // all functions are inline in string.h
211
212 // ---------------------------------------------------------------------------
213 // assignment operators
214 // ---------------------------------------------------------------------------
215
216 #if !wxUSE_UNICODE
217
218 // same as 'signed char' variant
219 wxString& wxString::operator=(const unsigned char* psz)
220 {
221 *this = (const char *)psz;
222 return *this;
223 }
224
225 #if wxUSE_WCHAR_T
226 wxString& wxString::operator=(const wchar_t *pwz)
227 {
228 wxString str(pwz);
229 swap(str);
230 return *this;
231 }
232 #endif
233
234 #endif
235
236 /*
237 * concatenation functions come in 5 flavours:
238 * string + string
239 * char + string and string + char
240 * C str + string and string + C str
241 */
242
243 wxString operator+(const wxString& str1, const wxString& str2)
244 {
245 #if !wxUSE_STL_BASED_WXSTRING
246 wxASSERT( str1.IsValid() );
247 wxASSERT( str2.IsValid() );
248 #endif
249
250 wxString s = str1;
251 s += str2;
252
253 return s;
254 }
255
256 wxString operator+(const wxString& str, wxUniChar ch)
257 {
258 #if !wxUSE_STL_BASED_WXSTRING
259 wxASSERT( str.IsValid() );
260 #endif
261
262 wxString s = str;
263 s += ch;
264
265 return s;
266 }
267
268 wxString operator+(wxUniChar ch, const wxString& str)
269 {
270 #if !wxUSE_STL_BASED_WXSTRING
271 wxASSERT( str.IsValid() );
272 #endif
273
274 wxString s = ch;
275 s += str;
276
277 return s;
278 }
279
280 wxString operator+(const wxString& str, const char *psz)
281 {
282 #if !wxUSE_STL_BASED_WXSTRING
283 wxASSERT( str.IsValid() );
284 #endif
285
286 wxString s;
287 if ( !s.Alloc(strlen(psz) + str.length()) ) {
288 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
289 }
290 s += str;
291 s += psz;
292
293 return s;
294 }
295
296 wxString operator+(const wxString& str, const wchar_t *pwz)
297 {
298 #if !wxUSE_STL_BASED_WXSTRING
299 wxASSERT( str.IsValid() );
300 #endif
301
302 wxString s;
303 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
304 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
305 }
306 s += str;
307 s += pwz;
308
309 return s;
310 }
311
312 wxString operator+(const char *psz, const wxString& str)
313 {
314 #if !wxUSE_STL_BASED_WXSTRING
315 wxASSERT( str.IsValid() );
316 #endif
317
318 wxString s;
319 if ( !s.Alloc(strlen(psz) + str.length()) ) {
320 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
321 }
322 s = psz;
323 s += str;
324
325 return s;
326 }
327
328 wxString operator+(const wchar_t *pwz, const wxString& str)
329 {
330 #if !wxUSE_STL_BASED_WXSTRING
331 wxASSERT( str.IsValid() );
332 #endif
333
334 wxString s;
335 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
336 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
337 }
338 s = pwz;
339 s += str;
340
341 return s;
342 }
343
344 // ---------------------------------------------------------------------------
345 // string comparison
346 // ---------------------------------------------------------------------------
347
348 #ifdef HAVE_STD_STRING_COMPARE
349
350 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
351 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
352 // sort strings in characters code point order by sorting the byte sequence
353 // in byte values order (i.e. what strcmp() and memcmp() do).
354
355 int wxString::compare(const wxString& str) const
356 {
357 return m_impl.compare(str.m_impl);
358 }
359
360 int wxString::compare(size_t nStart, size_t nLen,
361 const wxString& str) const
362 {
363 size_t pos, len;
364 PosLenToImpl(nStart, nLen, &pos, &len);
365 return m_impl.compare(pos, len, str.m_impl);
366 }
367
368 int wxString::compare(size_t nStart, size_t nLen,
369 const wxString& str,
370 size_t nStart2, size_t nLen2) const
371 {
372 size_t pos, len;
373 PosLenToImpl(nStart, nLen, &pos, &len);
374
375 size_t pos2, len2;
376 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
377
378 return m_impl.compare(pos, len, str.m_impl, pos2, len2);
379 }
380
381 int wxString::compare(const char* sz) const
382 {
383 return m_impl.compare(ImplStr(sz));
384 }
385
386 int wxString::compare(const wchar_t* sz) const
387 {
388 return m_impl.compare(ImplStr(sz));
389 }
390
391 int wxString::compare(size_t nStart, size_t nLen,
392 const char* sz, size_t nCount) const
393 {
394 size_t pos, len;
395 PosLenToImpl(nStart, nLen, &pos, &len);
396
397 SubstrBufFromMB str(ImplStr(sz, nCount));
398
399 return m_impl.compare(pos, len, str.data, str.len);
400 }
401
402 int wxString::compare(size_t nStart, size_t nLen,
403 const wchar_t* sz, size_t nCount) const
404 {
405 size_t pos, len;
406 PosLenToImpl(nStart, nLen, &pos, &len);
407
408 SubstrBufFromWC str(ImplStr(sz, nCount));
409
410 return m_impl.compare(pos, len, str.data, str.len);
411 }
412
413 #else // !HAVE_STD_STRING_COMPARE
414
415 static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
416 const wxStringCharType* s2, size_t l2)
417 {
418 if( l1 == l2 )
419 return wxStringMemcmp(s1, s2, l1);
420 else if( l1 < l2 )
421 {
422 int ret = wxStringMemcmp(s1, s2, l1);
423 return ret == 0 ? -1 : ret;
424 }
425 else
426 {
427 int ret = wxStringMemcmp(s1, s2, l2);
428 return ret == 0 ? +1 : ret;
429 }
430 }
431
432 int wxString::compare(const wxString& str) const
433 {
434 return ::wxDoCmp(m_impl.data(), m_impl.length(),
435 str.m_impl.data(), str.m_impl.length());
436 }
437
438 int wxString::compare(size_t nStart, size_t nLen,
439 const wxString& str) const
440 {
441 wxASSERT(nStart <= length());
442 size_type strLen = length() - nStart;
443 nLen = strLen < nLen ? strLen : nLen;
444
445 size_t pos, len;
446 PosLenToImpl(nStart, nLen, &pos, &len);
447
448 return ::wxDoCmp(m_impl.data() + pos, len,
449 str.m_impl.data(), str.m_impl.length());
450 }
451
452 int wxString::compare(size_t nStart, size_t nLen,
453 const wxString& str,
454 size_t nStart2, size_t nLen2) const
455 {
456 wxASSERT(nStart <= length());
457 wxASSERT(nStart2 <= str.length());
458 size_type strLen = length() - nStart,
459 strLen2 = str.length() - nStart2;
460 nLen = strLen < nLen ? strLen : nLen;
461 nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
462
463 size_t pos, len;
464 PosLenToImpl(nStart, nLen, &pos, &len);
465 size_t pos2, len2;
466 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
467
468 return ::wxDoCmp(m_impl.data() + pos, len,
469 str.m_impl.data() + pos2, len2);
470 }
471
472 int wxString::compare(const char* sz) const
473 {
474 SubstrBufFromMB str(ImplStr(sz, npos));
475 if ( str.len == npos )
476 str.len = wxStringStrlen(str.data);
477 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
478 }
479
480 int wxString::compare(const wchar_t* sz) const
481 {
482 SubstrBufFromWC str(ImplStr(sz, npos));
483 if ( str.len == npos )
484 str.len = wxStringStrlen(str.data);
485 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
486 }
487
488 int wxString::compare(size_t nStart, size_t nLen,
489 const char* sz, size_t nCount) const
490 {
491 wxASSERT(nStart <= length());
492 size_type strLen = length() - nStart;
493 nLen = strLen < nLen ? strLen : nLen;
494
495 size_t pos, len;
496 PosLenToImpl(nStart, nLen, &pos, &len);
497
498 SubstrBufFromMB str(ImplStr(sz, nCount));
499 if ( str.len == npos )
500 str.len = wxStringStrlen(str.data);
501
502 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
503 }
504
505 int wxString::compare(size_t nStart, size_t nLen,
506 const wchar_t* sz, size_t nCount) const
507 {
508 wxASSERT(nStart <= length());
509 size_type strLen = length() - nStart;
510 nLen = strLen < nLen ? strLen : nLen;
511
512 size_t pos, len;
513 PosLenToImpl(nStart, nLen, &pos, &len);
514
515 SubstrBufFromWC str(ImplStr(sz, nCount));
516 if ( str.len == npos )
517 str.len = wxStringStrlen(str.data);
518
519 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
520 }
521
522 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
523
524
525 // ---------------------------------------------------------------------------
526 // find_{first,last}_[not]_of functions
527 // ---------------------------------------------------------------------------
528
529 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
530
531 // NB: All these functions are implemented with the argument being wxChar*,
532 // i.e. widechar string in any Unicode build, even though native string
533 // representation is char* in the UTF-8 build. This is because we couldn't
534 // use memchr() to determine if a character is in a set encoded as UTF-8.
535
536 size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
537 {
538 return find_first_of(sz, nStart, wxStrlen(sz));
539 }
540
541 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
542 {
543 return find_first_not_of(sz, nStart, wxStrlen(sz));
544 }
545
546 size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
547 {
548 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
549
550 size_t idx = nStart;
551 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
552 {
553 if ( wxTmemchr(sz, *i, n) )
554 return idx;
555 }
556
557 return npos;
558 }
559
560 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
561 {
562 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
563
564 size_t idx = nStart;
565 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
566 {
567 if ( !wxTmemchr(sz, *i, n) )
568 return idx;
569 }
570
571 return npos;
572 }
573
574
575 size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
576 {
577 return find_last_of(sz, nStart, wxStrlen(sz));
578 }
579
580 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
581 {
582 return find_last_not_of(sz, nStart, wxStrlen(sz));
583 }
584
585 size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
586 {
587 size_t len = length();
588
589 if ( nStart == npos )
590 {
591 nStart = len - 1;
592 }
593 else
594 {
595 wxASSERT_MSG( nStart <= len, _T("invalid index") );
596 }
597
598 size_t idx = nStart;
599 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
600 i != rend(); --idx, ++i )
601 {
602 if ( wxTmemchr(sz, *i, n) )
603 return idx;
604 }
605
606 return npos;
607 }
608
609 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
610 {
611 size_t len = length();
612
613 if ( nStart == npos )
614 {
615 nStart = len - 1;
616 }
617 else
618 {
619 wxASSERT_MSG( nStart <= len, _T("invalid index") );
620 }
621
622 size_t idx = nStart;
623 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
624 i != rend(); --idx, ++i )
625 {
626 if ( !wxTmemchr(sz, *i, n) )
627 return idx;
628 }
629
630 return npos;
631 }
632
633 size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
634 {
635 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
636
637 size_t idx = nStart;
638 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
639 {
640 if ( *i != ch )
641 return idx;
642 }
643
644 return npos;
645 }
646
647 size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
648 {
649 size_t len = length();
650
651 if ( nStart == npos )
652 {
653 nStart = len - 1;
654 }
655 else
656 {
657 wxASSERT_MSG( nStart <= len, _T("invalid index") );
658 }
659
660 size_t idx = nStart;
661 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
662 i != rend(); --idx, ++i )
663 {
664 if ( *i != ch )
665 return idx;
666 }
667
668 return npos;
669 }
670
671 // the functions above were implemented for wchar_t* arguments in Unicode
672 // build and char* in ANSI build; below are implementations for the other
673 // version:
674 #if wxUSE_UNICODE
675 #define wxOtherCharType char
676 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
677 #else
678 #define wxOtherCharType wchar_t
679 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
680 #endif
681
682 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
683 { return find_first_of(STRCONV(sz), nStart); }
684
685 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
686 size_t n) const
687 { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
688 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
689 { return find_last_of(STRCONV(sz), nStart); }
690 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
691 size_t n) const
692 { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
693 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
694 { return find_first_not_of(STRCONV(sz), nStart); }
695 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
696 size_t n) const
697 { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
698 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
699 { return find_last_not_of(STRCONV(sz), nStart); }
700 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
701 size_t n) const
702 { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
703
704 #undef wxOtherCharType
705 #undef STRCONV
706
707 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
708
709 // ===========================================================================
710 // other common string functions
711 // ===========================================================================
712
713 int wxString::CmpNoCase(const wxString& s) const
714 {
715 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
716
717 size_t idx = 0;
718 const_iterator i1 = begin();
719 const_iterator end1 = end();
720 const_iterator i2 = s.begin();
721 const_iterator end2 = s.end();
722
723 for ( ; i1 != end1 && i2 != end2; ++idx, ++i1, ++i2 )
724 {
725 wxUniChar lower1 = (wxChar)wxTolower(*i1);
726 wxUniChar lower2 = (wxChar)wxTolower(*i2);
727 if ( lower1 != lower2 )
728 return lower1 < lower2 ? -1 : 1;
729 }
730
731 size_t len1 = length();
732 size_t len2 = s.length();
733
734 if ( len1 < len2 )
735 return -1;
736 else if ( len1 > len2 )
737 return 1;
738 return 0;
739 }
740
741
742 #if wxUSE_UNICODE
743
744 #ifdef __MWERKS__
745 #ifndef __SCHAR_MAX__
746 #define __SCHAR_MAX__ 127
747 #endif
748 #endif
749
750 wxString wxString::FromAscii(const char *ascii)
751 {
752 if (!ascii)
753 return wxEmptyString;
754
755 size_t len = strlen( ascii );
756 wxString res;
757
758 if ( len )
759 {
760 wxStringBuffer buf(res, len);
761
762 wchar_t *dest = buf;
763
764 for ( ;; )
765 {
766 if ( (*dest++ = (wchar_t)(unsigned char)*ascii++) == L'\0' )
767 break;
768 }
769 }
770
771 return res;
772 }
773
774 wxString wxString::FromAscii(const char ascii)
775 {
776 // What do we do with '\0' ?
777
778 wxString res;
779 res += (wchar_t)(unsigned char) ascii;
780
781 return res;
782 }
783
784 const wxCharBuffer wxString::ToAscii() const
785 {
786 // this will allocate enough space for the terminating NUL too
787 wxCharBuffer buffer(length());
788
789
790 char *dest = buffer.data();
791
792 const wchar_t *pwc = c_str();
793 for ( ;; )
794 {
795 *dest++ = (char)(*pwc > SCHAR_MAX ? wxT('_') : *pwc);
796
797 // the output string can't have embedded NULs anyhow, so we can safely
798 // stop at first of them even if we do have any
799 if ( !*pwc++ )
800 break;
801 }
802
803 return buffer;
804 }
805
806 #endif // Unicode
807
808 // extract string of length nCount starting at nFirst
809 wxString wxString::Mid(size_t nFirst, size_t nCount) const
810 {
811 size_t nLen = length();
812
813 // default value of nCount is npos and means "till the end"
814 if ( nCount == npos )
815 {
816 nCount = nLen - nFirst;
817 }
818
819 // out-of-bounds requests return sensible things
820 if ( nFirst + nCount > nLen )
821 {
822 nCount = nLen - nFirst;
823 }
824
825 if ( nFirst > nLen )
826 {
827 // AllocCopy() will return empty string
828 return wxEmptyString;
829 }
830
831 wxString dest(*this, nFirst, nCount);
832 if ( dest.length() != nCount )
833 {
834 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
835 }
836
837 return dest;
838 }
839
840 // check that the string starts with prefix and return the rest of the string
841 // in the provided pointer if it is not NULL, otherwise return false
842 bool wxString::StartsWith(const wxChar *prefix, wxString *rest) const
843 {
844 wxASSERT_MSG( prefix, _T("invalid parameter in wxString::StartsWith") );
845
846 // first check if the beginning of the string matches the prefix: note
847 // that we don't have to check that we don't run out of this string as
848 // when we reach the terminating NUL, either prefix string ends too (and
849 // then it's ok) or we break out of the loop because there is no match
850 const wxChar *p = c_str();
851 while ( *prefix )
852 {
853 if ( *prefix++ != *p++ )
854 {
855 // no match
856 return false;
857 }
858 }
859
860 if ( rest )
861 {
862 // put the rest of the string into provided pointer
863 *rest = p;
864 }
865
866 return true;
867 }
868
869
870 // check that the string ends with suffix and return the rest of it in the
871 // provided pointer if it is not NULL, otherwise return false
872 bool wxString::EndsWith(const wxChar *suffix, wxString *rest) const
873 {
874 wxASSERT_MSG( suffix, _T("invalid parameter in wxString::EndssWith") );
875
876 int start = length() - wxStrlen(suffix);
877 if ( start < 0 || wxStrcmp(wx_str() + start, suffix) != 0 )
878 return false;
879
880 if ( rest )
881 {
882 // put the rest of the string into provided pointer
883 rest->assign(*this, 0, start);
884 }
885
886 return true;
887 }
888
889
890 // extract nCount last (rightmost) characters
891 wxString wxString::Right(size_t nCount) const
892 {
893 if ( nCount > length() )
894 nCount = length();
895
896 wxString dest(*this, length() - nCount, nCount);
897 if ( dest.length() != nCount ) {
898 wxFAIL_MSG( _T("out of memory in wxString::Right") );
899 }
900 return dest;
901 }
902
903 // get all characters after the last occurence of ch
904 // (returns the whole string if ch not found)
905 wxString wxString::AfterLast(wxUniChar ch) const
906 {
907 wxString str;
908 int iPos = Find(ch, true);
909 if ( iPos == wxNOT_FOUND )
910 str = *this;
911 else
912 str = wx_str() + iPos + 1;
913
914 return str;
915 }
916
917 // extract nCount first (leftmost) characters
918 wxString wxString::Left(size_t nCount) const
919 {
920 if ( nCount > length() )
921 nCount = length();
922
923 wxString dest(*this, 0, nCount);
924 if ( dest.length() != nCount ) {
925 wxFAIL_MSG( _T("out of memory in wxString::Left") );
926 }
927 return dest;
928 }
929
930 // get all characters before the first occurence of ch
931 // (returns the whole string if ch not found)
932 wxString wxString::BeforeFirst(wxUniChar ch) const
933 {
934 int iPos = Find(ch);
935 if ( iPos == wxNOT_FOUND ) iPos = length();
936 return wxString(*this, 0, iPos);
937 }
938
939 /// get all characters before the last occurence of ch
940 /// (returns empty string if ch not found)
941 wxString wxString::BeforeLast(wxUniChar ch) const
942 {
943 wxString str;
944 int iPos = Find(ch, true);
945 if ( iPos != wxNOT_FOUND && iPos != 0 )
946 str = wxString(c_str(), iPos);
947
948 return str;
949 }
950
951 /// get all characters after the first occurence of ch
952 /// (returns empty string if ch not found)
953 wxString wxString::AfterFirst(wxUniChar ch) const
954 {
955 wxString str;
956 int iPos = Find(ch);
957 if ( iPos != wxNOT_FOUND )
958 str = wx_str() + iPos + 1;
959
960 return str;
961 }
962
963 // replace first (or all) occurences of some substring with another one
964 size_t wxString::Replace(const wxChar *szOld,
965 const wxChar *szNew, bool bReplaceAll)
966 {
967 // if we tried to replace an empty string we'd enter an infinite loop below
968 wxCHECK_MSG( szOld && *szOld && szNew, 0,
969 _T("wxString::Replace(): invalid parameter") );
970
971 size_t uiCount = 0; // count of replacements made
972
973 size_t uiOldLen = wxStrlen(szOld);
974 size_t uiNewLen = wxStrlen(szNew);
975
976 size_t dwPos = 0;
977
978 while ( this->c_str()[dwPos] != wxT('\0') )
979 {
980 //DO NOT USE STRSTR HERE
981 //this string can contain embedded null characters,
982 //so strstr will function incorrectly
983 dwPos = find(szOld, dwPos);
984 if ( dwPos == npos )
985 break; // exit the loop
986 else
987 {
988 //replace this occurance of the old string with the new one
989 replace(dwPos, uiOldLen, szNew, uiNewLen);
990
991 //move up pos past the string that was replaced
992 dwPos += uiNewLen;
993
994 //increase replace count
995 ++uiCount;
996
997 // stop now?
998 if ( !bReplaceAll )
999 break; // exit the loop
1000 }
1001 }
1002
1003 return uiCount;
1004 }
1005
1006 bool wxString::IsAscii() const
1007 {
1008 const wxChar *s = (const wxChar*) *this;
1009 while(*s){
1010 if(!isascii(*s)) return(false);
1011 s++;
1012 }
1013 return(true);
1014 }
1015
1016 bool wxString::IsWord() const
1017 {
1018 const wxChar *s = (const wxChar*) *this;
1019 while(*s){
1020 if(!wxIsalpha(*s)) return(false);
1021 s++;
1022 }
1023 return(true);
1024 }
1025
1026 bool wxString::IsNumber() const
1027 {
1028 const wxChar *s = (const wxChar*) *this;
1029 if (wxStrlen(s))
1030 if ((s[0] == wxT('-')) || (s[0] == wxT('+'))) s++;
1031 while(*s){
1032 if(!wxIsdigit(*s)) return(false);
1033 s++;
1034 }
1035 return(true);
1036 }
1037
1038 wxString wxString::Strip(stripType w) const
1039 {
1040 wxString s = *this;
1041 if ( w & leading ) s.Trim(false);
1042 if ( w & trailing ) s.Trim(true);
1043 return s;
1044 }
1045
1046 // ---------------------------------------------------------------------------
1047 // case conversion
1048 // ---------------------------------------------------------------------------
1049
1050 wxString& wxString::MakeUpper()
1051 {
1052 for ( iterator it = begin(), en = end(); it != en; ++it )
1053 *it = (wxChar)wxToupper(*it);
1054
1055 return *this;
1056 }
1057
1058 wxString& wxString::MakeLower()
1059 {
1060 for ( iterator it = begin(), en = end(); it != en; ++it )
1061 *it = (wxChar)wxTolower(*it);
1062
1063 return *this;
1064 }
1065
1066 // ---------------------------------------------------------------------------
1067 // trimming and padding
1068 // ---------------------------------------------------------------------------
1069
1070 // some compilers (VC++ 6.0 not to name them) return true for a call to
1071 // isspace('ê') in the C locale which seems to be broken to me, but we have to
1072 // live with this by checking that the character is a 7 bit one - even if this
1073 // may fail to detect some spaces (I don't know if Unicode doesn't have
1074 // space-like symbols somewhere except in the first 128 chars), it is arguably
1075 // still better than trimming away accented letters
1076 inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1077
1078 // trims spaces (in the sense of isspace) from left or right side
1079 wxString& wxString::Trim(bool bFromRight)
1080 {
1081 // first check if we're going to modify the string at all
1082 if ( !empty() &&
1083 (
1084 (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1085 (!bFromRight && wxSafeIsspace(GetChar(0u)))
1086 )
1087 )
1088 {
1089 if ( bFromRight )
1090 {
1091 // find last non-space character
1092 reverse_iterator psz = rbegin();
1093 while ( (psz != rend()) && wxSafeIsspace(*psz) )
1094 psz++;
1095
1096 // truncate at trailing space start
1097 erase(psz.base(), end());
1098 }
1099 else
1100 {
1101 // find first non-space character
1102 iterator psz = begin();
1103 while ( (psz != end()) && wxSafeIsspace(*psz) )
1104 psz++;
1105
1106 // fix up data and length
1107 erase(begin(), psz);
1108 }
1109 }
1110
1111 return *this;
1112 }
1113
1114 // adds nCount characters chPad to the string from either side
1115 wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1116 {
1117 wxString s(chPad, nCount);
1118
1119 if ( bFromRight )
1120 *this += s;
1121 else
1122 {
1123 s += *this;
1124 swap(s);
1125 }
1126
1127 return *this;
1128 }
1129
1130 // truncate the string
1131 wxString& wxString::Truncate(size_t uiLen)
1132 {
1133 if ( uiLen < length() )
1134 {
1135 erase(begin() + uiLen, end());
1136 }
1137 //else: nothing to do, string is already short enough
1138
1139 return *this;
1140 }
1141
1142 // ---------------------------------------------------------------------------
1143 // finding (return wxNOT_FOUND if not found and index otherwise)
1144 // ---------------------------------------------------------------------------
1145
1146 // find a character
1147 int wxString::Find(wxUniChar ch, bool bFromEnd) const
1148 {
1149 size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1150
1151 return (idx == npos) ? wxNOT_FOUND : (int)idx;
1152 }
1153
1154 // find a sub-string (like strstr)
1155 int wxString::Find(const wxChar *pszSub) const
1156 {
1157 size_type idx = find(pszSub);
1158
1159 return (idx == npos) ? wxNOT_FOUND : (int)idx;
1160 }
1161
1162 // ----------------------------------------------------------------------------
1163 // conversion to numbers
1164 // ----------------------------------------------------------------------------
1165
1166 // the implementation of all the functions below is exactly the same so factor
1167 // it out
1168
1169 template <typename T, typename F>
1170 bool wxStringToIntType(const wxChar *start,
1171 T *val,
1172 int base,
1173 F func)
1174 {
1175 wxCHECK_MSG( val, false, _T("NULL output pointer") );
1176 wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );
1177
1178 #ifndef __WXWINCE__
1179 errno = 0;
1180 #endif
1181
1182 wxChar *end;
1183 *val = (*func)(start, &end, base);
1184
1185 // return true only if scan was stopped by the terminating NUL and if the
1186 // string was not empty to start with and no under/overflow occurred
1187 return !*end && (end != start)
1188 #ifndef __WXWINCE__
1189 && (errno != ERANGE)
1190 #endif
1191 ;
1192 }
1193
1194 bool wxString::ToLong(long *val, int base) const
1195 {
1196 return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtol);
1197 }
1198
1199 bool wxString::ToULong(unsigned long *val, int base) const
1200 {
1201 return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtoul);
1202 }
1203
1204 bool wxString::ToLongLong(wxLongLong_t *val, int base) const
1205 {
1206 #ifdef wxHAS_STRTOLL
1207 return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtoll);
1208 #else
1209 // TODO: implement this ourselves
1210 wxUnusedVar(val);
1211 wxUnusedVar(base);
1212 return false;
1213 #endif // wxHAS_STRTOLL
1214 }
1215
1216 bool wxString::ToULongLong(wxULongLong_t *val, int base) const
1217 {
1218 #ifdef wxHAS_STRTOLL
1219 return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtoull);
1220 #else
1221 // TODO: implement this ourselves
1222 wxUnusedVar(val);
1223 wxUnusedVar(base);
1224 return false;
1225 #endif
1226 }
1227
1228 bool wxString::ToDouble(double *val) const
1229 {
1230 wxCHECK_MSG( val, false, _T("NULL pointer in wxString::ToDouble") );
1231
1232 #ifndef __WXWINCE__
1233 errno = 0;
1234 #endif
1235
1236 const wxChar *start = c_str();
1237 wxChar *end;
1238 *val = wxStrtod(start, &end);
1239
1240 // return true only if scan was stopped by the terminating NUL and if the
1241 // string was not empty to start with and no under/overflow occurred
1242 return !*end && (end != start)
1243 #ifndef __WXWINCE__
1244 && (errno != ERANGE)
1245 #endif
1246 ;
1247 }
1248
1249 // ---------------------------------------------------------------------------
1250 // formatted output
1251 // ---------------------------------------------------------------------------
1252
1253 /* static */
1254 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1255 wxString wxStringPrintfMixinBase::DoFormat(const wxChar *format, ...)
1256 #else
1257 wxString wxString::DoFormat(const wxChar *format, ...)
1258 #endif
1259 {
1260 va_list argptr;
1261 va_start(argptr, format);
1262
1263 wxString s;
1264 s.PrintfV(format, argptr);
1265
1266 va_end(argptr);
1267
1268 return s;
1269 }
1270
1271 /* static */
1272 wxString wxString::FormatV(const wxString& format, va_list argptr)
1273 {
1274 wxString s;
1275 s.PrintfV(format, argptr);
1276 return s;
1277 }
1278
1279 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1280 int wxStringPrintfMixinBase::DoPrintf(const wxChar *format, ...)
1281 #else
1282 int wxString::DoPrintf(const wxChar *format, ...)
1283 #endif
1284 {
1285 va_list argptr;
1286 va_start(argptr, format);
1287
1288 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1289 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1290 // because it's the only cast that works safely for downcasting when
1291 // multiple inheritance is used:
1292 wxString *str = static_cast<wxString*>(this);
1293 #else
1294 wxString *str = this;
1295 #endif
1296
1297 int iLen = str->PrintfV(format, argptr);
1298
1299 va_end(argptr);
1300
1301 return iLen;
1302 }
1303
1304 int wxString::PrintfV(const wxString& format, va_list argptr)
1305 {
1306 int size = 1024;
1307
1308 for ( ;; )
1309 {
1310 wxStringBuffer tmp(*this, size + 1);
1311 wxChar *buf = tmp;
1312
1313 if ( !buf )
1314 {
1315 // out of memory
1316 return -1;
1317 }
1318
1319 // wxVsnprintf() may modify the original arg pointer, so pass it
1320 // only a copy
1321 va_list argptrcopy;
1322 wxVaCopy(argptrcopy, argptr);
1323 int len = wxVsnprintf(buf, size, format, argptrcopy);
1324 va_end(argptrcopy);
1325
1326 // some implementations of vsnprintf() don't NUL terminate
1327 // the string if there is not enough space for it so
1328 // always do it manually
1329 buf[size] = _T('\0');
1330
1331 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1332 // total number of characters which would have been written if the
1333 // buffer were large enough (newer standards such as Unix98)
1334 if ( len < 0 )
1335 {
1336 #if wxUSE_WXVSNPRINTF
1337 // we know that our own implementation of wxVsnprintf() returns -1
1338 // only for a format error - thus there's something wrong with
1339 // the user's format string
1340 return -1;
1341 #else // assume that system version only returns error if not enough space
1342 // still not enough, as we don't know how much we need, double the
1343 // current size of the buffer
1344 size *= 2;
1345 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1346 }
1347 else if ( len >= size )
1348 {
1349 #if wxUSE_WXVSNPRINTF
1350 // we know that our own implementation of wxVsnprintf() returns
1351 // size+1 when there's not enough space but that's not the size
1352 // of the required buffer!
1353 size *= 2; // so we just double the current size of the buffer
1354 #else
1355 // some vsnprintf() implementations NUL-terminate the buffer and
1356 // some don't in len == size case, to be safe always add 1
1357 size = len + 1;
1358 #endif
1359 }
1360 else // ok, there was enough space
1361 {
1362 break;
1363 }
1364 }
1365
1366 // we could have overshot
1367 Shrink();
1368
1369 return length();
1370 }
1371
1372 // ----------------------------------------------------------------------------
1373 // misc other operations
1374 // ----------------------------------------------------------------------------
1375
1376 // returns true if the string matches the pattern which may contain '*' and
1377 // '?' metacharacters (as usual, '?' matches any character and '*' any number
1378 // of them)
1379 bool wxString::Matches(const wxChar *pszMask) const
1380 {
1381 // I disable this code as it doesn't seem to be faster (in fact, it seems
1382 // to be much slower) than the old, hand-written code below and using it
1383 // here requires always linking with libregex even if the user code doesn't
1384 // use it
1385 #if 0 // wxUSE_REGEX
1386 // first translate the shell-like mask into a regex
1387 wxString pattern;
1388 pattern.reserve(wxStrlen(pszMask));
1389
1390 pattern += _T('^');
1391 while ( *pszMask )
1392 {
1393 switch ( *pszMask )
1394 {
1395 case _T('?'):
1396 pattern += _T('.');
1397 break;
1398
1399 case _T('*'):
1400 pattern += _T(".*");
1401 break;
1402
1403 case _T('^'):
1404 case _T('.'):
1405 case _T('$'):
1406 case _T('('):
1407 case _T(')'):
1408 case _T('|'):
1409 case _T('+'):
1410 case _T('\\'):
1411 // these characters are special in a RE, quote them
1412 // (however note that we don't quote '[' and ']' to allow
1413 // using them for Unix shell like matching)
1414 pattern += _T('\\');
1415 // fall through
1416
1417 default:
1418 pattern += *pszMask;
1419 }
1420
1421 pszMask++;
1422 }
1423 pattern += _T('$');
1424
1425 // and now use it
1426 return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
1427 #else // !wxUSE_REGEX
1428 // TODO: this is, of course, awfully inefficient...
1429
1430 // the char currently being checked
1431 const wxChar *pszTxt = c_str();
1432
1433 // the last location where '*' matched
1434 const wxChar *pszLastStarInText = NULL;
1435 const wxChar *pszLastStarInMask = NULL;
1436
1437 match:
1438 for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
1439 switch ( *pszMask ) {
1440 case wxT('?'):
1441 if ( *pszTxt == wxT('\0') )
1442 return false;
1443
1444 // pszTxt and pszMask will be incremented in the loop statement
1445
1446 break;
1447
1448 case wxT('*'):
1449 {
1450 // remember where we started to be able to backtrack later
1451 pszLastStarInText = pszTxt;
1452 pszLastStarInMask = pszMask;
1453
1454 // ignore special chars immediately following this one
1455 // (should this be an error?)
1456 while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
1457 pszMask++;
1458
1459 // if there is nothing more, match
1460 if ( *pszMask == wxT('\0') )
1461 return true;
1462
1463 // are there any other metacharacters in the mask?
1464 size_t uiLenMask;
1465 const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));
1466
1467 if ( pEndMask != NULL ) {
1468 // we have to match the string between two metachars
1469 uiLenMask = pEndMask - pszMask;
1470 }
1471 else {
1472 // we have to match the remainder of the string
1473 uiLenMask = wxStrlen(pszMask);
1474 }
1475
1476 wxString strToMatch(pszMask, uiLenMask);
1477 const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
1478 if ( pMatch == NULL )
1479 return false;
1480
1481 // -1 to compensate "++" in the loop
1482 pszTxt = pMatch + uiLenMask - 1;
1483 pszMask += uiLenMask - 1;
1484 }
1485 break;
1486
1487 default:
1488 if ( *pszMask != *pszTxt )
1489 return false;
1490 break;
1491 }
1492 }
1493
1494 // match only if nothing left
1495 if ( *pszTxt == wxT('\0') )
1496 return true;
1497
1498 // if we failed to match, backtrack if we can
1499 if ( pszLastStarInText ) {
1500 pszTxt = pszLastStarInText + 1;
1501 pszMask = pszLastStarInMask;
1502
1503 pszLastStarInText = NULL;
1504
1505 // don't bother resetting pszLastStarInMask, it's unnecessary
1506
1507 goto match;
1508 }
1509
1510 return false;
1511 #endif // wxUSE_REGEX/!wxUSE_REGEX
1512 }
1513
1514 // Count the number of chars
1515 int wxString::Freq(wxUniChar ch) const
1516 {
1517 int count = 0;
1518 for ( const_iterator i = begin(); i != end(); ++i )
1519 {
1520 if ( *i == ch )
1521 count ++;
1522 }
1523 return count;
1524 }
1525
1526 // convert to upper case, return the copy of the string
1527 wxString wxString::Upper() const
1528 { wxString s(*this); return s.MakeUpper(); }
1529
1530 // convert to lower case, return the copy of the string
1531 wxString wxString::Lower() const { wxString s(*this); return s.MakeLower(); }
1532