split string.{h,cpp} into {string,stringimpl,arrstr}.{h,cpp} to make the files more...
[wxWidgets.git] / src / common / string.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
5 // Modified by:
6 // Created: 29/01/98
7 // RCS-ID: $Id$
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
12
13 /*
14 * About ref counting:
15 * 1) all empty strings use g_strEmpty, nRefs = -1 (set in Init())
16 * 2) AllocBuffer() sets nRefs to 1, Lock() increments it by one
17 * 3) Unlock() decrements nRefs and frees memory if it goes to 0
18 */
19
20 // ===========================================================================
21 // headers, declarations, constants
22 // ===========================================================================
23
24 // For compilers that support precompilation, includes "wx.h".
25 #include "wx/wxprec.h"
26
27 #ifdef __BORLANDC__
28 #pragma hdrstop
29 #endif
30
31 #ifndef WX_PRECOMP
32 #include "wx/string.h"
33 #endif
34
35 #include <ctype.h>
36
37 #ifndef __WXWINCE__
38 #include <errno.h>
39 #endif
40
41 #include <string.h>
42 #include <stdlib.h>
43
44 #ifdef __SALFORDC__
45 #include <clib.h>
46 #endif
47
48
49 // string handling functions used by wxString:
50 #if wxUSE_UNICODE_UTF8
51 #define wxStringMemcpy memcpy
52 #define wxStringMemcmp memcmp
53 #define wxStringMemchr memchr
54 #define wxStringStrlen strlen
55 #else
56 #define wxStringMemcpy wxTmemcpy
57 #define wxStringMemcmp wxTmemcmp
58 #define wxStringMemchr wxTmemchr
59 #define wxStringStrlen wxStrlen
60 #endif
61
62
63 // ---------------------------------------------------------------------------
64 // static class variables definition
65 // ---------------------------------------------------------------------------
66
67 //According to STL _must_ be a -1 size_t
68 const size_t wxString::npos = (size_t) -1;
69
70 // ----------------------------------------------------------------------------
71 // global functions
72 // ----------------------------------------------------------------------------
73
74 #if wxUSE_STD_IOSTREAM
75
76 #include <iostream>
77
78 wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
79 {
80 return os << str.c_str();
81 }
82
83 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
84 {
85 #if wxUSE_UNICODE && !defined(__BORLANDC__)
86 return os << str.AsWChar();
87 #else
88 return os << str.AsChar();
89 #endif
90 }
91
92 #endif // wxUSE_STD_IOSTREAM
93
94 // ===========================================================================
95 // wxString class core
96 // ===========================================================================
97
98 // ---------------------------------------------------------------------------
99 // construction and conversion
100 // ---------------------------------------------------------------------------
101
102 #if wxUSE_UNICODE
103 /* static */
104 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
105 const wxMBConv& conv)
106 {
107 // anything to do?
108 if ( !psz || nLength == 0 )
109 return SubstrBufFromMB();
110
111 if ( nLength == npos )
112 nLength = wxNO_LEN;
113
114 size_t wcLen;
115 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
116 if ( !wcLen )
117 return SubstrBufFromMB();
118 else
119 return SubstrBufFromMB(wcBuf, wcLen);
120 }
121 #else
122 /* static */
123 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
124 const wxMBConv& conv)
125 {
126 // anything to do?
127 if ( !pwz || nLength == 0 )
128 return SubstrBufFromWC();
129
130 if ( nLength == npos )
131 nLength = wxNO_LEN;
132
133 size_t mbLen;
134 wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
135 if ( !mbLen )
136 return SubstrBufFromWC();
137 else
138 return SubstrBufFromWC(mbBuf, mbLen);
139 }
140 #endif
141
142
143 #if wxUSE_UNICODE
144
145 // from multibyte string
146 wxString::wxString(const char *psz, const wxMBConv& conv, size_t nLength)
147 {
148 // FIXME-UTF8: this will need changes
149
150 // anything to do?
151 if ( psz && nLength != 0 )
152 {
153 if ( nLength == npos )
154 {
155 nLength = wxNO_LEN;
156 }
157
158 size_t nLenWide;
159 wxWCharBuffer wbuf = conv.cMB2WC(psz, nLength, &nLenWide);
160
161 if ( nLenWide )
162 assign(wbuf, nLenWide);
163 }
164 }
165
166 wxString::wxString(const char *psz, size_t nLength)
167 {
168 assign(psz, nLength);
169 }
170
171 //Convert wxString in Unicode mode to a multi-byte string
172 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
173 {
174 return conv.cWC2MB(c_str(), length() + 1 /* size, not length */, NULL);
175 }
176
177 #else // ANSI
178
179 #if wxUSE_WCHAR_T
180
181 // from wide string
182 wxString::wxString(const wchar_t *pwz, const wxMBConv& conv, size_t nLength)
183 {
184 // FIXME-UTF8: this will need changes
185
186 // anything to do?
187 if ( pwz && nLength != 0 )
188 {
189 if ( nLength == npos )
190 {
191 nLength = wxNO_LEN;
192 }
193
194 size_t nLenMB;
195 wxCharBuffer buf = conv.cWC2MB(pwz, nLength, &nLenMB);
196
197 if ( nLenMB )
198 assign(buf, nLenMB);
199 }
200
201 }
202
203 wxString::wxString(const wchar_t *pwz, size_t nLength)
204 {
205 assign(pwz, nLength);
206 }
207
208 //Converts this string to a wide character string if unicode
209 //mode is not enabled and wxUSE_WCHAR_T is enabled
210 const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
211 {
212 return conv.cMB2WC(c_str(), length() + 1 /* size, not length */, NULL);
213 }
214
215 #endif // wxUSE_WCHAR_T
216
217 #endif // Unicode/ANSI
218
219 // shrink to minimal size (releasing extra memory)
220 bool wxString::Shrink()
221 {
222 wxString tmp(begin(), end());
223 swap(tmp);
224 return tmp.length() == length();
225 }
226
227 // deprecated compatibility code:
228 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
229 wxChar *wxString::GetWriteBuf(size_t nLen)
230 {
231 return DoGetWriteBuf(nLen);
232 }
233
234 void wxString::UngetWriteBuf()
235 {
236 DoUngetWriteBuf();
237 }
238
239 void wxString::UngetWriteBuf(size_t nLen)
240 {
241 DoUngetWriteBuf(nLen);
242 }
243 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
244
245
246 // ---------------------------------------------------------------------------
247 // data access
248 // ---------------------------------------------------------------------------
249
250 // all functions are inline in string.h
251
252 // ---------------------------------------------------------------------------
253 // assignment operators
254 // ---------------------------------------------------------------------------
255
256 #if !wxUSE_UNICODE
257
258 // same as 'signed char' variant
259 wxString& wxString::operator=(const unsigned char* psz)
260 {
261 *this = (const char *)psz;
262 return *this;
263 }
264
265 #if wxUSE_WCHAR_T
266 wxString& wxString::operator=(const wchar_t *pwz)
267 {
268 wxString str(pwz);
269 swap(str);
270 return *this;
271 }
272 #endif
273
274 #endif
275
276 /*
277 * concatenation functions come in 5 flavours:
278 * string + string
279 * char + string and string + char
280 * C str + string and string + C str
281 */
282
283 wxString operator+(const wxString& str1, const wxString& str2)
284 {
285 #if !wxUSE_STL_BASED_WXSTRING
286 wxASSERT( str1.IsValid() );
287 wxASSERT( str2.IsValid() );
288 #endif
289
290 wxString s = str1;
291 s += str2;
292
293 return s;
294 }
295
296 wxString operator+(const wxString& str, wxUniChar ch)
297 {
298 #if !wxUSE_STL_BASED_WXSTRING
299 wxASSERT( str.IsValid() );
300 #endif
301
302 wxString s = str;
303 s += ch;
304
305 return s;
306 }
307
308 wxString operator+(wxUniChar ch, const wxString& str)
309 {
310 #if !wxUSE_STL_BASED_WXSTRING
311 wxASSERT( str.IsValid() );
312 #endif
313
314 wxString s = ch;
315 s += str;
316
317 return s;
318 }
319
320 wxString operator+(const wxString& str, const char *psz)
321 {
322 #if !wxUSE_STL_BASED_WXSTRING
323 wxASSERT( str.IsValid() );
324 #endif
325
326 wxString s;
327 if ( !s.Alloc(strlen(psz) + str.length()) ) {
328 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
329 }
330 s += str;
331 s += psz;
332
333 return s;
334 }
335
336 wxString operator+(const wxString& str, const wchar_t *pwz)
337 {
338 #if !wxUSE_STL_BASED_WXSTRING
339 wxASSERT( str.IsValid() );
340 #endif
341
342 wxString s;
343 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
344 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
345 }
346 s += str;
347 s += pwz;
348
349 return s;
350 }
351
352 wxString operator+(const char *psz, const wxString& str)
353 {
354 #if !wxUSE_STL_BASED_WXSTRING
355 wxASSERT( str.IsValid() );
356 #endif
357
358 wxString s;
359 if ( !s.Alloc(strlen(psz) + str.length()) ) {
360 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
361 }
362 s = psz;
363 s += str;
364
365 return s;
366 }
367
368 wxString operator+(const wchar_t *pwz, const wxString& str)
369 {
370 #if !wxUSE_STL_BASED_WXSTRING
371 wxASSERT( str.IsValid() );
372 #endif
373
374 wxString s;
375 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
376 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
377 }
378 s = pwz;
379 s += str;
380
381 return s;
382 }
383
384 // ---------------------------------------------------------------------------
385 // string comparison
386 // ---------------------------------------------------------------------------
387
388 #ifdef HAVE_STD_STRING_COMPARE
389
390 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
391 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
392 // sort strings in characters code point order by sorting the byte sequence
393 // in byte values order (i.e. what strcmp() and memcmp() do).
394
395 int wxString::compare(const wxString& str) const
396 {
397 return m_impl.compare(str.m_impl);
398 }
399
400 int wxString::compare(size_t nStart, size_t nLen,
401 const wxString& str) const
402 {
403 size_t pos, len;
404 PosLenToImpl(nStart, nLen, &pos, &len);
405 return m_impl.compare(pos, len, str.m_impl);
406 }
407
408 int wxString::compare(size_t nStart, size_t nLen,
409 const wxString& str,
410 size_t nStart2, size_t nLen2) const
411 {
412 size_t pos, len;
413 PosLenToImpl(nStart, nLen, &pos, &len);
414
415 size_t pos2, len2;
416 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
417
418 return m_impl.compare(pos, len, str.m_impl, pos2, len2);
419 }
420
421 int wxString::compare(const char* sz) const
422 {
423 return m_impl.compare(ImplStr(sz));
424 }
425
426 int wxString::compare(const wchar_t* sz) const
427 {
428 return m_impl.compare(ImplStr(sz));
429 }
430
431 int wxString::compare(size_t nStart, size_t nLen,
432 const char* sz, size_t nCount) const
433 {
434 size_t pos, len;
435 PosLenToImpl(nStart, nLen, &pos, &len);
436
437 SubstrBufFromMB str(ImplStr(sz, nCount));
438
439 return m_impl.compare(pos, len, str.data, str.len);
440 }
441
442 int wxString::compare(size_t nStart, size_t nLen,
443 const wchar_t* sz, size_t nCount) const
444 {
445 size_t pos, len;
446 PosLenToImpl(nStart, nLen, &pos, &len);
447
448 SubstrBufFromWC str(ImplStr(sz, nCount));
449
450 return m_impl.compare(pos, len, str.data, str.len);
451 }
452
453 #else // !HAVE_STD_STRING_COMPARE
454
455 static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
456 const wxStringCharType* s2, size_t l2)
457 {
458 if( l1 == l2 )
459 return wxStringMemcmp(s1, s2, l1);
460 else if( l1 < l2 )
461 {
462 int ret = wxStringMemcmp(s1, s2, l1);
463 return ret == 0 ? -1 : ret;
464 }
465 else
466 {
467 int ret = wxStringMemcmp(s1, s2, l2);
468 return ret == 0 ? +1 : ret;
469 }
470 }
471
472 int wxString::compare(const wxString& str) const
473 {
474 return ::wxDoCmp(m_impl.data(), m_impl.length(),
475 str.m_impl.data(), str.m_impl.length());
476 }
477
478 int wxString::compare(size_t nStart, size_t nLen,
479 const wxString& str) const
480 {
481 wxASSERT(nStart <= length());
482 size_type strLen = length() - nStart;
483 nLen = strLen < nLen ? strLen : nLen;
484
485 size_t pos, len;
486 PosLenToImpl(nStart, nLen, &pos, &len);
487
488 return ::wxDoCmp(m_impl.data() + pos, len,
489 str.m_impl.data(), str.m_impl.length());
490 }
491
492 int wxString::compare(size_t nStart, size_t nLen,
493 const wxString& str,
494 size_t nStart2, size_t nLen2) const
495 {
496 wxASSERT(nStart <= length());
497 wxASSERT(nStart2 <= str.length());
498 size_type strLen = length() - nStart,
499 strLen2 = str.length() - nStart2;
500 nLen = strLen < nLen ? strLen : nLen;
501 nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
502
503 size_t pos, len;
504 PosLenToImpl(nStart, nLen, &pos, &len);
505 size_t pos2, len2;
506 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
507
508 return ::wxDoCmp(m_impl.data() + pos, len,
509 str.m_impl.data() + pos2, len2);
510 }
511
512 int wxString::compare(const char* sz) const
513 {
514 SubstrBufFromMB str(ImplStr(sz, npos));
515 if ( str.len == npos )
516 str.len = wxStringStrlen(str.data);
517 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
518 }
519
520 int wxString::compare(const wchar_t* sz) const
521 {
522 SubstrBufFromWC str(ImplStr(sz, npos));
523 if ( str.len == npos )
524 str.len = wxStringStrlen(str.data);
525 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
526 }
527
528 int wxString::compare(size_t nStart, size_t nLen,
529 const char* sz, size_t nCount) const
530 {
531 wxASSERT(nStart <= length());
532 size_type strLen = length() - nStart;
533 nLen = strLen < nLen ? strLen : nLen;
534
535 size_t pos, len;
536 PosLenToImpl(nStart, nLen, &pos, &len);
537
538 SubstrBufFromMB str(ImplStr(sz, nCount));
539 if ( str.len == npos )
540 str.len = wxStringStrlen(str.data);
541
542 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
543 }
544
545 int wxString::compare(size_t nStart, size_t nLen,
546 const wchar_t* sz, size_t nCount) const
547 {
548 wxASSERT(nStart <= length());
549 size_type strLen = length() - nStart;
550 nLen = strLen < nLen ? strLen : nLen;
551
552 size_t pos, len;
553 PosLenToImpl(nStart, nLen, &pos, &len);
554
555 SubstrBufFromWC str(ImplStr(sz, nCount));
556 if ( str.len == npos )
557 str.len = wxStringStrlen(str.data);
558
559 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
560 }
561
562 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
563
564
565 // ---------------------------------------------------------------------------
566 // find_{first,last}_[not]_of functions
567 // ---------------------------------------------------------------------------
568
569 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
570
571 // NB: All these functions are implemented with the argument being wxChar*,
572 // i.e. widechar string in any Unicode build, even though native string
573 // representation is char* in the UTF-8 build. This is because we couldn't
574 // use memchr() to determine if a character is in a set encoded as UTF-8.
575
576 size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
577 {
578 return find_first_of(sz, nStart, wxStrlen(sz));
579 }
580
581 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
582 {
583 return find_first_not_of(sz, nStart, wxStrlen(sz));
584 }
585
586 size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
587 {
588 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
589
590 size_t idx = nStart;
591 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
592 {
593 if ( wxTmemchr(sz, *i, n) )
594 return idx;
595 }
596
597 return npos;
598 }
599
600 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
601 {
602 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
603
604 size_t idx = nStart;
605 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
606 {
607 if ( !wxTmemchr(sz, *i, n) )
608 return idx;
609 }
610
611 return npos;
612 }
613
614
615 size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
616 {
617 return find_last_of(sz, nStart, wxStrlen(sz));
618 }
619
620 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
621 {
622 return find_last_not_of(sz, nStart, wxStrlen(sz));
623 }
624
625 size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
626 {
627 size_t len = length();
628
629 if ( nStart == npos )
630 {
631 nStart = len - 1;
632 }
633 else
634 {
635 wxASSERT_MSG( nStart <= len, _T("invalid index") );
636 }
637
638 size_t idx = nStart;
639 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
640 i != rend(); --idx, ++i )
641 {
642 if ( wxTmemchr(sz, *i, n) )
643 return idx;
644 }
645
646 return npos;
647 }
648
649 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
650 {
651 size_t len = length();
652
653 if ( nStart == npos )
654 {
655 nStart = len - 1;
656 }
657 else
658 {
659 wxASSERT_MSG( nStart <= len, _T("invalid index") );
660 }
661
662 size_t idx = nStart;
663 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
664 i != rend(); --idx, ++i )
665 {
666 if ( !wxTmemchr(sz, *i, n) )
667 return idx;
668 }
669
670 return npos;
671 }
672
673 size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
674 {
675 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
676
677 size_t idx = nStart;
678 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
679 {
680 if ( *i != ch )
681 return idx;
682 }
683
684 return npos;
685 }
686
687 size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
688 {
689 size_t len = length();
690
691 if ( nStart == npos )
692 {
693 nStart = len - 1;
694 }
695 else
696 {
697 wxASSERT_MSG( nStart <= len, _T("invalid index") );
698 }
699
700 size_t idx = nStart;
701 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
702 i != rend(); --idx, ++i )
703 {
704 if ( *i != ch )
705 return idx;
706 }
707
708 return npos;
709 }
710
711 // the functions above were implemented for wchar_t* arguments in Unicode
712 // build and char* in ANSI build; below are implementations for the other
713 // version:
714 #if wxUSE_UNICODE
715 #define wxOtherCharType char
716 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
717 #else
718 #define wxOtherCharType wchar_t
719 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
720 #endif
721
722 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
723 { return find_first_of(STRCONV(sz), nStart); }
724
725 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
726 size_t n) const
727 { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
728 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
729 { return find_last_of(STRCONV(sz), nStart); }
730 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
731 size_t n) const
732 { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
733 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
734 { return find_first_not_of(STRCONV(sz), nStart); }
735 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
736 size_t n) const
737 { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
738 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
739 { return find_last_not_of(STRCONV(sz), nStart); }
740 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
741 size_t n) const
742 { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
743
744 #undef wxOtherCharType
745 #undef STRCONV
746
747 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
748
749 // ===========================================================================
750 // other common string functions
751 // ===========================================================================
752
753 int wxString::CmpNoCase(const wxString& s) const
754 {
755 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
756
757 size_t idx = 0;
758 const_iterator i1 = begin();
759 const_iterator end1 = end();
760 const_iterator i2 = s.begin();
761 const_iterator end2 = s.end();
762
763 for ( ; i1 != end1 && i2 != end2; ++idx, ++i1, ++i2 )
764 {
765 wxUniChar lower1 = (wxChar)wxTolower(*i1);
766 wxUniChar lower2 = (wxChar)wxTolower(*i2);
767 if ( lower1 != lower2 )
768 return lower1 < lower2 ? -1 : 1;
769 }
770
771 size_t len1 = length();
772 size_t len2 = s.length();
773
774 if ( len1 < len2 )
775 return -1;
776 else if ( len1 > len2 )
777 return 1;
778 return 0;
779 }
780
781
782 #if wxUSE_UNICODE
783
784 #ifdef __MWERKS__
785 #ifndef __SCHAR_MAX__
786 #define __SCHAR_MAX__ 127
787 #endif
788 #endif
789
790 wxString wxString::FromAscii(const char *ascii)
791 {
792 if (!ascii)
793 return wxEmptyString;
794
795 size_t len = strlen( ascii );
796 wxString res;
797
798 if ( len )
799 {
800 wxStringBuffer buf(res, len);
801
802 wchar_t *dest = buf;
803
804 for ( ;; )
805 {
806 if ( (*dest++ = (wchar_t)(unsigned char)*ascii++) == L'\0' )
807 break;
808 }
809 }
810
811 return res;
812 }
813
814 wxString wxString::FromAscii(const char ascii)
815 {
816 // What do we do with '\0' ?
817
818 wxString res;
819 res += (wchar_t)(unsigned char) ascii;
820
821 return res;
822 }
823
824 const wxCharBuffer wxString::ToAscii() const
825 {
826 // this will allocate enough space for the terminating NUL too
827 wxCharBuffer buffer(length());
828
829
830 char *dest = buffer.data();
831
832 const wchar_t *pwc = c_str();
833 for ( ;; )
834 {
835 *dest++ = (char)(*pwc > SCHAR_MAX ? wxT('_') : *pwc);
836
837 // the output string can't have embedded NULs anyhow, so we can safely
838 // stop at first of them even if we do have any
839 if ( !*pwc++ )
840 break;
841 }
842
843 return buffer;
844 }
845
846 #endif // Unicode
847
848 // extract string of length nCount starting at nFirst
849 wxString wxString::Mid(size_t nFirst, size_t nCount) const
850 {
851 size_t nLen = length();
852
853 // default value of nCount is npos and means "till the end"
854 if ( nCount == npos )
855 {
856 nCount = nLen - nFirst;
857 }
858
859 // out-of-bounds requests return sensible things
860 if ( nFirst + nCount > nLen )
861 {
862 nCount = nLen - nFirst;
863 }
864
865 if ( nFirst > nLen )
866 {
867 // AllocCopy() will return empty string
868 return wxEmptyString;
869 }
870
871 wxString dest(*this, nFirst, nCount);
872 if ( dest.length() != nCount )
873 {
874 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
875 }
876
877 return dest;
878 }
879
880 // check that the string starts with prefix and return the rest of the string
881 // in the provided pointer if it is not NULL, otherwise return false
882 bool wxString::StartsWith(const wxChar *prefix, wxString *rest) const
883 {
884 wxASSERT_MSG( prefix, _T("invalid parameter in wxString::StartsWith") );
885
886 // first check if the beginning of the string matches the prefix: note
887 // that we don't have to check that we don't run out of this string as
888 // when we reach the terminating NUL, either prefix string ends too (and
889 // then it's ok) or we break out of the loop because there is no match
890 const wxChar *p = c_str();
891 while ( *prefix )
892 {
893 if ( *prefix++ != *p++ )
894 {
895 // no match
896 return false;
897 }
898 }
899
900 if ( rest )
901 {
902 // put the rest of the string into provided pointer
903 *rest = p;
904 }
905
906 return true;
907 }
908
909
910 // check that the string ends with suffix and return the rest of it in the
911 // provided pointer if it is not NULL, otherwise return false
912 bool wxString::EndsWith(const wxChar *suffix, wxString *rest) const
913 {
914 wxASSERT_MSG( suffix, _T("invalid parameter in wxString::EndssWith") );
915
916 int start = length() - wxStrlen(suffix);
917 if ( start < 0 || wxStrcmp(wx_str() + start, suffix) != 0 )
918 return false;
919
920 if ( rest )
921 {
922 // put the rest of the string into provided pointer
923 rest->assign(*this, 0, start);
924 }
925
926 return true;
927 }
928
929
930 // extract nCount last (rightmost) characters
931 wxString wxString::Right(size_t nCount) const
932 {
933 if ( nCount > length() )
934 nCount = length();
935
936 wxString dest(*this, length() - nCount, nCount);
937 if ( dest.length() != nCount ) {
938 wxFAIL_MSG( _T("out of memory in wxString::Right") );
939 }
940 return dest;
941 }
942
943 // get all characters after the last occurence of ch
944 // (returns the whole string if ch not found)
945 wxString wxString::AfterLast(wxUniChar ch) const
946 {
947 wxString str;
948 int iPos = Find(ch, true);
949 if ( iPos == wxNOT_FOUND )
950 str = *this;
951 else
952 str = wx_str() + iPos + 1;
953
954 return str;
955 }
956
957 // extract nCount first (leftmost) characters
958 wxString wxString::Left(size_t nCount) const
959 {
960 if ( nCount > length() )
961 nCount = length();
962
963 wxString dest(*this, 0, nCount);
964 if ( dest.length() != nCount ) {
965 wxFAIL_MSG( _T("out of memory in wxString::Left") );
966 }
967 return dest;
968 }
969
970 // get all characters before the first occurence of ch
971 // (returns the whole string if ch not found)
972 wxString wxString::BeforeFirst(wxUniChar ch) const
973 {
974 int iPos = Find(ch);
975 if ( iPos == wxNOT_FOUND ) iPos = length();
976 return wxString(*this, 0, iPos);
977 }
978
979 /// get all characters before the last occurence of ch
980 /// (returns empty string if ch not found)
981 wxString wxString::BeforeLast(wxUniChar ch) const
982 {
983 wxString str;
984 int iPos = Find(ch, true);
985 if ( iPos != wxNOT_FOUND && iPos != 0 )
986 str = wxString(c_str(), iPos);
987
988 return str;
989 }
990
991 /// get all characters after the first occurence of ch
992 /// (returns empty string if ch not found)
993 wxString wxString::AfterFirst(wxUniChar ch) const
994 {
995 wxString str;
996 int iPos = Find(ch);
997 if ( iPos != wxNOT_FOUND )
998 str = wx_str() + iPos + 1;
999
1000 return str;
1001 }
1002
1003 // replace first (or all) occurences of some substring with another one
1004 size_t wxString::Replace(const wxChar *szOld,
1005 const wxChar *szNew, bool bReplaceAll)
1006 {
1007 // if we tried to replace an empty string we'd enter an infinite loop below
1008 wxCHECK_MSG( szOld && *szOld && szNew, 0,
1009 _T("wxString::Replace(): invalid parameter") );
1010
1011 size_t uiCount = 0; // count of replacements made
1012
1013 size_t uiOldLen = wxStrlen(szOld);
1014 size_t uiNewLen = wxStrlen(szNew);
1015
1016 size_t dwPos = 0;
1017
1018 while ( this->c_str()[dwPos] != wxT('\0') )
1019 {
1020 //DO NOT USE STRSTR HERE
1021 //this string can contain embedded null characters,
1022 //so strstr will function incorrectly
1023 dwPos = find(szOld, dwPos);
1024 if ( dwPos == npos )
1025 break; // exit the loop
1026 else
1027 {
1028 //replace this occurance of the old string with the new one
1029 replace(dwPos, uiOldLen, szNew, uiNewLen);
1030
1031 //move up pos past the string that was replaced
1032 dwPos += uiNewLen;
1033
1034 //increase replace count
1035 ++uiCount;
1036
1037 // stop now?
1038 if ( !bReplaceAll )
1039 break; // exit the loop
1040 }
1041 }
1042
1043 return uiCount;
1044 }
1045
1046 bool wxString::IsAscii() const
1047 {
1048 const wxChar *s = (const wxChar*) *this;
1049 while(*s){
1050 if(!isascii(*s)) return(false);
1051 s++;
1052 }
1053 return(true);
1054 }
1055
1056 bool wxString::IsWord() const
1057 {
1058 const wxChar *s = (const wxChar*) *this;
1059 while(*s){
1060 if(!wxIsalpha(*s)) return(false);
1061 s++;
1062 }
1063 return(true);
1064 }
1065
1066 bool wxString::IsNumber() const
1067 {
1068 const wxChar *s = (const wxChar*) *this;
1069 if (wxStrlen(s))
1070 if ((s[0] == wxT('-')) || (s[0] == wxT('+'))) s++;
1071 while(*s){
1072 if(!wxIsdigit(*s)) return(false);
1073 s++;
1074 }
1075 return(true);
1076 }
1077
1078 wxString wxString::Strip(stripType w) const
1079 {
1080 wxString s = *this;
1081 if ( w & leading ) s.Trim(false);
1082 if ( w & trailing ) s.Trim(true);
1083 return s;
1084 }
1085
1086 // ---------------------------------------------------------------------------
1087 // case conversion
1088 // ---------------------------------------------------------------------------
1089
1090 wxString& wxString::MakeUpper()
1091 {
1092 for ( iterator it = begin(), en = end(); it != en; ++it )
1093 *it = (wxChar)wxToupper(*it);
1094
1095 return *this;
1096 }
1097
1098 wxString& wxString::MakeLower()
1099 {
1100 for ( iterator it = begin(), en = end(); it != en; ++it )
1101 *it = (wxChar)wxTolower(*it);
1102
1103 return *this;
1104 }
1105
1106 // ---------------------------------------------------------------------------
1107 // trimming and padding
1108 // ---------------------------------------------------------------------------
1109
1110 // some compilers (VC++ 6.0 not to name them) return true for a call to
1111 // isspace('ê') in the C locale which seems to be broken to me, but we have to
1112 // live with this by checking that the character is a 7 bit one - even if this
1113 // may fail to detect some spaces (I don't know if Unicode doesn't have
1114 // space-like symbols somewhere except in the first 128 chars), it is arguably
1115 // still better than trimming away accented letters
1116 inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1117
1118 // trims spaces (in the sense of isspace) from left or right side
1119 wxString& wxString::Trim(bool bFromRight)
1120 {
1121 // first check if we're going to modify the string at all
1122 if ( !empty() &&
1123 (
1124 (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1125 (!bFromRight && wxSafeIsspace(GetChar(0u)))
1126 )
1127 )
1128 {
1129 if ( bFromRight )
1130 {
1131 // find last non-space character
1132 reverse_iterator psz = rbegin();
1133 while ( (psz != rend()) && wxSafeIsspace(*psz) )
1134 psz++;
1135
1136 // truncate at trailing space start
1137 erase(psz.base(), end());
1138 }
1139 else
1140 {
1141 // find first non-space character
1142 iterator psz = begin();
1143 while ( (psz != end()) && wxSafeIsspace(*psz) )
1144 psz++;
1145
1146 // fix up data and length
1147 erase(begin(), psz);
1148 }
1149 }
1150
1151 return *this;
1152 }
1153
1154 // adds nCount characters chPad to the string from either side
1155 wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1156 {
1157 wxString s(chPad, nCount);
1158
1159 if ( bFromRight )
1160 *this += s;
1161 else
1162 {
1163 s += *this;
1164 swap(s);
1165 }
1166
1167 return *this;
1168 }
1169
1170 // truncate the string
1171 wxString& wxString::Truncate(size_t uiLen)
1172 {
1173 if ( uiLen < length() )
1174 {
1175 erase(begin() + uiLen, end());
1176 }
1177 //else: nothing to do, string is already short enough
1178
1179 return *this;
1180 }
1181
1182 // ---------------------------------------------------------------------------
1183 // finding (return wxNOT_FOUND if not found and index otherwise)
1184 // ---------------------------------------------------------------------------
1185
1186 // find a character
1187 int wxString::Find(wxUniChar ch, bool bFromEnd) const
1188 {
1189 size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1190
1191 return (idx == npos) ? wxNOT_FOUND : (int)idx;
1192 }
1193
1194 // find a sub-string (like strstr)
1195 int wxString::Find(const wxChar *pszSub) const
1196 {
1197 size_type idx = find(pszSub);
1198
1199 return (idx == npos) ? wxNOT_FOUND : (int)idx;
1200 }
1201
1202 // ----------------------------------------------------------------------------
1203 // conversion to numbers
1204 // ----------------------------------------------------------------------------
1205
1206 // the implementation of all the functions below is exactly the same so factor
1207 // it out
1208
1209 template <typename T, typename F>
1210 bool wxStringToIntType(const wxChar *start,
1211 T *val,
1212 int base,
1213 F func)
1214 {
1215 wxCHECK_MSG( val, false, _T("NULL output pointer") );
1216 wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );
1217
1218 #ifndef __WXWINCE__
1219 errno = 0;
1220 #endif
1221
1222 wxChar *end;
1223 *val = (*func)(start, &end, base);
1224
1225 // return true only if scan was stopped by the terminating NUL and if the
1226 // string was not empty to start with and no under/overflow occurred
1227 return !*end && (end != start)
1228 #ifndef __WXWINCE__
1229 && (errno != ERANGE)
1230 #endif
1231 ;
1232 }
1233
1234 bool wxString::ToLong(long *val, int base) const
1235 {
1236 return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtol);
1237 }
1238
1239 bool wxString::ToULong(unsigned long *val, int base) const
1240 {
1241 return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtoul);
1242 }
1243
1244 bool wxString::ToLongLong(wxLongLong_t *val, int base) const
1245 {
1246 #ifdef wxHAS_STRTOLL
1247 return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtoll);
1248 #else
1249 // TODO: implement this ourselves
1250 wxUnusedVar(val);
1251 wxUnusedVar(base);
1252 return false;
1253 #endif // wxHAS_STRTOLL
1254 }
1255
1256 bool wxString::ToULongLong(wxULongLong_t *val, int base) const
1257 {
1258 #ifdef wxHAS_STRTOLL
1259 return wxStringToIntType((const wxChar*)c_str(), val, base, wxStrtoull);
1260 #else
1261 // TODO: implement this ourselves
1262 wxUnusedVar(val);
1263 wxUnusedVar(base);
1264 return false;
1265 #endif
1266 }
1267
1268 bool wxString::ToDouble(double *val) const
1269 {
1270 wxCHECK_MSG( val, false, _T("NULL pointer in wxString::ToDouble") );
1271
1272 #ifndef __WXWINCE__
1273 errno = 0;
1274 #endif
1275
1276 const wxChar *start = c_str();
1277 wxChar *end;
1278 *val = wxStrtod(start, &end);
1279
1280 // return true only if scan was stopped by the terminating NUL and if the
1281 // string was not empty to start with and no under/overflow occurred
1282 return !*end && (end != start)
1283 #ifndef __WXWINCE__
1284 && (errno != ERANGE)
1285 #endif
1286 ;
1287 }
1288
1289 // ---------------------------------------------------------------------------
1290 // formatted output
1291 // ---------------------------------------------------------------------------
1292
1293 /* static */
1294 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1295 wxString wxStringPrintfMixinBase::DoFormat(const wxChar *format, ...)
1296 #else
1297 wxString wxString::DoFormat(const wxChar *format, ...)
1298 #endif
1299 {
1300 va_list argptr;
1301 va_start(argptr, format);
1302
1303 wxString s;
1304 s.PrintfV(format, argptr);
1305
1306 va_end(argptr);
1307
1308 return s;
1309 }
1310
1311 /* static */
1312 wxString wxString::FormatV(const wxString& format, va_list argptr)
1313 {
1314 wxString s;
1315 s.PrintfV(format, argptr);
1316 return s;
1317 }
1318
1319 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1320 int wxStringPrintfMixinBase::DoPrintf(const wxChar *format, ...)
1321 #else
1322 int wxString::DoPrintf(const wxChar *format, ...)
1323 #endif
1324 {
1325 va_list argptr;
1326 va_start(argptr, format);
1327
1328 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1329 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1330 // because it's the only cast that works safely for downcasting when
1331 // multiple inheritance is used:
1332 wxString *str = static_cast<wxString*>(this);
1333 #else
1334 wxString *str = this;
1335 #endif
1336
1337 int iLen = str->PrintfV(format, argptr);
1338
1339 va_end(argptr);
1340
1341 return iLen;
1342 }
1343
1344 int wxString::PrintfV(const wxString& format, va_list argptr)
1345 {
1346 int size = 1024;
1347
1348 for ( ;; )
1349 {
1350 wxStringBuffer tmp(*this, size + 1);
1351 wxChar *buf = tmp;
1352
1353 if ( !buf )
1354 {
1355 // out of memory
1356 return -1;
1357 }
1358
1359 // wxVsnprintf() may modify the original arg pointer, so pass it
1360 // only a copy
1361 va_list argptrcopy;
1362 wxVaCopy(argptrcopy, argptr);
1363 int len = wxVsnprintf(buf, size, format, argptrcopy);
1364 va_end(argptrcopy);
1365
1366 // some implementations of vsnprintf() don't NUL terminate
1367 // the string if there is not enough space for it so
1368 // always do it manually
1369 buf[size] = _T('\0');
1370
1371 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1372 // total number of characters which would have been written if the
1373 // buffer were large enough (newer standards such as Unix98)
1374 if ( len < 0 )
1375 {
1376 #if wxUSE_WXVSNPRINTF
1377 // we know that our own implementation of wxVsnprintf() returns -1
1378 // only for a format error - thus there's something wrong with
1379 // the user's format string
1380 return -1;
1381 #else // assume that system version only returns error if not enough space
1382 // still not enough, as we don't know how much we need, double the
1383 // current size of the buffer
1384 size *= 2;
1385 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1386 }
1387 else if ( len >= size )
1388 {
1389 #if wxUSE_WXVSNPRINTF
1390 // we know that our own implementation of wxVsnprintf() returns
1391 // size+1 when there's not enough space but that's not the size
1392 // of the required buffer!
1393 size *= 2; // so we just double the current size of the buffer
1394 #else
1395 // some vsnprintf() implementations NUL-terminate the buffer and
1396 // some don't in len == size case, to be safe always add 1
1397 size = len + 1;
1398 #endif
1399 }
1400 else // ok, there was enough space
1401 {
1402 break;
1403 }
1404 }
1405
1406 // we could have overshot
1407 Shrink();
1408
1409 return length();
1410 }
1411
1412 // ----------------------------------------------------------------------------
1413 // misc other operations
1414 // ----------------------------------------------------------------------------
1415
1416 // returns true if the string matches the pattern which may contain '*' and
1417 // '?' metacharacters (as usual, '?' matches any character and '*' any number
1418 // of them)
1419 bool wxString::Matches(const wxChar *pszMask) const
1420 {
1421 // I disable this code as it doesn't seem to be faster (in fact, it seems
1422 // to be much slower) than the old, hand-written code below and using it
1423 // here requires always linking with libregex even if the user code doesn't
1424 // use it
1425 #if 0 // wxUSE_REGEX
1426 // first translate the shell-like mask into a regex
1427 wxString pattern;
1428 pattern.reserve(wxStrlen(pszMask));
1429
1430 pattern += _T('^');
1431 while ( *pszMask )
1432 {
1433 switch ( *pszMask )
1434 {
1435 case _T('?'):
1436 pattern += _T('.');
1437 break;
1438
1439 case _T('*'):
1440 pattern += _T(".*");
1441 break;
1442
1443 case _T('^'):
1444 case _T('.'):
1445 case _T('$'):
1446 case _T('('):
1447 case _T(')'):
1448 case _T('|'):
1449 case _T('+'):
1450 case _T('\\'):
1451 // these characters are special in a RE, quote them
1452 // (however note that we don't quote '[' and ']' to allow
1453 // using them for Unix shell like matching)
1454 pattern += _T('\\');
1455 // fall through
1456
1457 default:
1458 pattern += *pszMask;
1459 }
1460
1461 pszMask++;
1462 }
1463 pattern += _T('$');
1464
1465 // and now use it
1466 return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
1467 #else // !wxUSE_REGEX
1468 // TODO: this is, of course, awfully inefficient...
1469
1470 // the char currently being checked
1471 const wxChar *pszTxt = c_str();
1472
1473 // the last location where '*' matched
1474 const wxChar *pszLastStarInText = NULL;
1475 const wxChar *pszLastStarInMask = NULL;
1476
1477 match:
1478 for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
1479 switch ( *pszMask ) {
1480 case wxT('?'):
1481 if ( *pszTxt == wxT('\0') )
1482 return false;
1483
1484 // pszTxt and pszMask will be incremented in the loop statement
1485
1486 break;
1487
1488 case wxT('*'):
1489 {
1490 // remember where we started to be able to backtrack later
1491 pszLastStarInText = pszTxt;
1492 pszLastStarInMask = pszMask;
1493
1494 // ignore special chars immediately following this one
1495 // (should this be an error?)
1496 while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
1497 pszMask++;
1498
1499 // if there is nothing more, match
1500 if ( *pszMask == wxT('\0') )
1501 return true;
1502
1503 // are there any other metacharacters in the mask?
1504 size_t uiLenMask;
1505 const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));
1506
1507 if ( pEndMask != NULL ) {
1508 // we have to match the string between two metachars
1509 uiLenMask = pEndMask - pszMask;
1510 }
1511 else {
1512 // we have to match the remainder of the string
1513 uiLenMask = wxStrlen(pszMask);
1514 }
1515
1516 wxString strToMatch(pszMask, uiLenMask);
1517 const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
1518 if ( pMatch == NULL )
1519 return false;
1520
1521 // -1 to compensate "++" in the loop
1522 pszTxt = pMatch + uiLenMask - 1;
1523 pszMask += uiLenMask - 1;
1524 }
1525 break;
1526
1527 default:
1528 if ( *pszMask != *pszTxt )
1529 return false;
1530 break;
1531 }
1532 }
1533
1534 // match only if nothing left
1535 if ( *pszTxt == wxT('\0') )
1536 return true;
1537
1538 // if we failed to match, backtrack if we can
1539 if ( pszLastStarInText ) {
1540 pszTxt = pszLastStarInText + 1;
1541 pszMask = pszLastStarInMask;
1542
1543 pszLastStarInText = NULL;
1544
1545 // don't bother resetting pszLastStarInMask, it's unnecessary
1546
1547 goto match;
1548 }
1549
1550 return false;
1551 #endif // wxUSE_REGEX/!wxUSE_REGEX
1552 }
1553
1554 // Count the number of chars
1555 int wxString::Freq(wxUniChar ch) const
1556 {
1557 int count = 0;
1558 for ( const_iterator i = begin(); i != end(); ++i )
1559 {
1560 if ( *i == ch )
1561 count ++;
1562 }
1563 return count;
1564 }
1565
1566 // convert to upper case, return the copy of the string
1567 wxString wxString::Upper() const
1568 { wxString s(*this); return s.MakeUpper(); }
1569
1570 // convert to lower case, return the copy of the string
1571 wxString wxString::Lower() const { wxString s(*this); return s.MakeLower(); }
1572