]> git.saurik.com Git - wxWidgets.git/blob - src/common/string.cpp
d0a6a445691b1b242f75676a89aaeb1d821a2a10
[wxWidgets.git] / src / common / string.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
5 // Modified by:
6 // Created: 29/01/98
7 // RCS-ID: $Id$
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
12
13 // ===========================================================================
14 // headers, declarations, constants
15 // ===========================================================================
16
17 // For compilers that support precompilation, includes "wx.h".
18 #include "wx/wxprec.h"
19
20 #ifdef __BORLANDC__
21 #pragma hdrstop
22 #endif
23
24 #ifndef WX_PRECOMP
25 #include "wx/string.h"
26 #include "wx/wxcrtvararg.h"
27 #endif
28
29 #include <ctype.h>
30
31 #ifndef __WXWINCE__
32 #include <errno.h>
33 #endif
34
35 #include <string.h>
36 #include <stdlib.h>
37
38 #ifdef __SALFORDC__
39 #include <clib.h>
40 #endif
41
42 #include "wx/hashmap.h"
43
44 // string handling functions used by wxString:
45 #if wxUSE_UNICODE_UTF8
46 #define wxStringMemcpy memcpy
47 #define wxStringMemcmp memcmp
48 #define wxStringMemchr memchr
49 #define wxStringStrlen strlen
50 #else
51 #define wxStringMemcpy wxTmemcpy
52 #define wxStringMemcmp wxTmemcmp
53 #define wxStringMemchr wxTmemchr
54 #define wxStringStrlen wxStrlen
55 #endif
56
57
58 // ---------------------------------------------------------------------------
59 // static class variables definition
60 // ---------------------------------------------------------------------------
61
62 //According to STL _must_ be a -1 size_t
63 const size_t wxString::npos = (size_t) -1;
64
65 // ----------------------------------------------------------------------------
66 // global functions
67 // ----------------------------------------------------------------------------
68
69 #if wxUSE_STD_IOSTREAM
70
71 #include <iostream>
72
73 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
74 {
75 // FIXME-UTF8: always, not only if wxUSE_UNICODE
76 #if wxUSE_UNICODE && !defined(__BORLANDC__)
77 return os << (const wchar_t*)str.AsWCharBuf();
78 #else
79 return os << (const char*)str.AsCharBuf();
80 #endif
81 }
82
83 wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
84 {
85 return os << str.c_str();
86 }
87
88 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str)
89 {
90 return os << str.data();
91 }
92
93 #ifndef __BORLANDC__
94 wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)
95 {
96 return os << str.data();
97 }
98 #endif
99
100 #endif // wxUSE_STD_IOSTREAM
101
102 // ===========================================================================
103 // wxString class core
104 // ===========================================================================
105
106 #if wxUSE_UNICODE_UTF8
107
108 void wxString::PosLenToImpl(size_t pos, size_t len,
109 size_t *implPos, size_t *implLen) const
110 {
111 if ( pos == npos )
112 *implPos = npos;
113 else
114 {
115 const_iterator i = begin() + pos;
116 *implPos = wxStringImpl::const_iterator(i.impl()) - m_impl.begin();
117 if ( len == npos )
118 *implLen = npos;
119 else
120 {
121 // too large length is interpreted as "to the end of the string"
122 // FIXME-UTF8: verify this is the case in std::string, assert
123 // otherwise
124 if ( pos + len > length() )
125 len = length() - pos;
126
127 *implLen = (i + len).impl() - i.impl();
128 }
129 }
130 }
131
132 #endif // wxUSE_UNICODE_UTF8
133
134 // ----------------------------------------------------------------------------
135 // wxCStrData converted strings caching
136 // ----------------------------------------------------------------------------
137
138 // FIXME-UTF8: temporarily disabled because it doesn't work with global
139 // string objects; re-enable after fixing this bug and benchmarking
140 // performance to see if using a hash is a good idea at all
141 #if 0
142
143 // For backward compatibility reasons, it must be possible to assign the value
144 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
145 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
146 // because the memory would be freed immediately, but it has to be valid as long
147 // as the string is not modified, so that code like this still works:
148 //
149 // const wxChar *s = str.c_str();
150 // while ( s ) { ... }
151
152 // FIXME-UTF8: not thread safe!
153 // FIXME-UTF8: we currently clear the cached conversion only when the string is
154 // destroyed, but we should do it when the string is modified, to
155 // keep memory usage down
156 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
157 // invalidated the cache on every change, we could keep the previous
158 // conversion
159 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
160 // to use mb_str() or wc_str() instead of (const [w]char*)c_str()
161
162 template<typename T>
163 static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
164 {
165 typename T::iterator i = hash.find(wxConstCast(s, wxString));
166 if ( i != hash.end() )
167 {
168 free(i->second);
169 hash.erase(i);
170 }
171 }
172
#if wxUSE_UNICODE
// NB: non-STL implementation doesn't compile with "const wxString*" key type,
//     so we have to use wxString* here and const-cast when used
WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
                    wxStringCharConversionCache);
static wxStringCharConversionCache gs_stringsCharCache;

// Convert the string to char* and keep the result in gs_stringsCharCache,
// keyed by the address of the string.
const char* wxCStrData::AsChar() const
{
    // remove the previously cached value, if any (see FIXMEs above):
    DeleteStringFromConversionCache(gs_stringsCharCache, m_str);

    // convert the string and keep the result:
    char * const converted = m_str->mb_str().release();
    gs_stringsCharCache[wxConstCast(m_str, wxString)] = converted;

    return converted + m_offset;
}
#endif // wxUSE_UNICODE
192
193 #if !wxUSE_UNICODE_WCHAR
194 WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
195 wxStringWCharConversionCache);
196 static wxStringWCharConversionCache gs_stringsWCharCache;
197
198 const wchar_t* wxCStrData::AsWChar() const
199 {
200 // remove previously cache value, if any (see FIXMEs above):
201 DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
202
203 // convert the string and keep it:
204 const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
205 m_str->wc_str().release();
206
207 return s + m_offset;
208 }
209 #endif // !wxUSE_UNICODE_WCHAR
210
211 wxString::~wxString()
212 {
213 #if wxUSE_UNICODE
214 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
215 DeleteStringFromConversionCache(gs_stringsCharCache, this);
216 #endif
217 #if !wxUSE_UNICODE_WCHAR
218 DeleteStringFromConversionCache(gs_stringsWCharCache, this);
219 #endif
220 }
221 #endif
222
223 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
224 const char* wxCStrData::AsChar() const
225 {
226 #if wxUSE_UNICODE_UTF8
227 if ( wxLocaleIsUtf8 )
228 return AsInternal();
229 #endif
230 // under non-UTF8 locales, we have to convert the internal UTF-8
231 // representation using wxConvLibc and cache the result
232
233 wxString *str = wxConstCast(m_str, wxString);
234
235 // convert the string:
236 wxCharBuffer buf(str->mb_str());
237
238 // FIXME-UTF8: do the conversion in-place in the existing buffer
239 if ( str->m_convertedToChar &&
240 strlen(buf) == strlen(str->m_convertedToChar) )
241 {
242 // keep the same buffer for as long as possible, so that several calls
243 // to c_str() in a row still work:
244 strcpy(str->m_convertedToChar, buf);
245 }
246 else
247 {
248 str->m_convertedToChar = buf.release();
249 }
250
251 // and keep it:
252 return str->m_convertedToChar + m_offset;
253 }
254 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
255
256 #if !wxUSE_UNICODE_WCHAR
257 const wchar_t* wxCStrData::AsWChar() const
258 {
259 wxString *str = wxConstCast(m_str, wxString);
260
261 // convert the string:
262 wxWCharBuffer buf(str->wc_str());
263
264 // FIXME-UTF8: do the conversion in-place in the existing buffer
265 if ( str->m_convertedToWChar &&
266 wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
267 {
268 // keep the same buffer for as long as possible, so that several calls
269 // to c_str() in a row still work:
270 memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
271 }
272 else
273 {
274 str->m_convertedToWChar = buf.release();
275 }
276
277 // and keep it:
278 return str->m_convertedToWChar + m_offset;
279 }
280 #endif // !wxUSE_UNICODE_WCHAR
281
282 // ===========================================================================
283 // wxString class core
284 // ===========================================================================
285
286 // ---------------------------------------------------------------------------
287 // construction and conversion
288 // ---------------------------------------------------------------------------
289
290 #if wxUSE_UNICODE_WCHAR
291 /* static */
292 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
293 const wxMBConv& conv)
294 {
295 // anything to do?
296 if ( !psz || nLength == 0 )
297 return SubstrBufFromMB(L"", 0);
298
299 if ( nLength == npos )
300 nLength = wxNO_LEN;
301
302 size_t wcLen;
303 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
304 if ( !wcLen )
305 return SubstrBufFromMB(_T(""), 0);
306 else
307 return SubstrBufFromMB(wcBuf, wcLen);
308 }
309 #endif // wxUSE_UNICODE_WCHAR
310
311 #if wxUSE_UNICODE_UTF8
312 /* static */
313 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
314 const wxMBConv& conv)
315 {
316 // anything to do?
317 if ( !psz || nLength == 0 )
318 return SubstrBufFromMB("", 0);
319
320 // if psz is already in UTF-8, we don't have to do the roundtrip to
321 // wchar_t* and back:
322 if ( conv.IsUTF8() )
323 {
324 // we need to validate the input because UTF8 iterators assume valid
325 // UTF-8 sequence and psz may be invalid:
326 if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
327 {
328 return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz), nLength);
329 }
330 // else: do the roundtrip through wchar_t*
331 }
332
333 if ( nLength == npos )
334 nLength = wxNO_LEN;
335
336 // first convert to wide string:
337 size_t wcLen;
338 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
339 if ( !wcLen )
340 return SubstrBufFromMB("", 0);
341
342 // and then to UTF-8:
343 SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
344 // widechar -> UTF-8 conversion isn't supposed to ever fail:
345 wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
346
347 return buf;
348 }
349 #endif // wxUSE_UNICODE_UTF8
350
351 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
352 /* static */
353 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
354 const wxMBConv& conv)
355 {
356 // anything to do?
357 if ( !pwz || nLength == 0 )
358 return SubstrBufFromWC("", 0);
359
360 if ( nLength == npos )
361 nLength = wxNO_LEN;
362
363 size_t mbLen;
364 wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
365 if ( !mbLen )
366 return SubstrBufFromWC("", 0);
367 else
368 return SubstrBufFromWC(mbBuf, mbLen);
369 }
370 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
371
372
373 #if wxUSE_UNICODE_WCHAR
374
375 //Convert wxString in Unicode mode to a multi-byte string
376 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
377 {
378 return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL);
379 }
380
381 #elif wxUSE_UNICODE_UTF8
382
383 const wxWCharBuffer wxString::wc_str() const
384 {
385 return wxMBConvStrictUTF8().cMB2WC
386 (
387 m_impl.c_str(),
388 m_impl.length() + 1, // size, not length
389 NULL
390 );
391 }
392
393 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
394 {
395 if ( conv.IsUTF8() )
396 return wxCharBuffer::CreateNonOwned(m_impl.c_str());
397
398 // FIXME-UTF8: use wc_str() here once we have buffers with length
399
400 size_t wcLen;
401 wxWCharBuffer wcBuf(wxMBConvStrictUTF8().cMB2WC
402 (
403 m_impl.c_str(),
404 m_impl.length() + 1, // size
405 &wcLen
406 ));
407 if ( !wcLen )
408 return wxCharBuffer("");
409
410 return conv.cWC2MB(wcBuf, wcLen+1, NULL);
411 }
412
413 #else // ANSI
414
415 //Converts this string to a wide character string if unicode
416 //mode is not enabled and wxUSE_WCHAR_T is enabled
417 const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
418 {
419 return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL);
420 }
421
422 #endif // Unicode/ANSI
423
424 // shrink to minimal size (releasing extra memory)
425 bool wxString::Shrink()
426 {
427 wxString tmp(begin(), end());
428 swap(tmp);
429 return tmp.length() == length();
430 }
431
432 // deprecated compatibility code:
433 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
434 wxStringCharType *wxString::GetWriteBuf(size_t nLen)
435 {
436 return DoGetWriteBuf(nLen);
437 }
438
439 void wxString::UngetWriteBuf()
440 {
441 DoUngetWriteBuf();
442 }
443
444 void wxString::UngetWriteBuf(size_t nLen)
445 {
446 DoUngetWriteBuf(nLen);
447 }
448 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
449
450
451 // ---------------------------------------------------------------------------
452 // data access
453 // ---------------------------------------------------------------------------
454
455 // all functions are inline in string.h
456
457 // ---------------------------------------------------------------------------
458 // concatenation operators
459 // ---------------------------------------------------------------------------
460
461 /*
462 * concatenation functions come in 5 flavours:
463 * string + string
464 * char + string and string + char
465 * C str + string and string + C str
466 */
467
468 wxString operator+(const wxString& str1, const wxString& str2)
469 {
470 #if !wxUSE_STL_BASED_WXSTRING
471 wxASSERT( str1.IsValid() );
472 wxASSERT( str2.IsValid() );
473 #endif
474
475 wxString s = str1;
476 s += str2;
477
478 return s;
479 }
480
481 wxString operator+(const wxString& str, wxUniChar ch)
482 {
483 #if !wxUSE_STL_BASED_WXSTRING
484 wxASSERT( str.IsValid() );
485 #endif
486
487 wxString s = str;
488 s += ch;
489
490 return s;
491 }
492
493 wxString operator+(wxUniChar ch, const wxString& str)
494 {
495 #if !wxUSE_STL_BASED_WXSTRING
496 wxASSERT( str.IsValid() );
497 #endif
498
499 wxString s = ch;
500 s += str;
501
502 return s;
503 }
504
505 wxString operator+(const wxString& str, const char *psz)
506 {
507 #if !wxUSE_STL_BASED_WXSTRING
508 wxASSERT( str.IsValid() );
509 #endif
510
511 wxString s;
512 if ( !s.Alloc(strlen(psz) + str.length()) ) {
513 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
514 }
515 s += str;
516 s += psz;
517
518 return s;
519 }
520
521 wxString operator+(const wxString& str, const wchar_t *pwz)
522 {
523 #if !wxUSE_STL_BASED_WXSTRING
524 wxASSERT( str.IsValid() );
525 #endif
526
527 wxString s;
528 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
529 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
530 }
531 s += str;
532 s += pwz;
533
534 return s;
535 }
536
537 wxString operator+(const char *psz, const wxString& str)
538 {
539 #if !wxUSE_STL_BASED_WXSTRING
540 wxASSERT( str.IsValid() );
541 #endif
542
543 wxString s;
544 if ( !s.Alloc(strlen(psz) + str.length()) ) {
545 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
546 }
547 s = psz;
548 s += str;
549
550 return s;
551 }
552
553 wxString operator+(const wchar_t *pwz, const wxString& str)
554 {
555 #if !wxUSE_STL_BASED_WXSTRING
556 wxASSERT( str.IsValid() );
557 #endif
558
559 wxString s;
560 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
561 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
562 }
563 s = pwz;
564 s += str;
565
566 return s;
567 }
568
569 // ---------------------------------------------------------------------------
570 // string comparison
571 // ---------------------------------------------------------------------------
572
573 bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
574 {
575 return (length() == 1) && (compareWithCase ? GetChar(0u) == c
576 : wxToupper(GetChar(0u)) == wxToupper(c));
577 }
578
579 #ifdef HAVE_STD_STRING_COMPARE
580
581 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
582 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
583 // sort strings in characters code point order by sorting the byte sequence
584 // in byte values order (i.e. what strcmp() and memcmp() do).
585
586 int wxString::compare(const wxString& str) const
587 {
588 return m_impl.compare(str.m_impl);
589 }
590
591 int wxString::compare(size_t nStart, size_t nLen,
592 const wxString& str) const
593 {
594 size_t pos, len;
595 PosLenToImpl(nStart, nLen, &pos, &len);
596 return m_impl.compare(pos, len, str.m_impl);
597 }
598
599 int wxString::compare(size_t nStart, size_t nLen,
600 const wxString& str,
601 size_t nStart2, size_t nLen2) const
602 {
603 size_t pos, len;
604 PosLenToImpl(nStart, nLen, &pos, &len);
605
606 size_t pos2, len2;
607 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
608
609 return m_impl.compare(pos, len, str.m_impl, pos2, len2);
610 }
611
612 int wxString::compare(const char* sz) const
613 {
614 return m_impl.compare(ImplStr(sz));
615 }
616
617 int wxString::compare(const wchar_t* sz) const
618 {
619 return m_impl.compare(ImplStr(sz));
620 }
621
622 int wxString::compare(size_t nStart, size_t nLen,
623 const char* sz, size_t nCount) const
624 {
625 size_t pos, len;
626 PosLenToImpl(nStart, nLen, &pos, &len);
627
628 SubstrBufFromMB str(ImplStr(sz, nCount));
629
630 return m_impl.compare(pos, len, str.data, str.len);
631 }
632
633 int wxString::compare(size_t nStart, size_t nLen,
634 const wchar_t* sz, size_t nCount) const
635 {
636 size_t pos, len;
637 PosLenToImpl(nStart, nLen, &pos, &len);
638
639 SubstrBufFromWC str(ImplStr(sz, nCount));
640
641 return m_impl.compare(pos, len, str.data, str.len);
642 }
643
644 #else // !HAVE_STD_STRING_COMPARE
645
646 static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
647 const wxStringCharType* s2, size_t l2)
648 {
649 if( l1 == l2 )
650 return wxStringMemcmp(s1, s2, l1);
651 else if( l1 < l2 )
652 {
653 int ret = wxStringMemcmp(s1, s2, l1);
654 return ret == 0 ? -1 : ret;
655 }
656 else
657 {
658 int ret = wxStringMemcmp(s1, s2, l2);
659 return ret == 0 ? +1 : ret;
660 }
661 }
662
663 int wxString::compare(const wxString& str) const
664 {
665 return ::wxDoCmp(m_impl.data(), m_impl.length(),
666 str.m_impl.data(), str.m_impl.length());
667 }
668
669 int wxString::compare(size_t nStart, size_t nLen,
670 const wxString& str) const
671 {
672 wxASSERT(nStart <= length());
673 size_type strLen = length() - nStart;
674 nLen = strLen < nLen ? strLen : nLen;
675
676 size_t pos, len;
677 PosLenToImpl(nStart, nLen, &pos, &len);
678
679 return ::wxDoCmp(m_impl.data() + pos, len,
680 str.m_impl.data(), str.m_impl.length());
681 }
682
683 int wxString::compare(size_t nStart, size_t nLen,
684 const wxString& str,
685 size_t nStart2, size_t nLen2) const
686 {
687 wxASSERT(nStart <= length());
688 wxASSERT(nStart2 <= str.length());
689 size_type strLen = length() - nStart,
690 strLen2 = str.length() - nStart2;
691 nLen = strLen < nLen ? strLen : nLen;
692 nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
693
694 size_t pos, len;
695 PosLenToImpl(nStart, nLen, &pos, &len);
696 size_t pos2, len2;
697 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
698
699 return ::wxDoCmp(m_impl.data() + pos, len,
700 str.m_impl.data() + pos2, len2);
701 }
702
703 int wxString::compare(const char* sz) const
704 {
705 SubstrBufFromMB str(ImplStr(sz, npos));
706 if ( str.len == npos )
707 str.len = wxStringStrlen(str.data);
708 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
709 }
710
711 int wxString::compare(const wchar_t* sz) const
712 {
713 SubstrBufFromWC str(ImplStr(sz, npos));
714 if ( str.len == npos )
715 str.len = wxStringStrlen(str.data);
716 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
717 }
718
719 int wxString::compare(size_t nStart, size_t nLen,
720 const char* sz, size_t nCount) const
721 {
722 wxASSERT(nStart <= length());
723 size_type strLen = length() - nStart;
724 nLen = strLen < nLen ? strLen : nLen;
725
726 size_t pos, len;
727 PosLenToImpl(nStart, nLen, &pos, &len);
728
729 SubstrBufFromMB str(ImplStr(sz, nCount));
730 if ( str.len == npos )
731 str.len = wxStringStrlen(str.data);
732
733 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
734 }
735
736 int wxString::compare(size_t nStart, size_t nLen,
737 const wchar_t* sz, size_t nCount) const
738 {
739 wxASSERT(nStart <= length());
740 size_type strLen = length() - nStart;
741 nLen = strLen < nLen ? strLen : nLen;
742
743 size_t pos, len;
744 PosLenToImpl(nStart, nLen, &pos, &len);
745
746 SubstrBufFromWC str(ImplStr(sz, nCount));
747 if ( str.len == npos )
748 str.len = wxStringStrlen(str.data);
749
750 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
751 }
752
753 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
754
755
756 // ---------------------------------------------------------------------------
757 // find_{first,last}_[not]_of functions
758 // ---------------------------------------------------------------------------
759
760 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
761
762 // NB: All these functions are implemented with the argument being wxChar*,
763 // i.e. widechar string in any Unicode build, even though native string
764 // representation is char* in the UTF-8 build. This is because we couldn't
765 // use memchr() to determine if a character is in a set encoded as UTF-8.
766
767 size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
768 {
769 return find_first_of(sz, nStart, wxStrlen(sz));
770 }
771
772 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
773 {
774 return find_first_not_of(sz, nStart, wxStrlen(sz));
775 }
776
777 size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
778 {
779 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
780
781 size_t idx = nStart;
782 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
783 {
784 if ( wxTmemchr(sz, *i, n) )
785 return idx;
786 }
787
788 return npos;
789 }
790
791 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
792 {
793 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
794
795 size_t idx = nStart;
796 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
797 {
798 if ( !wxTmemchr(sz, *i, n) )
799 return idx;
800 }
801
802 return npos;
803 }
804
805
806 size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
807 {
808 return find_last_of(sz, nStart, wxStrlen(sz));
809 }
810
811 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
812 {
813 return find_last_not_of(sz, nStart, wxStrlen(sz));
814 }
815
816 size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
817 {
818 size_t len = length();
819
820 if ( nStart == npos )
821 {
822 nStart = len - 1;
823 }
824 else
825 {
826 wxASSERT_MSG( nStart <= len, _T("invalid index") );
827 }
828
829 size_t idx = nStart;
830 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
831 i != rend(); --idx, ++i )
832 {
833 if ( wxTmemchr(sz, *i, n) )
834 return idx;
835 }
836
837 return npos;
838 }
839
840 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
841 {
842 size_t len = length();
843
844 if ( nStart == npos )
845 {
846 nStart = len - 1;
847 }
848 else
849 {
850 wxASSERT_MSG( nStart <= len, _T("invalid index") );
851 }
852
853 size_t idx = nStart;
854 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
855 i != rend(); --idx, ++i )
856 {
857 if ( !wxTmemchr(sz, *i, n) )
858 return idx;
859 }
860
861 return npos;
862 }
863
864 size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
865 {
866 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
867
868 size_t idx = nStart;
869 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
870 {
871 if ( *i != ch )
872 return idx;
873 }
874
875 return npos;
876 }
877
878 size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
879 {
880 size_t len = length();
881
882 if ( nStart == npos )
883 {
884 nStart = len - 1;
885 }
886 else
887 {
888 wxASSERT_MSG( nStart <= len, _T("invalid index") );
889 }
890
891 size_t idx = nStart;
892 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
893 i != rend(); --idx, ++i )
894 {
895 if ( *i != ch )
896 return idx;
897 }
898
899 return npos;
900 }
901
902 // the functions above were implemented for wchar_t* arguments in Unicode
903 // build and char* in ANSI build; below are implementations for the other
904 // version:
905 #if wxUSE_UNICODE
906 #define wxOtherCharType char
907 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
908 #else
909 #define wxOtherCharType wchar_t
910 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
911 #endif
912
913 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
914 { return find_first_of(STRCONV(sz), nStart); }
915
916 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
917 size_t n) const
918 { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
919 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
920 { return find_last_of(STRCONV(sz), nStart); }
921 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
922 size_t n) const
923 { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
924 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
925 { return find_first_not_of(STRCONV(sz), nStart); }
926 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
927 size_t n) const
928 { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
929 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
930 { return find_last_not_of(STRCONV(sz), nStart); }
931 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
932 size_t n) const
933 { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
934
935 #undef wxOtherCharType
936 #undef STRCONV
937
938 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
939
940 // ===========================================================================
941 // other common string functions
942 // ===========================================================================
943
944 int wxString::CmpNoCase(const wxString& s) const
945 {
946 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
947
948 size_t idx = 0;
949 const_iterator i1 = begin();
950 const_iterator end1 = end();
951 const_iterator i2 = s.begin();
952 const_iterator end2 = s.end();
953
954 for ( ; i1 != end1 && i2 != end2; ++idx, ++i1, ++i2 )
955 {
956 wxUniChar lower1 = (wxChar)wxTolower(*i1);
957 wxUniChar lower2 = (wxChar)wxTolower(*i2);
958 if ( lower1 != lower2 )
959 return lower1 < lower2 ? -1 : 1;
960 }
961
962 size_t len1 = length();
963 size_t len2 = s.length();
964
965 if ( len1 < len2 )
966 return -1;
967 else if ( len1 > len2 )
968 return 1;
969 return 0;
970 }
971
972
973 #if wxUSE_UNICODE
974
975 #ifdef __MWERKS__
976 #ifndef __SCHAR_MAX__
977 #define __SCHAR_MAX__ 127
978 #endif
979 #endif
980
981 wxString wxString::FromAscii(const char *ascii, size_t len)
982 {
983 if (!ascii || len == 0)
984 return wxEmptyString;
985
986 wxString res;
987
988 {
989 wxStringInternalBuffer buf(res, len);
990 wxStringCharType *dest = buf;
991
992 for ( ; len > 0; --len )
993 {
994 unsigned char c = (unsigned char)*ascii++;
995 wxASSERT_MSG( c < 0x80,
996 _T("Non-ASCII value passed to FromAscii().") );
997
998 *dest++ = (wchar_t)c;
999 }
1000 }
1001
1002 return res;
1003 }
1004
1005 wxString wxString::FromAscii(const char *ascii)
1006 {
1007 return FromAscii(ascii, wxStrlen(ascii));
1008 }
1009
1010 wxString wxString::FromAscii(char ascii)
1011 {
1012 // What do we do with '\0' ?
1013
1014 unsigned char c = (unsigned char)ascii;
1015
1016 wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") );
1017
1018 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1019 return wxString(wxUniChar((wchar_t)c));
1020 }
1021
1022 const wxCharBuffer wxString::ToAscii() const
1023 {
1024 // this will allocate enough space for the terminating NUL too
1025 wxCharBuffer buffer(length());
1026 char *dest = buffer.data();
1027
1028 for ( const_iterator i = begin(); i != end(); ++i )
1029 {
1030 wxUniChar c(*i);
1031 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1032 *dest++ = c.IsAscii() ? (char)c : '_';
1033
1034 // the output string can't have embedded NULs anyhow, so we can safely
1035 // stop at first of them even if we do have any
1036 if ( !c )
1037 break;
1038 }
1039
1040 return buffer;
1041 }
1042
1043 #endif // wxUSE_UNICODE
1044
1045 // extract string of length nCount starting at nFirst
1046 wxString wxString::Mid(size_t nFirst, size_t nCount) const
1047 {
1048 size_t nLen = length();
1049
1050 // default value of nCount is npos and means "till the end"
1051 if ( nCount == npos )
1052 {
1053 nCount = nLen - nFirst;
1054 }
1055
1056 // out-of-bounds requests return sensible things
1057 if ( nFirst + nCount > nLen )
1058 {
1059 nCount = nLen - nFirst;
1060 }
1061
1062 if ( nFirst > nLen )
1063 {
1064 // AllocCopy() will return empty string
1065 return wxEmptyString;
1066 }
1067
1068 wxString dest(*this, nFirst, nCount);
1069 if ( dest.length() != nCount )
1070 {
1071 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1072 }
1073
1074 return dest;
1075 }
1076
1077 // check that the string starts with prefix and return the rest of the string
1078 // in the provided pointer if it is not NULL, otherwise return false
1079 bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
1080 {
1081 if ( compare(0, prefix.length(), prefix) != 0 )
1082 return false;
1083
1084 if ( rest )
1085 {
1086 // put the rest of the string into provided pointer
1087 rest->assign(*this, prefix.length(), npos);
1088 }
1089
1090 return true;
1091 }
1092
1093
1094 // check that the string ends with suffix and return the rest of it in the
1095 // provided pointer if it is not NULL, otherwise return false
1096 bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
1097 {
1098 int start = length() - suffix.length();
1099
1100 if ( start < 0 || compare(start, npos, suffix) != 0 )
1101 return false;
1102
1103 if ( rest )
1104 {
1105 // put the rest of the string into provided pointer
1106 rest->assign(*this, 0, start);
1107 }
1108
1109 return true;
1110 }
1111
1112
1113 // extract nCount last (rightmost) characters
1114 wxString wxString::Right(size_t nCount) const
1115 {
1116 if ( nCount > length() )
1117 nCount = length();
1118
1119 wxString dest(*this, length() - nCount, nCount);
1120 if ( dest.length() != nCount ) {
1121 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1122 }
1123 return dest;
1124 }
1125
1126 // get all characters after the last occurence of ch
1127 // (returns the whole string if ch not found)
1128 wxString wxString::AfterLast(wxUniChar ch) const
1129 {
1130 wxString str;
1131 int iPos = Find(ch, true);
1132 if ( iPos == wxNOT_FOUND )
1133 str = *this;
1134 else
1135 str = wx_str() + iPos + 1;
1136
1137 return str;
1138 }
1139
1140 // extract nCount first (leftmost) characters
1141 wxString wxString::Left(size_t nCount) const
1142 {
1143 if ( nCount > length() )
1144 nCount = length();
1145
1146 wxString dest(*this, 0, nCount);
1147 if ( dest.length() != nCount ) {
1148 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1149 }
1150 return dest;
1151 }
1152
1153 // get all characters before the first occurence of ch
1154 // (returns the whole string if ch not found)
1155 wxString wxString::BeforeFirst(wxUniChar ch) const
1156 {
1157 int iPos = Find(ch);
1158 if ( iPos == wxNOT_FOUND ) iPos = length();
1159 return wxString(*this, 0, iPos);
1160 }
1161
1162 /// get all characters before the last occurence of ch
1163 /// (returns empty string if ch not found)
1164 wxString wxString::BeforeLast(wxUniChar ch) const
1165 {
1166 wxString str;
1167 int iPos = Find(ch, true);
1168 if ( iPos != wxNOT_FOUND && iPos != 0 )
1169 str = wxString(c_str(), iPos);
1170
1171 return str;
1172 }
1173
1174 /// get all characters after the first occurence of ch
1175 /// (returns empty string if ch not found)
1176 wxString wxString::AfterFirst(wxUniChar ch) const
1177 {
1178 wxString str;
1179 int iPos = Find(ch);
1180 if ( iPos != wxNOT_FOUND )
1181 str = wx_str() + iPos + 1;
1182
1183 return str;
1184 }
1185
1186 // replace first (or all) occurences of some substring with another one
1187 size_t wxString::Replace(const wxString& strOld,
1188 const wxString& strNew, bool bReplaceAll)
1189 {
1190 // if we tried to replace an empty string we'd enter an infinite loop below
1191 wxCHECK_MSG( !strOld.empty(), 0,
1192 _T("wxString::Replace(): invalid parameter") );
1193
1194 size_t uiCount = 0; // count of replacements made
1195
1196 size_t uiOldLen = strOld.length();
1197 size_t uiNewLen = strNew.length();
1198
1199 size_t dwPos = 0;
1200
1201 while ( (*this)[dwPos] != wxT('\0') )
1202 {
1203 //DO NOT USE STRSTR HERE
1204 //this string can contain embedded null characters,
1205 //so strstr will function incorrectly
1206 dwPos = find(strOld, dwPos);
1207 if ( dwPos == npos )
1208 break; // exit the loop
1209 else
1210 {
1211 //replace this occurance of the old string with the new one
1212 replace(dwPos, uiOldLen, strNew, uiNewLen);
1213
1214 //move up pos past the string that was replaced
1215 dwPos += uiNewLen;
1216
1217 //increase replace count
1218 ++uiCount;
1219
1220 // stop now?
1221 if ( !bReplaceAll )
1222 break; // exit the loop
1223 }
1224 }
1225
1226 return uiCount;
1227 }
1228
1229 bool wxString::IsAscii() const
1230 {
1231 for ( const_iterator i = begin(); i != end(); ++i )
1232 {
1233 if ( !(*i).IsAscii() )
1234 return false;
1235 }
1236
1237 return true;
1238 }
1239
1240 bool wxString::IsWord() const
1241 {
1242 for ( const_iterator i = begin(); i != end(); ++i )
1243 {
1244 if ( !wxIsalpha(*i) )
1245 return false;
1246 }
1247
1248 return true;
1249 }
1250
1251 bool wxString::IsNumber() const
1252 {
1253 if ( empty() )
1254 return true;
1255
1256 const_iterator i = begin();
1257
1258 if ( *i == _T('-') || *i == _T('+') )
1259 ++i;
1260
1261 for ( ; i != end(); ++i )
1262 {
1263 if ( !wxIsdigit(*i) )
1264 return false;
1265 }
1266
1267 return true;
1268 }
1269
1270 wxString wxString::Strip(stripType w) const
1271 {
1272 wxString s = *this;
1273 if ( w & leading ) s.Trim(false);
1274 if ( w & trailing ) s.Trim(true);
1275 return s;
1276 }
1277
1278 // ---------------------------------------------------------------------------
1279 // case conversion
1280 // ---------------------------------------------------------------------------
1281
1282 wxString& wxString::MakeUpper()
1283 {
1284 for ( iterator it = begin(), en = end(); it != en; ++it )
1285 *it = (wxChar)wxToupper(*it);
1286
1287 return *this;
1288 }
1289
1290 wxString& wxString::MakeLower()
1291 {
1292 for ( iterator it = begin(), en = end(); it != en; ++it )
1293 *it = (wxChar)wxTolower(*it);
1294
1295 return *this;
1296 }
1297
1298 // ---------------------------------------------------------------------------
1299 // trimming and padding
1300 // ---------------------------------------------------------------------------
1301
1302 // some compilers (VC++ 6.0 not to name them) return true for a call to
1303 // isspace('ê') in the C locale which seems to be broken to me, but we have to
1304 // live with this by checking that the character is a 7 bit one - even if this
1305 // may fail to detect some spaces (I don't know if Unicode doesn't have
1306 // space-like symbols somewhere except in the first 128 chars), it is arguably
1307 // still better than trimming away accented letters
1308 inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1309
1310 // trims spaces (in the sense of isspace) from left or right side
1311 wxString& wxString::Trim(bool bFromRight)
1312 {
1313 // first check if we're going to modify the string at all
1314 if ( !empty() &&
1315 (
1316 (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1317 (!bFromRight && wxSafeIsspace(GetChar(0u)))
1318 )
1319 )
1320 {
1321 if ( bFromRight )
1322 {
1323 // find last non-space character
1324 reverse_iterator psz = rbegin();
1325 while ( (psz != rend()) && wxSafeIsspace(*psz) )
1326 psz++;
1327
1328 // truncate at trailing space start
1329 erase(psz.base(), end());
1330 }
1331 else
1332 {
1333 // find first non-space character
1334 iterator psz = begin();
1335 while ( (psz != end()) && wxSafeIsspace(*psz) )
1336 psz++;
1337
1338 // fix up data and length
1339 erase(begin(), psz);
1340 }
1341 }
1342
1343 return *this;
1344 }
1345
1346 // adds nCount characters chPad to the string from either side
1347 wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1348 {
1349 wxString s(chPad, nCount);
1350
1351 if ( bFromRight )
1352 *this += s;
1353 else
1354 {
1355 s += *this;
1356 swap(s);
1357 }
1358
1359 return *this;
1360 }
1361
1362 // truncate the string
1363 wxString& wxString::Truncate(size_t uiLen)
1364 {
1365 if ( uiLen < length() )
1366 {
1367 erase(begin() + uiLen, end());
1368 }
1369 //else: nothing to do, string is already short enough
1370
1371 return *this;
1372 }
1373
1374 // ---------------------------------------------------------------------------
1375 // finding (return wxNOT_FOUND if not found and index otherwise)
1376 // ---------------------------------------------------------------------------
1377
1378 // find a character
1379 int wxString::Find(wxUniChar ch, bool bFromEnd) const
1380 {
1381 size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1382
1383 return (idx == npos) ? wxNOT_FOUND : (int)idx;
1384 }
1385
1386 // ----------------------------------------------------------------------------
1387 // conversion to numbers
1388 // ----------------------------------------------------------------------------
1389
1390 // The implementation of all the functions below is exactly the same so factor
1391 // it out. Note that number extraction works correctly on UTF-8 strings, so
1392 // we can use wxStringCharType and wx_str() for maximum efficiency.
1393
1394 #ifndef __WXWINCE__
1395 #define DO_IF_NOT_WINCE(x) x
1396 #else
1397 #define DO_IF_NOT_WINCE(x)
1398 #endif
1399
1400 #define WX_STRING_TO_INT_TYPE(val, base, func) \
1401 wxCHECK_MSG( val, false, _T("NULL output pointer") ); \
1402 wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") ); \
1403 \
1404 DO_IF_NOT_WINCE( errno = 0; ) \
1405 \
1406 const wxStringCharType *start = wx_str(); \
1407 wxStringCharType *end; \
1408 *val = func(start, &end, base); \
1409 \
1410 /* return true only if scan was stopped by the terminating NUL and */ \
1411 /* if the string was not empty to start with and no under/overflow */ \
1412 /* occurred: */ \
1413 return !*end && (end != start) \
1414 DO_IF_NOT_WINCE( && (errno != ERANGE) )
1415
1416 bool wxString::ToLong(long *val, int base) const
1417 {
1418 WX_STRING_TO_INT_TYPE(val, base, wxStrtol);
1419 }
1420
1421 bool wxString::ToULong(unsigned long *val, int base) const
1422 {
1423 WX_STRING_TO_INT_TYPE(val, base, wxStrtoul);
1424 }
1425
1426 bool wxString::ToLongLong(wxLongLong_t *val, int base) const
1427 {
1428 WX_STRING_TO_INT_TYPE(val, base, wxStrtoll);
1429 }
1430
1431 bool wxString::ToULongLong(wxULongLong_t *val, int base) const
1432 {
1433 WX_STRING_TO_INT_TYPE(val, base, wxStrtoull);
1434 }
1435
1436 bool wxString::ToDouble(double *val) const
1437 {
1438 wxCHECK_MSG( val, false, _T("NULL pointer in wxString::ToDouble") );
1439
1440 #ifndef __WXWINCE__
1441 errno = 0;
1442 #endif
1443
1444 const wxChar *start = c_str();
1445 wxChar *end;
1446 *val = wxStrtod(start, &end);
1447
1448 // return true only if scan was stopped by the terminating NUL and if the
1449 // string was not empty to start with and no under/overflow occurred
1450 return !*end && (end != start)
1451 #ifndef __WXWINCE__
1452 && (errno != ERANGE)
1453 #endif
1454 ;
1455 }
1456
1457 // ---------------------------------------------------------------------------
1458 // formatted output
1459 // ---------------------------------------------------------------------------
1460
1461 #if !wxUSE_UTF8_LOCALE_ONLY
1462 /* static */
1463 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1464 wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
1465 #else
1466 wxString wxString::DoFormatWchar(const wxChar *format, ...)
1467 #endif
1468 {
1469 va_list argptr;
1470 va_start(argptr, format);
1471
1472 wxString s;
1473 s.PrintfV(format, argptr);
1474
1475 va_end(argptr);
1476
1477 return s;
1478 }
1479 #endif // !wxUSE_UTF8_LOCALE_ONLY
1480
#if wxUSE_UNICODE_UTF8
// Returns a new string built by PrintfV() from the char format string and the
// variable arguments following it.
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
1496
1497 /* static */
1498 wxString wxString::FormatV(const wxString& format, va_list argptr)
1499 {
1500 wxString s;
1501 s.PrintfV(format, argptr);
1502 return s;
1503 }
1504
1505 #if !wxUSE_UTF8_LOCALE_ONLY
1506 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1507 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
1508 #else
1509 int wxString::DoPrintfWchar(const wxChar *format, ...)
1510 #endif
1511 {
1512 va_list argptr;
1513 va_start(argptr, format);
1514
1515 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1516 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1517 // because it's the only cast that works safely for downcasting when
1518 // multiple inheritance is used:
1519 wxString *str = static_cast<wxString*>(this);
1520 #else
1521 wxString *str = this;
1522 #endif
1523
1524 int iLen = str->PrintfV(format, argptr);
1525
1526 va_end(argptr);
1527
1528 return iLen;
1529 }
1530 #endif // !wxUSE_UTF8_LOCALE_ONLY
1531
#if wxUSE_UNICODE_UTF8
// Replaces the contents of the string with the result of formatting the
// variable arguments according to the char format string; returns whatever
// PrintfV() returns.
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);
    const int nRet = PrintfV(format, args);
    va_end(args);

    return nRet;
}
#endif // wxUSE_UNICODE_UTF8
1545
1546 #if wxUSE_UNICODE_UTF8
1547 template<typename BufferType>
1548 #else
1549 // we only need one version in non-UTF8 builds and at least two Windows
1550 // compilers have problems with this function template, so use just one
1551 // normal function here
1552 #endif
1553 static int DoStringPrintfV(wxString& str,
1554 const wxString& format, va_list argptr)
1555 {
1556 int size = 1024;
1557
1558 for ( ;; )
1559 {
1560 #if wxUSE_UNICODE_UTF8
1561 BufferType tmp(str, size + 1);
1562 typename BufferType::CharType *buf = tmp;
1563 #else
1564 wxStringBuffer tmp(str, size + 1);
1565 wxChar *buf = tmp;
1566 #endif
1567
1568 if ( !buf )
1569 {
1570 // out of memory
1571
1572 // in UTF-8 build, leaving uninitialized junk in the buffer
1573 // could result in invalid non-empty UTF-8 string, so just
1574 // reset the string to empty on failure:
1575 buf[0] = '\0';
1576 return -1;
1577 }
1578
1579 // wxVsnprintf() may modify the original arg pointer, so pass it
1580 // only a copy
1581 va_list argptrcopy;
1582 wxVaCopy(argptrcopy, argptr);
1583 int len = wxVsnprintf(buf, size, format, argptrcopy);
1584 va_end(argptrcopy);
1585
1586 // some implementations of vsnprintf() don't NUL terminate
1587 // the string if there is not enough space for it so
1588 // always do it manually
1589 buf[size] = _T('\0');
1590
1591 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1592 // total number of characters which would have been written if the
1593 // buffer were large enough (newer standards such as Unix98)
1594 if ( len < 0 )
1595 {
1596 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1597 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1598 // is true if *both* of them use our own implementation,
1599 // otherwise we can't be sure
1600 #if wxUSE_WXVSNPRINTF
1601 // we know that our own implementation of wxVsnprintf() returns -1
1602 // only for a format error - thus there's something wrong with
1603 // the user's format string
1604 buf[0] = '\0';
1605 return -1;
1606 #else // possibly using system version
1607 // assume it only returns error if there is not enough space, but
1608 // as we don't know how much we need, double the current size of
1609 // the buffer
1610 size *= 2;
1611 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1612 }
1613 else if ( len >= size )
1614 {
1615 #if wxUSE_WXVSNPRINTF
1616 // we know that our own implementation of wxVsnprintf() returns
1617 // size+1 when there's not enough space but that's not the size
1618 // of the required buffer!
1619 size *= 2; // so we just double the current size of the buffer
1620 #else
1621 // some vsnprintf() implementations NUL-terminate the buffer and
1622 // some don't in len == size case, to be safe always add 1
1623 size = len + 1;
1624 #endif
1625 }
1626 else // ok, there was enough space
1627 {
1628 break;
1629 }
1630 }
1631
1632 // we could have overshot
1633 str.Shrink();
1634
1635 return str.length();
1636 }
1637
1638 int wxString::PrintfV(const wxString& format, va_list argptr)
1639 {
1640 #if wxUSE_UNICODE_UTF8
1641 #if wxUSE_STL_BASED_WXSTRING
1642 typedef wxStringTypeBuffer<char> Utf8Buffer;
1643 #else
1644 typedef wxStringInternalBuffer Utf8Buffer;
1645 #endif
1646 #endif
1647
1648 #if wxUSE_UTF8_LOCALE_ONLY
1649 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
1650 #else
1651 #if wxUSE_UNICODE_UTF8
1652 if ( wxLocaleIsUtf8 )
1653 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
1654 else
1655 // wxChar* version
1656 return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
1657 #else
1658 return DoStringPrintfV(*this, format, argptr);
1659 #endif // UTF8/WCHAR
1660 #endif
1661 }
1662
1663 // ----------------------------------------------------------------------------
1664 // misc other operations
1665 // ----------------------------------------------------------------------------
1666
1667 // returns true if the string matches the pattern which may contain '*' and
1668 // '?' metacharacters (as usual, '?' matches any character and '*' any number
1669 // of them)
1670 bool wxString::Matches(const wxString& mask) const
1671 {
1672 // I disable this code as it doesn't seem to be faster (in fact, it seems
1673 // to be much slower) than the old, hand-written code below and using it
1674 // here requires always linking with libregex even if the user code doesn't
1675 // use it
1676 #if 0 // wxUSE_REGEX
1677 // first translate the shell-like mask into a regex
1678 wxString pattern;
1679 pattern.reserve(wxStrlen(pszMask));
1680
1681 pattern += _T('^');
1682 while ( *pszMask )
1683 {
1684 switch ( *pszMask )
1685 {
1686 case _T('?'):
1687 pattern += _T('.');
1688 break;
1689
1690 case _T('*'):
1691 pattern += _T(".*");
1692 break;
1693
1694 case _T('^'):
1695 case _T('.'):
1696 case _T('$'):
1697 case _T('('):
1698 case _T(')'):
1699 case _T('|'):
1700 case _T('+'):
1701 case _T('\\'):
1702 // these characters are special in a RE, quote them
1703 // (however note that we don't quote '[' and ']' to allow
1704 // using them for Unix shell like matching)
1705 pattern += _T('\\');
1706 // fall through
1707
1708 default:
1709 pattern += *pszMask;
1710 }
1711
1712 pszMask++;
1713 }
1714 pattern += _T('$');
1715
1716 // and now use it
1717 return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
1718 #else // !wxUSE_REGEX
1719 // TODO: this is, of course, awfully inefficient...
1720
1721 // FIXME-UTF8: implement using iterators, remove #if
1722 #if wxUSE_UNICODE_UTF8
1723 wxWCharBuffer maskBuf = mask.wc_str();
1724 wxWCharBuffer txtBuf = wc_str();
1725 const wxChar *pszMask = maskBuf.data();
1726 const wxChar *pszTxt = txtBuf.data();
1727 #else
1728 const wxChar *pszMask = mask.wx_str();
1729 // the char currently being checked
1730 const wxChar *pszTxt = wx_str();
1731 #endif
1732
1733 // the last location where '*' matched
1734 const wxChar *pszLastStarInText = NULL;
1735 const wxChar *pszLastStarInMask = NULL;
1736
1737 match:
1738 for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
1739 switch ( *pszMask ) {
1740 case wxT('?'):
1741 if ( *pszTxt == wxT('\0') )
1742 return false;
1743
1744 // pszTxt and pszMask will be incremented in the loop statement
1745
1746 break;
1747
1748 case wxT('*'):
1749 {
1750 // remember where we started to be able to backtrack later
1751 pszLastStarInText = pszTxt;
1752 pszLastStarInMask = pszMask;
1753
1754 // ignore special chars immediately following this one
1755 // (should this be an error?)
1756 while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
1757 pszMask++;
1758
1759 // if there is nothing more, match
1760 if ( *pszMask == wxT('\0') )
1761 return true;
1762
1763 // are there any other metacharacters in the mask?
1764 size_t uiLenMask;
1765 const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));
1766
1767 if ( pEndMask != NULL ) {
1768 // we have to match the string between two metachars
1769 uiLenMask = pEndMask - pszMask;
1770 }
1771 else {
1772 // we have to match the remainder of the string
1773 uiLenMask = wxStrlen(pszMask);
1774 }
1775
1776 wxString strToMatch(pszMask, uiLenMask);
1777 const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
1778 if ( pMatch == NULL )
1779 return false;
1780
1781 // -1 to compensate "++" in the loop
1782 pszTxt = pMatch + uiLenMask - 1;
1783 pszMask += uiLenMask - 1;
1784 }
1785 break;
1786
1787 default:
1788 if ( *pszMask != *pszTxt )
1789 return false;
1790 break;
1791 }
1792 }
1793
1794 // match only if nothing left
1795 if ( *pszTxt == wxT('\0') )
1796 return true;
1797
1798 // if we failed to match, backtrack if we can
1799 if ( pszLastStarInText ) {
1800 pszTxt = pszLastStarInText + 1;
1801 pszMask = pszLastStarInMask;
1802
1803 pszLastStarInText = NULL;
1804
1805 // don't bother resetting pszLastStarInMask, it's unnecessary
1806
1807 goto match;
1808 }
1809
1810 return false;
1811 #endif // wxUSE_REGEX/!wxUSE_REGEX
1812 }
1813
1814 // Count the number of chars
1815 int wxString::Freq(wxUniChar ch) const
1816 {
1817 int count = 0;
1818 for ( const_iterator i = begin(); i != end(); ++i )
1819 {
1820 if ( *i == ch )
1821 count ++;
1822 }
1823 return count;
1824 }
1825
1826 // convert to upper case, return the copy of the string
1827 wxString wxString::Upper() const
1828 { wxString s(*this); return s.MakeUpper(); }
1829
1830 // convert to lower case, return the copy of the string
1831 wxString wxString::Lower() const { wxString s(*this); return s.MakeLower(); }
1832
1833 // ----------------------------------------------------------------------------
1834 // wxUTF8StringBuffer
1835 // ----------------------------------------------------------------------------
1836
1837 #if wxUSE_UNICODE_WCHAR
1838 wxUTF8StringBuffer::~wxUTF8StringBuffer()
1839 {
1840 wxMBConvStrictUTF8 conv;
1841 size_t wlen = conv.ToWChar(NULL, 0, m_buf);
1842 wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
1843
1844 wxStringInternalBuffer wbuf(m_str, wlen);
1845 conv.ToWChar(wbuf, wlen, m_buf);
1846 }
1847
1848 wxUTF8StringBufferLength::~wxUTF8StringBufferLength()
1849 {
1850 wxCHECK_RET(m_lenSet, "length not set");
1851
1852 wxMBConvStrictUTF8 conv;
1853 size_t wlen = conv.ToWChar(NULL, 0, m_buf, m_len);
1854 wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
1855
1856 wxStringInternalBufferLength wbuf(m_str, wlen);
1857 conv.ToWChar(wbuf, wlen, m_buf, m_len);
1858 wbuf.SetLength(wlen);
1859 }
1860 #endif // wxUSE_UNICODE_WCHAR