]> git.saurik.com Git - wxWidgets.git/blob - src/common/string.cpp
7b9f77b28b47918eed64cac21e89ffcf4e75ef1a
[wxWidgets.git] / src / common / string.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
5 // Modified by:
6 // Created: 29/01/98
7 // RCS-ID: $Id$
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
12
13 // ===========================================================================
14 // headers, declarations, constants
15 // ===========================================================================
16
17 // For compilers that support precompilation, includes "wx.h".
18 #include "wx/wxprec.h"
19
20 #ifdef __BORLANDC__
21 #pragma hdrstop
22 #endif
23
24 #ifndef WX_PRECOMP
25 #include "wx/string.h"
26 #include "wx/wxcrtvararg.h"
27 #endif
28
29 #include <ctype.h>
30
31 #ifndef __WXWINCE__
32 #include <errno.h>
33 #endif
34
35 #include <string.h>
36 #include <stdlib.h>
37
38 #ifdef __SALFORDC__
39 #include <clib.h>
40 #endif
41
42 #include "wx/hashmap.h"
43
44 // string handling functions used by wxString:
45 #if wxUSE_UNICODE_UTF8
46 #define wxStringMemcpy memcpy
47 #define wxStringMemcmp memcmp
48 #define wxStringMemchr memchr
49 #define wxStringStrlen strlen
50 #else
51 #define wxStringMemcpy wxTmemcpy
52 #define wxStringMemcmp wxTmemcmp
53 #define wxStringMemchr wxTmemchr
54 #define wxStringStrlen wxStrlen
55 #endif
56
57
58 // ---------------------------------------------------------------------------
59 // static class variables definition
60 // ---------------------------------------------------------------------------
61
62 //According to STL _must_ be a -1 size_t
63 const size_t wxString::npos = (size_t) -1;
64
65 // ----------------------------------------------------------------------------
66 // global functions
67 // ----------------------------------------------------------------------------
68
69 #if wxUSE_STD_IOSTREAM
70
71 #include <iostream>
72
73 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
74 {
75 // FIXME-UTF8: always, not only if wxUSE_UNICODE
76 #if wxUSE_UNICODE && !defined(__BORLANDC__)
77 return os << (const wchar_t*)str.AsWCharBuf();
78 #else
79 return os << (const char*)str.AsCharBuf();
80 #endif
81 }
82
83 wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
84 {
85 return os << str.c_str();
86 }
87
88 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str)
89 {
90 return os << str.data();
91 }
92
93 #ifndef __BORLANDC__
94 wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)
95 {
96 return os << str.data();
97 }
98 #endif
99
100 #endif // wxUSE_STD_IOSTREAM
101
102 // ===========================================================================
103 // wxString class core
104 // ===========================================================================
105
106 #if wxUSE_UNICODE_UTF8
107
108 void wxString::PosLenToImpl(size_t pos, size_t len,
109 size_t *implPos, size_t *implLen) const
110 {
111 if ( pos == npos )
112 *implPos = npos;
113 else
114 {
115 const_iterator i = begin() + pos;
116 *implPos = wxStringImpl::const_iterator(i.impl()) - m_impl.begin();
117 if ( len == npos )
118 *implLen = npos;
119 else
120 {
121 // too large length is interpreted as "to the end of the string"
122 // FIXME-UTF8: verify this is the case in std::string, assert
123 // otherwise
124 if ( pos + len > length() )
125 len = length() - pos;
126
127 *implLen = (i + len).impl() - i.impl();
128 }
129 }
130 }
131
132 #endif // wxUSE_UNICODE_UTF8
133
134 // ----------------------------------------------------------------------------
135 // wxCStrData converted strings caching
136 // ----------------------------------------------------------------------------
137
138 // FIXME-UTF8: temporarily disabled because it doesn't work with global
139 // string objects; re-enable after fixing this bug and benchmarking
140 // performance to see if using a hash is a good idea at all
141 #if 0
142
143 // For backward compatibility reasons, it must be possible to assign the value
144 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
145 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
146 // because the memory would be freed immediately, but it has to be valid as long
147 // as the string is not modified, so that code like this still works:
148 //
149 // const wxChar *s = str.c_str();
150 // while ( s ) { ... }
151
152 // FIXME-UTF8: not thread safe!
153 // FIXME-UTF8: we currently clear the cached conversion only when the string is
154 // destroyed, but we should do it when the string is modified, to
155 // keep memory usage down
156 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
157 // invalidated the cache on every change, we could keep the previous
158 // conversion
159 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
160 // to use mb_str() or wc_str() instead of (const [w]char*)c_str()
161
162 template<typename T>
163 static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
164 {
165 typename T::iterator i = hash.find(wxConstCast(s, wxString));
166 if ( i != hash.end() )
167 {
168 free(i->second);
169 hash.erase(i);
170 }
171 }
172
173 #if wxUSE_UNICODE
174 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
175 // so we have to use wxString* here and const-cast when used
176 WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
177 wxStringCharConversionCache);
178 static wxStringCharConversionCache gs_stringsCharCache;
179
180 const char* wxCStrData::AsChar() const
181 {
182 // remove previously cache value, if any (see FIXMEs above):
183 DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
184
185 // convert the string and keep it:
186 const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
187 m_str->mb_str().release();
188
189 return s + m_offset;
190 }
191 #endif // wxUSE_UNICODE
192
193 #if !wxUSE_UNICODE_WCHAR
194 WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
195 wxStringWCharConversionCache);
196 static wxStringWCharConversionCache gs_stringsWCharCache;
197
198 const wchar_t* wxCStrData::AsWChar() const
199 {
200 // remove previously cache value, if any (see FIXMEs above):
201 DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
202
203 // convert the string and keep it:
204 const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
205 m_str->wc_str().release();
206
207 return s + m_offset;
208 }
209 #endif // !wxUSE_UNICODE_WCHAR
210
211 wxString::~wxString()
212 {
213 #if wxUSE_UNICODE
214 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
215 DeleteStringFromConversionCache(gs_stringsCharCache, this);
216 #endif
217 #if !wxUSE_UNICODE_WCHAR
218 DeleteStringFromConversionCache(gs_stringsWCharCache, this);
219 #endif
220 }
221 #endif
222
223 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
224 const char* wxCStrData::AsChar() const
225 {
226 #if wxUSE_UNICODE_UTF8
227 if ( wxLocaleIsUtf8 )
228 return AsInternal();
229 #endif
230 // under non-UTF8 locales, we have to convert the internal UTF-8
231 // representation using wxConvLibc and cache the result
232
233 wxString *str = wxConstCast(m_str, wxString);
234
235 // convert the string:
236 wxCharBuffer buf(str->mb_str());
237
238 // FIXME-UTF8: do the conversion in-place in the existing buffer
239 if ( str->m_convertedToChar &&
240 strlen(buf) == strlen(str->m_convertedToChar) )
241 {
242 // keep the same buffer for as long as possible, so that several calls
243 // to c_str() in a row still work:
244 strcpy(str->m_convertedToChar, buf);
245 }
246 else
247 {
248 str->m_convertedToChar = buf.release();
249 }
250
251 // and keep it:
252 return str->m_convertedToChar + m_offset;
253 }
254 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
255
256 #if !wxUSE_UNICODE_WCHAR
257 const wchar_t* wxCStrData::AsWChar() const
258 {
259 wxString *str = wxConstCast(m_str, wxString);
260
261 // convert the string:
262 wxWCharBuffer buf(str->wc_str());
263
264 // FIXME-UTF8: do the conversion in-place in the existing buffer
265 if ( str->m_convertedToWChar &&
266 wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
267 {
268 // keep the same buffer for as long as possible, so that several calls
269 // to c_str() in a row still work:
270 memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
271 }
272 else
273 {
274 str->m_convertedToWChar = buf.release();
275 }
276
277 // and keep it:
278 return str->m_convertedToWChar + m_offset;
279 }
280 #endif // !wxUSE_UNICODE_WCHAR
281
282 // ===========================================================================
283 // wxString class core
284 // ===========================================================================
285
286 // ---------------------------------------------------------------------------
287 // construction and conversion
288 // ---------------------------------------------------------------------------
289
290 #if wxUSE_UNICODE_WCHAR
291 /* static */
292 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
293 const wxMBConv& conv)
294 {
295 // anything to do?
296 if ( !psz || nLength == 0 )
297 return SubstrBufFromMB(L"", 0);
298
299 if ( nLength == npos )
300 nLength = wxNO_LEN;
301
302 size_t wcLen;
303 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
304 if ( !wcLen )
305 return SubstrBufFromMB(_T(""), 0);
306 else
307 return SubstrBufFromMB(wcBuf, wcLen);
308 }
309 #endif // wxUSE_UNICODE_WCHAR
310
311 #if wxUSE_UNICODE_UTF8
312 /* static */
313 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
314 const wxMBConv& conv)
315 {
316 // anything to do?
317 if ( !psz || nLength == 0 )
318 return SubstrBufFromMB("", 0);
319
320 // if psz is already in UTF-8, we don't have to do the roundtrip to
321 // wchar_t* and back:
322 if ( conv.IsUTF8() )
323 {
324 // we need to validate the input because UTF8 iterators assume valid
325 // UTF-8 sequence and psz may be invalid:
326 if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
327 {
328 return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz), nLength);
329 }
330 // else: do the roundtrip through wchar_t*
331 }
332
333 if ( nLength == npos )
334 nLength = wxNO_LEN;
335
336 // first convert to wide string:
337 size_t wcLen;
338 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
339 if ( !wcLen )
340 return SubstrBufFromMB("", 0);
341
342 // and then to UTF-8:
343 SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvUTF8()));
344 // widechar -> UTF-8 conversion isn't supposed to ever fail:
345 wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
346
347 return buf;
348 }
349 #endif // wxUSE_UNICODE_UTF8
350
351 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
352 /* static */
353 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
354 const wxMBConv& conv)
355 {
356 // anything to do?
357 if ( !pwz || nLength == 0 )
358 return SubstrBufFromWC("", 0);
359
360 if ( nLength == npos )
361 nLength = wxNO_LEN;
362
363 size_t mbLen;
364 wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
365 if ( !mbLen )
366 return SubstrBufFromWC("", 0);
367 else
368 return SubstrBufFromWC(mbBuf, mbLen);
369 }
370 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
371
372
373 #if wxUSE_UNICODE_WCHAR
374
375 //Convert wxString in Unicode mode to a multi-byte string
376 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
377 {
378 return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL);
379 }
380
381 #elif wxUSE_UNICODE_UTF8
382
383 const wxWCharBuffer wxString::wc_str() const
384 {
385 return wxMBConvUTF8().cMB2WC(m_impl.c_str(),
386 m_impl.length() + 1 /* size, not length */,
387 NULL);
388 }
389
390 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
391 {
392 if ( conv.IsUTF8() )
393 return wxCharBuffer::CreateNonOwned(m_impl.c_str());
394
395 // FIXME-UTF8: use wc_str() here once we have buffers with length
396
397 size_t wcLen;
398 wxWCharBuffer wcBuf(
399 wxMBConvUTF8().cMB2WC(m_impl.c_str(),
400 m_impl.length() + 1 /* size, not length */,
401 &wcLen));
402 if ( !wcLen )
403 return wxCharBuffer("");
404
405 return conv.cWC2MB(wcBuf, wcLen, NULL);
406 }
407
408 #else // ANSI
409
410 //Converts this string to a wide character string if unicode
411 //mode is not enabled and wxUSE_WCHAR_T is enabled
412 const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
413 {
414 return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL);
415 }
416
417 #endif // Unicode/ANSI
418
419 // shrink to minimal size (releasing extra memory)
420 bool wxString::Shrink()
421 {
422 wxString tmp(begin(), end());
423 swap(tmp);
424 return tmp.length() == length();
425 }
426
427 // deprecated compatibility code:
428 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
429 wxStringCharType *wxString::GetWriteBuf(size_t nLen)
430 {
431 return DoGetWriteBuf(nLen);
432 }
433
434 void wxString::UngetWriteBuf()
435 {
436 DoUngetWriteBuf();
437 }
438
439 void wxString::UngetWriteBuf(size_t nLen)
440 {
441 DoUngetWriteBuf(nLen);
442 }
443 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
444
445
446 // ---------------------------------------------------------------------------
447 // data access
448 // ---------------------------------------------------------------------------
449
450 // all functions are inline in string.h
451
452 // ---------------------------------------------------------------------------
453 // concatenation operators
454 // ---------------------------------------------------------------------------
455
456 /*
457 * concatenation functions come in 5 flavours:
458 * string + string
459 * char + string and string + char
460 * C str + string and string + C str
461 */
462
463 wxString operator+(const wxString& str1, const wxString& str2)
464 {
465 #if !wxUSE_STL_BASED_WXSTRING
466 wxASSERT( str1.IsValid() );
467 wxASSERT( str2.IsValid() );
468 #endif
469
470 wxString s = str1;
471 s += str2;
472
473 return s;
474 }
475
476 wxString operator+(const wxString& str, wxUniChar ch)
477 {
478 #if !wxUSE_STL_BASED_WXSTRING
479 wxASSERT( str.IsValid() );
480 #endif
481
482 wxString s = str;
483 s += ch;
484
485 return s;
486 }
487
488 wxString operator+(wxUniChar ch, const wxString& str)
489 {
490 #if !wxUSE_STL_BASED_WXSTRING
491 wxASSERT( str.IsValid() );
492 #endif
493
494 wxString s = ch;
495 s += str;
496
497 return s;
498 }
499
500 wxString operator+(const wxString& str, const char *psz)
501 {
502 #if !wxUSE_STL_BASED_WXSTRING
503 wxASSERT( str.IsValid() );
504 #endif
505
506 wxString s;
507 if ( !s.Alloc(strlen(psz) + str.length()) ) {
508 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
509 }
510 s += str;
511 s += psz;
512
513 return s;
514 }
515
516 wxString operator+(const wxString& str, const wchar_t *pwz)
517 {
518 #if !wxUSE_STL_BASED_WXSTRING
519 wxASSERT( str.IsValid() );
520 #endif
521
522 wxString s;
523 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
524 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
525 }
526 s += str;
527 s += pwz;
528
529 return s;
530 }
531
532 wxString operator+(const char *psz, const wxString& str)
533 {
534 #if !wxUSE_STL_BASED_WXSTRING
535 wxASSERT( str.IsValid() );
536 #endif
537
538 wxString s;
539 if ( !s.Alloc(strlen(psz) + str.length()) ) {
540 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
541 }
542 s = psz;
543 s += str;
544
545 return s;
546 }
547
548 wxString operator+(const wchar_t *pwz, const wxString& str)
549 {
550 #if !wxUSE_STL_BASED_WXSTRING
551 wxASSERT( str.IsValid() );
552 #endif
553
554 wxString s;
555 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
556 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
557 }
558 s = pwz;
559 s += str;
560
561 return s;
562 }
563
564 // ---------------------------------------------------------------------------
565 // string comparison
566 // ---------------------------------------------------------------------------
567
568 bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
569 {
570 return (length() == 1) && (compareWithCase ? GetChar(0u) == c
571 : wxToupper(GetChar(0u)) == wxToupper(c));
572 }
573
574 #ifdef HAVE_STD_STRING_COMPARE
575
576 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
577 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
578 // sort strings in characters code point order by sorting the byte sequence
579 // in byte values order (i.e. what strcmp() and memcmp() do).
580
581 int wxString::compare(const wxString& str) const
582 {
583 return m_impl.compare(str.m_impl);
584 }
585
586 int wxString::compare(size_t nStart, size_t nLen,
587 const wxString& str) const
588 {
589 size_t pos, len;
590 PosLenToImpl(nStart, nLen, &pos, &len);
591 return m_impl.compare(pos, len, str.m_impl);
592 }
593
594 int wxString::compare(size_t nStart, size_t nLen,
595 const wxString& str,
596 size_t nStart2, size_t nLen2) const
597 {
598 size_t pos, len;
599 PosLenToImpl(nStart, nLen, &pos, &len);
600
601 size_t pos2, len2;
602 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
603
604 return m_impl.compare(pos, len, str.m_impl, pos2, len2);
605 }
606
607 int wxString::compare(const char* sz) const
608 {
609 return m_impl.compare(ImplStr(sz));
610 }
611
612 int wxString::compare(const wchar_t* sz) const
613 {
614 return m_impl.compare(ImplStr(sz));
615 }
616
617 int wxString::compare(size_t nStart, size_t nLen,
618 const char* sz, size_t nCount) const
619 {
620 size_t pos, len;
621 PosLenToImpl(nStart, nLen, &pos, &len);
622
623 SubstrBufFromMB str(ImplStr(sz, nCount));
624
625 return m_impl.compare(pos, len, str.data, str.len);
626 }
627
628 int wxString::compare(size_t nStart, size_t nLen,
629 const wchar_t* sz, size_t nCount) const
630 {
631 size_t pos, len;
632 PosLenToImpl(nStart, nLen, &pos, &len);
633
634 SubstrBufFromWC str(ImplStr(sz, nCount));
635
636 return m_impl.compare(pos, len, str.data, str.len);
637 }
638
639 #else // !HAVE_STD_STRING_COMPARE
640
641 static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
642 const wxStringCharType* s2, size_t l2)
643 {
644 if( l1 == l2 )
645 return wxStringMemcmp(s1, s2, l1);
646 else if( l1 < l2 )
647 {
648 int ret = wxStringMemcmp(s1, s2, l1);
649 return ret == 0 ? -1 : ret;
650 }
651 else
652 {
653 int ret = wxStringMemcmp(s1, s2, l2);
654 return ret == 0 ? +1 : ret;
655 }
656 }
657
658 int wxString::compare(const wxString& str) const
659 {
660 return ::wxDoCmp(m_impl.data(), m_impl.length(),
661 str.m_impl.data(), str.m_impl.length());
662 }
663
664 int wxString::compare(size_t nStart, size_t nLen,
665 const wxString& str) const
666 {
667 wxASSERT(nStart <= length());
668 size_type strLen = length() - nStart;
669 nLen = strLen < nLen ? strLen : nLen;
670
671 size_t pos, len;
672 PosLenToImpl(nStart, nLen, &pos, &len);
673
674 return ::wxDoCmp(m_impl.data() + pos, len,
675 str.m_impl.data(), str.m_impl.length());
676 }
677
678 int wxString::compare(size_t nStart, size_t nLen,
679 const wxString& str,
680 size_t nStart2, size_t nLen2) const
681 {
682 wxASSERT(nStart <= length());
683 wxASSERT(nStart2 <= str.length());
684 size_type strLen = length() - nStart,
685 strLen2 = str.length() - nStart2;
686 nLen = strLen < nLen ? strLen : nLen;
687 nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
688
689 size_t pos, len;
690 PosLenToImpl(nStart, nLen, &pos, &len);
691 size_t pos2, len2;
692 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
693
694 return ::wxDoCmp(m_impl.data() + pos, len,
695 str.m_impl.data() + pos2, len2);
696 }
697
698 int wxString::compare(const char* sz) const
699 {
700 SubstrBufFromMB str(ImplStr(sz, npos));
701 if ( str.len == npos )
702 str.len = wxStringStrlen(str.data);
703 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
704 }
705
706 int wxString::compare(const wchar_t* sz) const
707 {
708 SubstrBufFromWC str(ImplStr(sz, npos));
709 if ( str.len == npos )
710 str.len = wxStringStrlen(str.data);
711 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
712 }
713
714 int wxString::compare(size_t nStart, size_t nLen,
715 const char* sz, size_t nCount) const
716 {
717 wxASSERT(nStart <= length());
718 size_type strLen = length() - nStart;
719 nLen = strLen < nLen ? strLen : nLen;
720
721 size_t pos, len;
722 PosLenToImpl(nStart, nLen, &pos, &len);
723
724 SubstrBufFromMB str(ImplStr(sz, nCount));
725 if ( str.len == npos )
726 str.len = wxStringStrlen(str.data);
727
728 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
729 }
730
731 int wxString::compare(size_t nStart, size_t nLen,
732 const wchar_t* sz, size_t nCount) const
733 {
734 wxASSERT(nStart <= length());
735 size_type strLen = length() - nStart;
736 nLen = strLen < nLen ? strLen : nLen;
737
738 size_t pos, len;
739 PosLenToImpl(nStart, nLen, &pos, &len);
740
741 SubstrBufFromWC str(ImplStr(sz, nCount));
742 if ( str.len == npos )
743 str.len = wxStringStrlen(str.data);
744
745 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
746 }
747
748 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
749
750
751 // ---------------------------------------------------------------------------
752 // find_{first,last}_[not]_of functions
753 // ---------------------------------------------------------------------------
754
755 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
756
757 // NB: All these functions are implemented with the argument being wxChar*,
758 // i.e. widechar string in any Unicode build, even though native string
759 // representation is char* in the UTF-8 build. This is because we couldn't
760 // use memchr() to determine if a character is in a set encoded as UTF-8.
761
762 size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
763 {
764 return find_first_of(sz, nStart, wxStrlen(sz));
765 }
766
767 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
768 {
769 return find_first_not_of(sz, nStart, wxStrlen(sz));
770 }
771
772 size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
773 {
774 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
775
776 size_t idx = nStart;
777 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
778 {
779 if ( wxTmemchr(sz, *i, n) )
780 return idx;
781 }
782
783 return npos;
784 }
785
786 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
787 {
788 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
789
790 size_t idx = nStart;
791 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
792 {
793 if ( !wxTmemchr(sz, *i, n) )
794 return idx;
795 }
796
797 return npos;
798 }
799
800
801 size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
802 {
803 return find_last_of(sz, nStart, wxStrlen(sz));
804 }
805
806 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
807 {
808 return find_last_not_of(sz, nStart, wxStrlen(sz));
809 }
810
811 size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
812 {
813 size_t len = length();
814
815 if ( nStart == npos )
816 {
817 nStart = len - 1;
818 }
819 else
820 {
821 wxASSERT_MSG( nStart <= len, _T("invalid index") );
822 }
823
824 size_t idx = nStart;
825 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
826 i != rend(); --idx, ++i )
827 {
828 if ( wxTmemchr(sz, *i, n) )
829 return idx;
830 }
831
832 return npos;
833 }
834
835 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
836 {
837 size_t len = length();
838
839 if ( nStart == npos )
840 {
841 nStart = len - 1;
842 }
843 else
844 {
845 wxASSERT_MSG( nStart <= len, _T("invalid index") );
846 }
847
848 size_t idx = nStart;
849 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
850 i != rend(); --idx, ++i )
851 {
852 if ( !wxTmemchr(sz, *i, n) )
853 return idx;
854 }
855
856 return npos;
857 }
858
859 size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
860 {
861 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
862
863 size_t idx = nStart;
864 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
865 {
866 if ( *i != ch )
867 return idx;
868 }
869
870 return npos;
871 }
872
873 size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
874 {
875 size_t len = length();
876
877 if ( nStart == npos )
878 {
879 nStart = len - 1;
880 }
881 else
882 {
883 wxASSERT_MSG( nStart <= len, _T("invalid index") );
884 }
885
886 size_t idx = nStart;
887 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
888 i != rend(); --idx, ++i )
889 {
890 if ( *i != ch )
891 return idx;
892 }
893
894 return npos;
895 }
896
897 // the functions above were implemented for wchar_t* arguments in Unicode
898 // build and char* in ANSI build; below are implementations for the other
899 // version:
900 #if wxUSE_UNICODE
901 #define wxOtherCharType char
902 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
903 #else
904 #define wxOtherCharType wchar_t
905 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
906 #endif
907
908 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
909 { return find_first_of(STRCONV(sz), nStart); }
910
911 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
912 size_t n) const
913 { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
914 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
915 { return find_last_of(STRCONV(sz), nStart); }
916 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
917 size_t n) const
918 { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
919 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
920 { return find_first_not_of(STRCONV(sz), nStart); }
921 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
922 size_t n) const
923 { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
924 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
925 { return find_last_not_of(STRCONV(sz), nStart); }
926 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
927 size_t n) const
928 { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
929
930 #undef wxOtherCharType
931 #undef STRCONV
932
933 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
934
935 // ===========================================================================
936 // other common string functions
937 // ===========================================================================
938
939 int wxString::CmpNoCase(const wxString& s) const
940 {
941 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
942
943 size_t idx = 0;
944 const_iterator i1 = begin();
945 const_iterator end1 = end();
946 const_iterator i2 = s.begin();
947 const_iterator end2 = s.end();
948
949 for ( ; i1 != end1 && i2 != end2; ++idx, ++i1, ++i2 )
950 {
951 wxUniChar lower1 = (wxChar)wxTolower(*i1);
952 wxUniChar lower2 = (wxChar)wxTolower(*i2);
953 if ( lower1 != lower2 )
954 return lower1 < lower2 ? -1 : 1;
955 }
956
957 size_t len1 = length();
958 size_t len2 = s.length();
959
960 if ( len1 < len2 )
961 return -1;
962 else if ( len1 > len2 )
963 return 1;
964 return 0;
965 }
966
967
968 #if wxUSE_UNICODE
969
970 #ifdef __MWERKS__
971 #ifndef __SCHAR_MAX__
972 #define __SCHAR_MAX__ 127
973 #endif
974 #endif
975
976 wxString wxString::FromAscii(const char *ascii, size_t len)
977 {
978 if (!ascii || len == 0)
979 return wxEmptyString;
980
981 wxString res;
982
983 wxImplStringBuffer buf(res, len);
984 wxStringCharType *dest = buf;
985
986 for ( ;; )
987 {
988 unsigned char c = (unsigned char)*ascii++;
989 wxASSERT_MSG( c < 0x80,
990 _T("Non-ASCII value passed to FromAscii().") );
991
992 *dest++ = (wchar_t)c;
993
994 if ( c == '\0' )
995 break;
996 }
997
998 return res;
999 }
1000
1001 wxString wxString::FromAscii(const char *ascii)
1002 {
1003 return FromAscii(ascii, strlen(ascii));
1004 }
1005
1006 wxString wxString::FromAscii(const char ascii)
1007 {
1008 // What do we do with '\0' ?
1009
1010 unsigned char c = (unsigned char)ascii;
1011
1012 wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") );
1013
1014 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1015 return wxString(wxUniChar((wchar_t)c));
1016 }
1017
1018 const wxCharBuffer wxString::ToAscii() const
1019 {
1020 // this will allocate enough space for the terminating NUL too
1021 wxCharBuffer buffer(length());
1022 char *dest = buffer.data();
1023
1024 for ( const_iterator i = begin(); i != end(); ++i )
1025 {
1026 wxUniChar c(*i);
1027 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1028 *dest++ = c.IsAscii() ? (char)c : '_';
1029
1030 // the output string can't have embedded NULs anyhow, so we can safely
1031 // stop at first of them even if we do have any
1032 if ( !c )
1033 break;
1034 }
1035
1036 return buffer;
1037 }
1038
1039 #endif // wxUSE_UNICODE
1040
1041 // extract string of length nCount starting at nFirst
1042 wxString wxString::Mid(size_t nFirst, size_t nCount) const
1043 {
1044 size_t nLen = length();
1045
1046 // default value of nCount is npos and means "till the end"
1047 if ( nCount == npos )
1048 {
1049 nCount = nLen - nFirst;
1050 }
1051
1052 // out-of-bounds requests return sensible things
1053 if ( nFirst + nCount > nLen )
1054 {
1055 nCount = nLen - nFirst;
1056 }
1057
1058 if ( nFirst > nLen )
1059 {
1060 // AllocCopy() will return empty string
1061 return wxEmptyString;
1062 }
1063
1064 wxString dest(*this, nFirst, nCount);
1065 if ( dest.length() != nCount )
1066 {
1067 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1068 }
1069
1070 return dest;
1071 }
1072
1073 // check that the string starts with prefix and return the rest of the string
1074 // in the provided pointer if it is not NULL, otherwise return false
1075 bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
1076 {
1077 if ( compare(0, prefix.length(), prefix) != 0 )
1078 return false;
1079
1080 if ( rest )
1081 {
1082 // put the rest of the string into provided pointer
1083 rest->assign(*this, prefix.length(), npos);
1084 }
1085
1086 return true;
1087 }
1088
1089
1090 // check that the string ends with suffix and return the rest of it in the
1091 // provided pointer if it is not NULL, otherwise return false
1092 bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
1093 {
1094 int start = length() - suffix.length();
1095
1096 if ( start < 0 || compare(start, npos, suffix) != 0 )
1097 return false;
1098
1099 if ( rest )
1100 {
1101 // put the rest of the string into provided pointer
1102 rest->assign(*this, 0, start);
1103 }
1104
1105 return true;
1106 }
1107
1108
1109 // extract nCount last (rightmost) characters
1110 wxString wxString::Right(size_t nCount) const
1111 {
1112 if ( nCount > length() )
1113 nCount = length();
1114
1115 wxString dest(*this, length() - nCount, nCount);
1116 if ( dest.length() != nCount ) {
1117 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1118 }
1119 return dest;
1120 }
1121
1122 // get all characters after the last occurence of ch
1123 // (returns the whole string if ch not found)
1124 wxString wxString::AfterLast(wxUniChar ch) const
1125 {
1126 wxString str;
1127 int iPos = Find(ch, true);
1128 if ( iPos == wxNOT_FOUND )
1129 str = *this;
1130 else
1131 str = wx_str() + iPos + 1;
1132
1133 return str;
1134 }
1135
1136 // extract nCount first (leftmost) characters
1137 wxString wxString::Left(size_t nCount) const
1138 {
1139 if ( nCount > length() )
1140 nCount = length();
1141
1142 wxString dest(*this, 0, nCount);
1143 if ( dest.length() != nCount ) {
1144 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1145 }
1146 return dest;
1147 }
1148
1149 // get all characters before the first occurence of ch
1150 // (returns the whole string if ch not found)
1151 wxString wxString::BeforeFirst(wxUniChar ch) const
1152 {
1153 int iPos = Find(ch);
1154 if ( iPos == wxNOT_FOUND ) iPos = length();
1155 return wxString(*this, 0, iPos);
1156 }
1157
1158 /// get all characters before the last occurence of ch
1159 /// (returns empty string if ch not found)
1160 wxString wxString::BeforeLast(wxUniChar ch) const
1161 {
1162 wxString str;
1163 int iPos = Find(ch, true);
1164 if ( iPos != wxNOT_FOUND && iPos != 0 )
1165 str = wxString(c_str(), iPos);
1166
1167 return str;
1168 }
1169
1170 /// get all characters after the first occurence of ch
1171 /// (returns empty string if ch not found)
1172 wxString wxString::AfterFirst(wxUniChar ch) const
1173 {
1174 wxString str;
1175 int iPos = Find(ch);
1176 if ( iPos != wxNOT_FOUND )
1177 str = wx_str() + iPos + 1;
1178
1179 return str;
1180 }
1181
1182 // replace first (or all) occurences of some substring with another one
1183 size_t wxString::Replace(const wxString& strOld,
1184 const wxString& strNew, bool bReplaceAll)
1185 {
1186 // if we tried to replace an empty string we'd enter an infinite loop below
1187 wxCHECK_MSG( !strOld.empty(), 0,
1188 _T("wxString::Replace(): invalid parameter") );
1189
1190 size_t uiCount = 0; // count of replacements made
1191
1192 size_t uiOldLen = strOld.length();
1193 size_t uiNewLen = strNew.length();
1194
1195 size_t dwPos = 0;
1196
1197 while ( (*this)[dwPos] != wxT('\0') )
1198 {
1199 //DO NOT USE STRSTR HERE
1200 //this string can contain embedded null characters,
1201 //so strstr will function incorrectly
1202 dwPos = find(strOld, dwPos);
1203 if ( dwPos == npos )
1204 break; // exit the loop
1205 else
1206 {
1207 //replace this occurance of the old string with the new one
1208 replace(dwPos, uiOldLen, strNew, uiNewLen);
1209
1210 //move up pos past the string that was replaced
1211 dwPos += uiNewLen;
1212
1213 //increase replace count
1214 ++uiCount;
1215
1216 // stop now?
1217 if ( !bReplaceAll )
1218 break; // exit the loop
1219 }
1220 }
1221
1222 return uiCount;
1223 }
1224
1225 bool wxString::IsAscii() const
1226 {
1227 for ( const_iterator i = begin(); i != end(); ++i )
1228 {
1229 if ( !(*i).IsAscii() )
1230 return false;
1231 }
1232
1233 return true;
1234 }
1235
1236 bool wxString::IsWord() const
1237 {
1238 for ( const_iterator i = begin(); i != end(); ++i )
1239 {
1240 if ( !wxIsalpha(*i) )
1241 return false;
1242 }
1243
1244 return true;
1245 }
1246
1247 bool wxString::IsNumber() const
1248 {
1249 if ( empty() )
1250 return true;
1251
1252 const_iterator i = begin();
1253
1254 if ( *i == _T('-') || *i == _T('+') )
1255 ++i;
1256
1257 for ( ; i != end(); ++i )
1258 {
1259 if ( !wxIsdigit(*i) )
1260 return false;
1261 }
1262
1263 return true;
1264 }
1265
1266 wxString wxString::Strip(stripType w) const
1267 {
1268 wxString s = *this;
1269 if ( w & leading ) s.Trim(false);
1270 if ( w & trailing ) s.Trim(true);
1271 return s;
1272 }
1273
1274 // ---------------------------------------------------------------------------
1275 // case conversion
1276 // ---------------------------------------------------------------------------
1277
1278 wxString& wxString::MakeUpper()
1279 {
1280 for ( iterator it = begin(), en = end(); it != en; ++it )
1281 *it = (wxChar)wxToupper(*it);
1282
1283 return *this;
1284 }
1285
1286 wxString& wxString::MakeLower()
1287 {
1288 for ( iterator it = begin(), en = end(); it != en; ++it )
1289 *it = (wxChar)wxTolower(*it);
1290
1291 return *this;
1292 }
1293
1294 // ---------------------------------------------------------------------------
1295 // trimming and padding
1296 // ---------------------------------------------------------------------------
1297
1298 // some compilers (VC++ 6.0 not to name them) return true for a call to
1299 // isspace('ê') in the C locale which seems to be broken to me, but we have to
1300 // live with this by checking that the character is a 7 bit one - even if this
1301 // may fail to detect some spaces (I don't know if Unicode doesn't have
1302 // space-like symbols somewhere except in the first 128 chars), it is arguably
1303 // still better than trimming away accented letters
1304 inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1305
1306 // trims spaces (in the sense of isspace) from left or right side
1307 wxString& wxString::Trim(bool bFromRight)
1308 {
1309 // first check if we're going to modify the string at all
1310 if ( !empty() &&
1311 (
1312 (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1313 (!bFromRight && wxSafeIsspace(GetChar(0u)))
1314 )
1315 )
1316 {
1317 if ( bFromRight )
1318 {
1319 // find last non-space character
1320 reverse_iterator psz = rbegin();
1321 while ( (psz != rend()) && wxSafeIsspace(*psz) )
1322 psz++;
1323
1324 // truncate at trailing space start
1325 erase(psz.base(), end());
1326 }
1327 else
1328 {
1329 // find first non-space character
1330 iterator psz = begin();
1331 while ( (psz != end()) && wxSafeIsspace(*psz) )
1332 psz++;
1333
1334 // fix up data and length
1335 erase(begin(), psz);
1336 }
1337 }
1338
1339 return *this;
1340 }
1341
1342 // adds nCount characters chPad to the string from either side
1343 wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1344 {
1345 wxString s(chPad, nCount);
1346
1347 if ( bFromRight )
1348 *this += s;
1349 else
1350 {
1351 s += *this;
1352 swap(s);
1353 }
1354
1355 return *this;
1356 }
1357
1358 // truncate the string
1359 wxString& wxString::Truncate(size_t uiLen)
1360 {
1361 if ( uiLen < length() )
1362 {
1363 erase(begin() + uiLen, end());
1364 }
1365 //else: nothing to do, string is already short enough
1366
1367 return *this;
1368 }
1369
1370 // ---------------------------------------------------------------------------
1371 // finding (return wxNOT_FOUND if not found and index otherwise)
1372 // ---------------------------------------------------------------------------
1373
1374 // find a character
1375 int wxString::Find(wxUniChar ch, bool bFromEnd) const
1376 {
1377 size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1378
1379 return (idx == npos) ? wxNOT_FOUND : (int)idx;
1380 }
1381
1382 // ----------------------------------------------------------------------------
1383 // conversion to numbers
1384 // ----------------------------------------------------------------------------
1385
1386 // The implementation of all the functions below is exactly the same so factor
1387 // it out. Note that number extraction works correctly on UTF-8 strings, so
1388 // we can use wxStringCharType and wx_str() for maximum efficiency.
1389
1390 template <typename T>
1391 bool wxStringToIntType(const wxStringCharType *start,
1392 T *val,
1393 int base,
1394 T (*func)(const wxStringCharType*, wxStringCharType**, int))
1395 {
1396 wxCHECK_MSG( val, false, _T("NULL output pointer") );
1397 wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );
1398
1399 #ifndef __WXWINCE__
1400 errno = 0;
1401 #endif
1402
1403 wxStringCharType *end;
1404 *val = (*func)(start, &end, base);
1405
1406 // return true only if scan was stopped by the terminating NUL and if the
1407 // string was not empty to start with and no under/overflow occurred
1408 return !*end && (end != start)
1409 #ifndef __WXWINCE__
1410 && (errno != ERANGE)
1411 #endif
1412 ;
1413 }
1414
1415 bool wxString::ToLong(long *val, int base) const
1416 {
1417 return wxStringToIntType(wx_str(), val, base, wxStrtol);
1418 }
1419
1420 bool wxString::ToULong(unsigned long *val, int base) const
1421 {
1422 return wxStringToIntType(wx_str(), val, base, wxStrtoul);
1423 }
1424
1425 bool wxString::ToLongLong(wxLongLong_t *val, int base) const
1426 {
1427 return wxStringToIntType(wx_str(), val, base, wxStrtoll);
1428 }
1429
1430 bool wxString::ToULongLong(wxULongLong_t *val, int base) const
1431 {
1432 return wxStringToIntType(wx_str(), val, base, wxStrtoull);
1433 }
1434
1435 bool wxString::ToDouble(double *val) const
1436 {
1437 wxCHECK_MSG( val, false, _T("NULL pointer in wxString::ToDouble") );
1438
1439 #ifndef __WXWINCE__
1440 errno = 0;
1441 #endif
1442
1443 const wxChar *start = c_str();
1444 wxChar *end;
1445 *val = wxStrtod(start, &end);
1446
1447 // return true only if scan was stopped by the terminating NUL and if the
1448 // string was not empty to start with and no under/overflow occurred
1449 return !*end && (end != start)
1450 #ifndef __WXWINCE__
1451 && (errno != ERANGE)
1452 #endif
1453 ;
1454 }
1455
1456 // ---------------------------------------------------------------------------
1457 // formatted output
1458 // ---------------------------------------------------------------------------
1459
1460 #if !wxUSE_UTF8_LOCALE_ONLY
1461 /* static */
1462 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1463 wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
1464 #else
1465 wxString wxString::DoFormatWchar(const wxChar *format, ...)
1466 #endif
1467 {
1468 va_list argptr;
1469 va_start(argptr, format);
1470
1471 wxString s;
1472 s.PrintfV(format, argptr);
1473
1474 va_end(argptr);
1475
1476 return s;
1477 }
1478 #endif // !wxUSE_UTF8_LOCALE_ONLY
1479
#if wxUSE_UNICODE_UTF8
// Static function: returns a new string built by PrintfV() from the char
// format string and the variable arguments following it.
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
1495
1496 /* static */
1497 wxString wxString::FormatV(const wxString& format, va_list argptr)
1498 {
1499 wxString s;
1500 s.PrintfV(format, argptr);
1501 return s;
1502 }
1503
1504 #if !wxUSE_UTF8_LOCALE_ONLY
1505 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1506 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
1507 #else
1508 int wxString::DoPrintfWchar(const wxChar *format, ...)
1509 #endif
1510 {
1511 va_list argptr;
1512 va_start(argptr, format);
1513
1514 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1515 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1516 // because it's the only cast that works safely for downcasting when
1517 // multiple inheritance is used:
1518 wxString *str = static_cast<wxString*>(this);
1519 #else
1520 wxString *str = this;
1521 #endif
1522
1523 int iLen = str->PrintfV(format, argptr);
1524
1525 va_end(argptr);
1526
1527 return iLen;
1528 }
1529 #endif // !wxUSE_UTF8_LOCALE_ONLY
1530
#if wxUSE_UNICODE_UTF8
// Formats this string in place by passing the char format string and the
// variable arguments following it to PrintfV(), whose return value is
// returned.
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);
    const int written = PrintfV(format, args);
    va_end(args);

    return written;
}
#endif // wxUSE_UNICODE_UTF8
1544
1545 #if wxUSE_UNICODE_UTF8
1546 template<typename BufferType>
1547 #else
1548 // we only need one version in non-UTF8 builds and at least two Windows
1549 // compilers have problems with this function template, so use just one
1550 // normal function here
1551 #endif
1552 static int DoStringPrintfV(wxString& str,
1553 const wxString& format, va_list argptr)
1554 {
1555 int size = 1024;
1556
1557 for ( ;; )
1558 {
1559 #if wxUSE_UNICODE_UTF8
1560 BufferType tmp(str, size + 1);
1561 typename BufferType::CharType *buf = tmp;
1562 #else
1563 wxStringBuffer tmp(str, size + 1);
1564 wxChar *buf = tmp;
1565 #endif
1566
1567 if ( !buf )
1568 {
1569 // out of memory
1570
1571 // in UTF-8 build, leaving uninitialized junk in the buffer
1572 // could result in invalid non-empty UTF-8 string, so just
1573 // reset the string to empty on failure:
1574 buf[0] = '\0';
1575 return -1;
1576 }
1577
1578 // wxVsnprintf() may modify the original arg pointer, so pass it
1579 // only a copy
1580 va_list argptrcopy;
1581 wxVaCopy(argptrcopy, argptr);
1582 int len = wxVsnprintf(buf, size, format, argptrcopy);
1583 va_end(argptrcopy);
1584
1585 // some implementations of vsnprintf() don't NUL terminate
1586 // the string if there is not enough space for it so
1587 // always do it manually
1588 buf[size] = _T('\0');
1589
1590 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1591 // total number of characters which would have been written if the
1592 // buffer were large enough (newer standards such as Unix98)
1593 if ( len < 0 )
1594 {
1595 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1596 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1597 // is true if *both* of them use our own implementation,
1598 // otherwise we can't be sure
1599 #if wxUSE_WXVSNPRINTF
1600 // we know that our own implementation of wxVsnprintf() returns -1
1601 // only for a format error - thus there's something wrong with
1602 // the user's format string
1603 buf[0] = '\0';
1604 return -1;
1605 #else // possibly using system version
1606 // assume it only returns error if there is not enough space, but
1607 // as we don't know how much we need, double the current size of
1608 // the buffer
1609 size *= 2;
1610 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1611 }
1612 else if ( len >= size )
1613 {
1614 #if wxUSE_WXVSNPRINTF
1615 // we know that our own implementation of wxVsnprintf() returns
1616 // size+1 when there's not enough space but that's not the size
1617 // of the required buffer!
1618 size *= 2; // so we just double the current size of the buffer
1619 #else
1620 // some vsnprintf() implementations NUL-terminate the buffer and
1621 // some don't in len == size case, to be safe always add 1
1622 size = len + 1;
1623 #endif
1624 }
1625 else // ok, there was enough space
1626 {
1627 break;
1628 }
1629 }
1630
1631 // we could have overshot
1632 str.Shrink();
1633
1634 return str.length();
1635 }
1636
// Formats this string in place from a printf()-like format string and an
// already started argument list and returns the result of DoStringPrintfV().
//
// All the work is done by DoStringPrintfV(); the preprocessor conditionals
// below only select which of its variants is called.
1637 int wxString::PrintfV(const wxString& format, va_list argptr)
1638 {
// buffer type passed to DoStringPrintfV<> for the UTF-8 case; it only exists
// in wxUSE_UNICODE_UTF8 builds
1639 #if wxUSE_UNICODE_UTF8
1640 #if wxUSE_STL_BASED_WXSTRING
1641 typedef wxStringTypeBuffer<char> Utf8Buffer;
1642 #else
1643 typedef wxImplStringBuffer Utf8Buffer;
1644 #endif
1645 #endif
1646
// UTF-8-only build: Utf8Buffer is used unconditionally
1647 #if wxUSE_UTF8_LOCALE_ONLY
1648 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
1649 #else
// UTF-8 build which is not UTF-8-only: the choice is made at run time from
// wxLocaleIsUtf8
1650 #if wxUSE_UNICODE_UTF8
1651 if ( wxLocaleIsUtf8 )
1652 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
1653 else
1654 // wxChar* version
1655 return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
1656 #else
// non-UTF-8 build: DoStringPrintfV() is an ordinary (non-template) function
1657 return DoStringPrintfV(*this, format, argptr);
1658 #endif // UTF8/WCHAR
1659 #endif
1660 }
1661
1662 // ----------------------------------------------------------------------------
1663 // misc other operations
1664 // ----------------------------------------------------------------------------
1665
1666 // returns true if the string matches the pattern which may contain '*' and
1667 // '?' metacharacters (as usual, '?' matches any character and '*' any number
1668 // of them)
//
// The comparison is done on NUL-terminated character buffers, so the first
// NUL in either the mask or the text is treated as its end.
1669 bool wxString::Matches(const wxString& mask) const
1670 {
1671 // I disable this code as it doesn't seem to be faster (in fact, it seems
1672 // to be much slower) than the old, hand-written code below and using it
1673 // here requires always linking with libregex even if the user code doesn't
1674 // use it
// NB: this branch is never compiled (#if 0); it refers to pszMask which is
//     not declared anywhere in this branch
1675 #if 0 // wxUSE_REGEX
1676 // first translate the shell-like mask into a regex
1677 wxString pattern;
1678 pattern.reserve(wxStrlen(pszMask));
1679
1680 pattern += _T('^');
1681 while ( *pszMask )
1682 {
1683 switch ( *pszMask )
1684 {
1685 case _T('?'):
1686 pattern += _T('.');
1687 break;
1688
1689 case _T('*'):
1690 pattern += _T(".*");
1691 break;
1692
1693 case _T('^'):
1694 case _T('.'):
1695 case _T('$'):
1696 case _T('('):
1697 case _T(')'):
1698 case _T('|'):
1699 case _T('+'):
1700 case _T('\\'):
1701 // these characters are special in a RE, quote them
1702 // (however note that we don't quote '[' and ']' to allow
1703 // using them for Unix shell like matching)
1704 pattern += _T('\\');
1705 // fall through
1706
1707 default:
1708 pattern += *pszMask;
1709 }
1710
1711 pszMask++;
1712 }
1713 pattern += _T('$');
1714
1715 // and now use it
1716 return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
1717 #else // !wxUSE_REGEX
1718 // TODO: this is, of course, awfully inefficient...
1719
1720 // FIXME-UTF8: implement using iterators, remove #if
// in UTF-8 build both strings are first converted with wc_str() and the
// matching is done on the resulting buffers, which are kept alive in
// maskBuf and txtBuf for the duration of the function
1721 #if wxUSE_UNICODE_UTF8
1722 wxWCharBuffer maskBuf = mask.wc_str();
1723 wxWCharBuffer txtBuf = wc_str();
1724 const wxChar *pszMask = maskBuf.data();
1725 const wxChar *pszTxt = txtBuf.data();
1726 #else
1727 const wxChar *pszMask = mask.wx_str();
1728 // the char currently being checked
1729 const wxChar *pszTxt = wx_str();
1730 #endif
1731
1732 // the last location where '*' matched
1733 const wxChar *pszLastStarInText = NULL;
1734 const wxChar *pszLastStarInMask = NULL;
1735
// the code below jumps back to this label when backtracking
1736 match:
1737 for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
1738 switch ( *pszMask ) {
1739 case wxT('?'):
// '?' needs some character to consume, so it fails at the end of the text
1740 if ( *pszTxt == wxT('\0') )
1741 return false;
1742
1743 // pszTxt and pszMask will be incremented in the loop statement
1744
1745 break;
1746
1747 case wxT('*'):
1748 {
1749 // remember where we started to be able to backtrack later
1750 pszLastStarInText = pszTxt;
1751 pszLastStarInMask = pszMask;
1752
1753 // ignore special chars immediately following this one
1754 // (should this be an error?)
1755 while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
1756 pszMask++;
1757
1758 // if there is nothing more, match
1759 if ( *pszMask == wxT('\0') )
1760 return true;
1761
1762 // are there any other metacharacters in the mask?
1763 size_t uiLenMask;
1764 const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));
1765
1766 if ( pEndMask != NULL ) {
1767 // we have to match the string between two metachars
1768 uiLenMask = pEndMask - pszMask;
1769 }
1770 else {
1771 // we have to match the remainder of the string
1772 uiLenMask = wxStrlen(pszMask);
1773 }
1774
// look for the first occurrence of this literal part of the mask in the
// remaining text
1775 wxString strToMatch(pszMask, uiLenMask);
1776 const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
1777 if ( pMatch == NULL )
1778 return false;
1779
// move both pointers to the last character of the matched part
1780 // -1 to compensate "++" in the loop
1781 pszTxt = pMatch + uiLenMask - 1;
1782 pszMask += uiLenMask - 1;
1783 }
1784 break;
1785
1786 default:
// any other mask character must be equal to the current text character
1787 if ( *pszMask != *pszTxt )
1788 return false;
1789 break;
1790 }
1791 }
1792
// the mask is exhausted at this point
1793 // match only if nothing left
1794 if ( *pszTxt == wxT('\0') )
1795 return true;
1796
1797 // if we failed to match, backtrack if we can
// (the mask is rewound to the remembered '*' and the text restarts one
// character after the position recorded for it, so the '*' case above runs
// again and records a new position)
1798 if ( pszLastStarInText ) {
1799 pszTxt = pszLastStarInText + 1;
1800 pszMask = pszLastStarInMask;
1801
1802 pszLastStarInText = NULL;
1803
1804 // don't bother resetting pszLastStarInMask, it's unnecessary
1805
1806 goto match;
1807 }
1808
1809 return false;
1810 #endif // wxUSE_REGEX/!wxUSE_REGEX
1811 }
1812
1813 // Count the number of chars
1814 int wxString::Freq(wxUniChar ch) const
1815 {
1816 int count = 0;
1817 for ( const_iterator i = begin(); i != end(); ++i )
1818 {
1819 if ( *i == ch )
1820 count ++;
1821 }
1822 return count;
1823 }
1824
1825 // convert to upper case, return the copy of the string
1826 wxString wxString::Upper() const
1827 { wxString s(*this); return s.MakeUpper(); }
1828
1829 // convert to lower case, return the copy of the string
1830 wxString wxString::Lower() const { wxString s(*this); return s.MakeLower(); }