]> git.saurik.com Git - wxWidgets.git/blob - src/common/string.cpp
return empty string, not NULL, from wxCStrData::AsChar() if conversion to ANSI fails...
[wxWidgets.git] / src / common / string.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
5 // Modified by:
6 // Created: 29/01/98
7 // RCS-ID: $Id$
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
12
13 // ===========================================================================
14 // headers, declarations, constants
15 // ===========================================================================
16
17 // For compilers that support precompilation, includes "wx.h".
18 #include "wx/wxprec.h"
19
20 #ifdef __BORLANDC__
21 #pragma hdrstop
22 #endif
23
24 #ifndef WX_PRECOMP
25 #include "wx/string.h"
26 #include "wx/wxcrtvararg.h"
27 #endif
28
29 #include <ctype.h>
30
31 #ifndef __WXWINCE__
32 #include <errno.h>
33 #endif
34
35 #include <string.h>
36 #include <stdlib.h>
37
38 #ifdef __SALFORDC__
39 #include <clib.h>
40 #endif
41
42 #include "wx/hashmap.h"
43
44 // string handling functions used by wxString:
45 #if wxUSE_UNICODE_UTF8
46 #define wxStringMemcpy memcpy
47 #define wxStringMemcmp memcmp
48 #define wxStringMemchr memchr
49 #define wxStringStrlen strlen
50 #else
51 #define wxStringMemcpy wxTmemcpy
52 #define wxStringMemcmp wxTmemcmp
53 #define wxStringMemchr wxTmemchr
54 #define wxStringStrlen wxStrlen
55 #endif
56
57
58 // ---------------------------------------------------------------------------
59 // static class variables definition
60 // ---------------------------------------------------------------------------
61
62 //According to STL _must_ be a -1 size_t
63 const size_t wxString::npos = (size_t) -1;
64
65 // ----------------------------------------------------------------------------
66 // global functions
67 // ----------------------------------------------------------------------------
68
69 #if wxUSE_STD_IOSTREAM
70
71 #include <iostream>
72
73 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
74 {
75 // FIXME-UTF8: always, not only if wxUSE_UNICODE
76 #if wxUSE_UNICODE && !defined(__BORLANDC__)
77 return os << (const wchar_t*)str.AsWCharBuf();
78 #else
79 return os << (const char*)str.AsCharBuf();
80 #endif
81 }
82
83 wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
84 {
85 return os << str.c_str();
86 }
87
88 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str)
89 {
90 return os << str.data();
91 }
92
93 #ifndef __BORLANDC__
94 wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)
95 {
96 return os << str.data();
97 }
98 #endif
99
100 #endif // wxUSE_STD_IOSTREAM
101
102 // ===========================================================================
103 // wxString class core
104 // ===========================================================================
105
106 #if wxUSE_UNICODE_UTF8
107
108 void wxString::PosLenToImpl(size_t pos, size_t len,
109 size_t *implPos, size_t *implLen) const
110 {
111 if ( pos == npos )
112 *implPos = npos;
113 else
114 {
115 const_iterator i = begin() + pos;
116 *implPos = wxStringImpl::const_iterator(i.impl()) - m_impl.begin();
117 if ( len == npos )
118 *implLen = npos;
119 else
120 {
121 // too large length is interpreted as "to the end of the string"
122 // FIXME-UTF8: verify this is the case in std::string, assert
123 // otherwise
124 if ( pos + len > length() )
125 len = length() - pos;
126
127 *implLen = (i + len).impl() - i.impl();
128 }
129 }
130 }
131
132 #endif // wxUSE_UNICODE_UTF8
133
134 // ----------------------------------------------------------------------------
135 // wxCStrData converted strings caching
136 // ----------------------------------------------------------------------------
137
138 // FIXME-UTF8: temporarily disabled because it doesn't work with global
139 // string objects; re-enable after fixing this bug and benchmarking
140 // performance to see if using a hash is a good idea at all
141 #if 0
142
143 // For backward compatibility reasons, it must be possible to assign the value
144 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
145 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
146 // because the memory would be freed immediately, but it has to be valid as long
147 // as the string is not modified, so that code like this still works:
148 //
149 // const wxChar *s = str.c_str();
150 // while ( s ) { ... }
151
152 // FIXME-UTF8: not thread safe!
153 // FIXME-UTF8: we currently clear the cached conversion only when the string is
154 // destroyed, but we should do it when the string is modified, to
155 // keep memory usage down
156 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
157 // invalidated the cache on every change, we could keep the previous
158 // conversion
159 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
160 // to use mb_str() or wc_str() instead of (const [w]char*)c_str()
161
162 template<typename T>
163 static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
164 {
165 typename T::iterator i = hash.find(wxConstCast(s, wxString));
166 if ( i != hash.end() )
167 {
168 free(i->second);
169 hash.erase(i);
170 }
171 }
172
173 #if wxUSE_UNICODE
174 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
175 // so we have to use wxString* here and const-cast when used
176 WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
177 wxStringCharConversionCache);
178 static wxStringCharConversionCache gs_stringsCharCache;
179
180 const char* wxCStrData::AsChar() const
181 {
182 // remove previously cache value, if any (see FIXMEs above):
183 DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
184
185 // convert the string and keep it:
186 const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
187 m_str->mb_str().release();
188
189 return s + m_offset;
190 }
191 #endif // wxUSE_UNICODE
192
193 #if !wxUSE_UNICODE_WCHAR
194 WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
195 wxStringWCharConversionCache);
196 static wxStringWCharConversionCache gs_stringsWCharCache;
197
198 const wchar_t* wxCStrData::AsWChar() const
199 {
200 // remove previously cache value, if any (see FIXMEs above):
201 DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
202
203 // convert the string and keep it:
204 const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
205 m_str->wc_str().release();
206
207 return s + m_offset;
208 }
209 #endif // !wxUSE_UNICODE_WCHAR
210
211 wxString::~wxString()
212 {
213 #if wxUSE_UNICODE
214 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
215 DeleteStringFromConversionCache(gs_stringsCharCache, this);
216 #endif
217 #if !wxUSE_UNICODE_WCHAR
218 DeleteStringFromConversionCache(gs_stringsWCharCache, this);
219 #endif
220 }
221 #endif
222
223 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
224 const char* wxCStrData::AsChar() const
225 {
226 #if wxUSE_UNICODE_UTF8
227 if ( wxLocaleIsUtf8 )
228 return AsInternal();
229 #endif
230 // under non-UTF8 locales, we have to convert the internal UTF-8
231 // representation using wxConvLibc and cache the result
232
233 wxString *str = wxConstCast(m_str, wxString);
234
235 // convert the string:
236 wxCharBuffer buf(str->mb_str());
237
238 // if it failed, return empty string and not NULL to avoid crashes in code
239 // written with either wxWidgets 2 wxString or std::string behaviour in
240 // mind: neither of them ever returns NULL and so we shouldn't neither
241 if ( !buf )
242 return "";
243
244 // FIXME-UTF8: do the conversion in-place in the existing buffer
245 if ( str->m_convertedToChar &&
246 strlen(buf) == strlen(str->m_convertedToChar) )
247 {
248 // keep the same buffer for as long as possible, so that several calls
249 // to c_str() in a row still work:
250 strcpy(str->m_convertedToChar, buf);
251 }
252 else
253 {
254 str->m_convertedToChar = buf.release();
255 }
256
257 // and keep it:
258 return str->m_convertedToChar + m_offset;
259 }
260 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
261
262 #if !wxUSE_UNICODE_WCHAR
263 const wchar_t* wxCStrData::AsWChar() const
264 {
265 wxString *str = wxConstCast(m_str, wxString);
266
267 // convert the string:
268 wxWCharBuffer buf(str->wc_str());
269
270 // notice that here, unlike above in AsChar(), conversion can't fail as our
271 // internal UTF-8 is always well-formed -- or the string was corrupted and
272 // all bets are off anyhow
273
274 // FIXME-UTF8: do the conversion in-place in the existing buffer
275 if ( str->m_convertedToWChar &&
276 wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
277 {
278 // keep the same buffer for as long as possible, so that several calls
279 // to c_str() in a row still work:
280 memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
281 }
282 else
283 {
284 str->m_convertedToWChar = buf.release();
285 }
286
287 // and keep it:
288 return str->m_convertedToWChar + m_offset;
289 }
290 #endif // !wxUSE_UNICODE_WCHAR
291
292 // ===========================================================================
293 // wxString class core
294 // ===========================================================================
295
296 // ---------------------------------------------------------------------------
297 // construction and conversion
298 // ---------------------------------------------------------------------------
299
300 #if wxUSE_UNICODE_WCHAR
301 /* static */
302 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
303 const wxMBConv& conv)
304 {
305 // anything to do?
306 if ( !psz || nLength == 0 )
307 return SubstrBufFromMB(L"", 0);
308
309 if ( nLength == npos )
310 nLength = wxNO_LEN;
311
312 size_t wcLen;
313 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
314 if ( !wcLen )
315 return SubstrBufFromMB(_T(""), 0);
316 else
317 return SubstrBufFromMB(wcBuf, wcLen);
318 }
319 #endif // wxUSE_UNICODE_WCHAR
320
321 #if wxUSE_UNICODE_UTF8
322 /* static */
323 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
324 const wxMBConv& conv)
325 {
326 // anything to do?
327 if ( !psz || nLength == 0 )
328 return SubstrBufFromMB("", 0);
329
330 // if psz is already in UTF-8, we don't have to do the roundtrip to
331 // wchar_t* and back:
332 if ( conv.IsUTF8() )
333 {
334 // we need to validate the input because UTF8 iterators assume valid
335 // UTF-8 sequence and psz may be invalid:
336 if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
337 {
338 return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz), nLength);
339 }
340 // else: do the roundtrip through wchar_t*
341 }
342
343 if ( nLength == npos )
344 nLength = wxNO_LEN;
345
346 // first convert to wide string:
347 size_t wcLen;
348 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
349 if ( !wcLen )
350 return SubstrBufFromMB("", 0);
351
352 // and then to UTF-8:
353 SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
354 // widechar -> UTF-8 conversion isn't supposed to ever fail:
355 wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
356
357 return buf;
358 }
359 #endif // wxUSE_UNICODE_UTF8
360
361 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
362 /* static */
363 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
364 const wxMBConv& conv)
365 {
366 // anything to do?
367 if ( !pwz || nLength == 0 )
368 return SubstrBufFromWC("", 0);
369
370 if ( nLength == npos )
371 nLength = wxNO_LEN;
372
373 size_t mbLen;
374 wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
375 if ( !mbLen )
376 return SubstrBufFromWC("", 0);
377 else
378 return SubstrBufFromWC(mbBuf, mbLen);
379 }
380 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
381
382
383 #if wxUSE_UNICODE_WCHAR
384
385 //Convert wxString in Unicode mode to a multi-byte string
386 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
387 {
388 return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL);
389 }
390
391 #elif wxUSE_UNICODE_UTF8
392
393 const wxWCharBuffer wxString::wc_str() const
394 {
395 return wxMBConvStrictUTF8().cMB2WC
396 (
397 m_impl.c_str(),
398 m_impl.length() + 1, // size, not length
399 NULL
400 );
401 }
402
403 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
404 {
405 if ( conv.IsUTF8() )
406 return wxCharBuffer::CreateNonOwned(m_impl.c_str());
407
408 // FIXME-UTF8: use wc_str() here once we have buffers with length
409
410 size_t wcLen;
411 wxWCharBuffer wcBuf(wxMBConvStrictUTF8().cMB2WC
412 (
413 m_impl.c_str(),
414 m_impl.length() + 1, // size
415 &wcLen
416 ));
417 if ( !wcLen )
418 return wxCharBuffer("");
419
420 return conv.cWC2MB(wcBuf, wcLen+1, NULL);
421 }
422
423 #else // ANSI
424
425 //Converts this string to a wide character string if unicode
426 //mode is not enabled and wxUSE_WCHAR_T is enabled
427 const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
428 {
429 return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL);
430 }
431
432 #endif // Unicode/ANSI
433
434 // shrink to minimal size (releasing extra memory)
435 bool wxString::Shrink()
436 {
437 wxString tmp(begin(), end());
438 swap(tmp);
439 return tmp.length() == length();
440 }
441
442 // deprecated compatibility code:
443 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
444 wxStringCharType *wxString::GetWriteBuf(size_t nLen)
445 {
446 return DoGetWriteBuf(nLen);
447 }
448
449 void wxString::UngetWriteBuf()
450 {
451 DoUngetWriteBuf();
452 }
453
454 void wxString::UngetWriteBuf(size_t nLen)
455 {
456 DoUngetWriteBuf(nLen);
457 }
458 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
459
460
461 // ---------------------------------------------------------------------------
462 // data access
463 // ---------------------------------------------------------------------------
464
465 // all functions are inline in string.h
466
467 // ---------------------------------------------------------------------------
468 // concatenation operators
469 // ---------------------------------------------------------------------------
470
471 /*
472 * concatenation functions come in 5 flavours:
473 * string + string
474 * char + string and string + char
475 * C str + string and string + C str
476 */
477
478 wxString operator+(const wxString& str1, const wxString& str2)
479 {
480 #if !wxUSE_STL_BASED_WXSTRING
481 wxASSERT( str1.IsValid() );
482 wxASSERT( str2.IsValid() );
483 #endif
484
485 wxString s = str1;
486 s += str2;
487
488 return s;
489 }
490
491 wxString operator+(const wxString& str, wxUniChar ch)
492 {
493 #if !wxUSE_STL_BASED_WXSTRING
494 wxASSERT( str.IsValid() );
495 #endif
496
497 wxString s = str;
498 s += ch;
499
500 return s;
501 }
502
503 wxString operator+(wxUniChar ch, const wxString& str)
504 {
505 #if !wxUSE_STL_BASED_WXSTRING
506 wxASSERT( str.IsValid() );
507 #endif
508
509 wxString s = ch;
510 s += str;
511
512 return s;
513 }
514
515 wxString operator+(const wxString& str, const char *psz)
516 {
517 #if !wxUSE_STL_BASED_WXSTRING
518 wxASSERT( str.IsValid() );
519 #endif
520
521 wxString s;
522 if ( !s.Alloc(strlen(psz) + str.length()) ) {
523 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
524 }
525 s += str;
526 s += psz;
527
528 return s;
529 }
530
531 wxString operator+(const wxString& str, const wchar_t *pwz)
532 {
533 #if !wxUSE_STL_BASED_WXSTRING
534 wxASSERT( str.IsValid() );
535 #endif
536
537 wxString s;
538 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
539 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
540 }
541 s += str;
542 s += pwz;
543
544 return s;
545 }
546
547 wxString operator+(const char *psz, const wxString& str)
548 {
549 #if !wxUSE_STL_BASED_WXSTRING
550 wxASSERT( str.IsValid() );
551 #endif
552
553 wxString s;
554 if ( !s.Alloc(strlen(psz) + str.length()) ) {
555 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
556 }
557 s = psz;
558 s += str;
559
560 return s;
561 }
562
563 wxString operator+(const wchar_t *pwz, const wxString& str)
564 {
565 #if !wxUSE_STL_BASED_WXSTRING
566 wxASSERT( str.IsValid() );
567 #endif
568
569 wxString s;
570 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
571 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
572 }
573 s = pwz;
574 s += str;
575
576 return s;
577 }
578
579 // ---------------------------------------------------------------------------
580 // string comparison
581 // ---------------------------------------------------------------------------
582
583 bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
584 {
585 return (length() == 1) && (compareWithCase ? GetChar(0u) == c
586 : wxToupper(GetChar(0u)) == wxToupper(c));
587 }
588
589 #ifdef HAVE_STD_STRING_COMPARE
590
591 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
592 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
593 // sort strings in characters code point order by sorting the byte sequence
594 // in byte values order (i.e. what strcmp() and memcmp() do).
595
596 int wxString::compare(const wxString& str) const
597 {
598 return m_impl.compare(str.m_impl);
599 }
600
601 int wxString::compare(size_t nStart, size_t nLen,
602 const wxString& str) const
603 {
604 size_t pos, len;
605 PosLenToImpl(nStart, nLen, &pos, &len);
606 return m_impl.compare(pos, len, str.m_impl);
607 }
608
609 int wxString::compare(size_t nStart, size_t nLen,
610 const wxString& str,
611 size_t nStart2, size_t nLen2) const
612 {
613 size_t pos, len;
614 PosLenToImpl(nStart, nLen, &pos, &len);
615
616 size_t pos2, len2;
617 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
618
619 return m_impl.compare(pos, len, str.m_impl, pos2, len2);
620 }
621
622 int wxString::compare(const char* sz) const
623 {
624 return m_impl.compare(ImplStr(sz));
625 }
626
627 int wxString::compare(const wchar_t* sz) const
628 {
629 return m_impl.compare(ImplStr(sz));
630 }
631
632 int wxString::compare(size_t nStart, size_t nLen,
633 const char* sz, size_t nCount) const
634 {
635 size_t pos, len;
636 PosLenToImpl(nStart, nLen, &pos, &len);
637
638 SubstrBufFromMB str(ImplStr(sz, nCount));
639
640 return m_impl.compare(pos, len, str.data, str.len);
641 }
642
643 int wxString::compare(size_t nStart, size_t nLen,
644 const wchar_t* sz, size_t nCount) const
645 {
646 size_t pos, len;
647 PosLenToImpl(nStart, nLen, &pos, &len);
648
649 SubstrBufFromWC str(ImplStr(sz, nCount));
650
651 return m_impl.compare(pos, len, str.data, str.len);
652 }
653
654 #else // !HAVE_STD_STRING_COMPARE
655
656 static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
657 const wxStringCharType* s2, size_t l2)
658 {
659 if( l1 == l2 )
660 return wxStringMemcmp(s1, s2, l1);
661 else if( l1 < l2 )
662 {
663 int ret = wxStringMemcmp(s1, s2, l1);
664 return ret == 0 ? -1 : ret;
665 }
666 else
667 {
668 int ret = wxStringMemcmp(s1, s2, l2);
669 return ret == 0 ? +1 : ret;
670 }
671 }
672
673 int wxString::compare(const wxString& str) const
674 {
675 return ::wxDoCmp(m_impl.data(), m_impl.length(),
676 str.m_impl.data(), str.m_impl.length());
677 }
678
679 int wxString::compare(size_t nStart, size_t nLen,
680 const wxString& str) const
681 {
682 wxASSERT(nStart <= length());
683 size_type strLen = length() - nStart;
684 nLen = strLen < nLen ? strLen : nLen;
685
686 size_t pos, len;
687 PosLenToImpl(nStart, nLen, &pos, &len);
688
689 return ::wxDoCmp(m_impl.data() + pos, len,
690 str.m_impl.data(), str.m_impl.length());
691 }
692
693 int wxString::compare(size_t nStart, size_t nLen,
694 const wxString& str,
695 size_t nStart2, size_t nLen2) const
696 {
697 wxASSERT(nStart <= length());
698 wxASSERT(nStart2 <= str.length());
699 size_type strLen = length() - nStart,
700 strLen2 = str.length() - nStart2;
701 nLen = strLen < nLen ? strLen : nLen;
702 nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
703
704 size_t pos, len;
705 PosLenToImpl(nStart, nLen, &pos, &len);
706 size_t pos2, len2;
707 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
708
709 return ::wxDoCmp(m_impl.data() + pos, len,
710 str.m_impl.data() + pos2, len2);
711 }
712
713 int wxString::compare(const char* sz) const
714 {
715 SubstrBufFromMB str(ImplStr(sz, npos));
716 if ( str.len == npos )
717 str.len = wxStringStrlen(str.data);
718 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
719 }
720
721 int wxString::compare(const wchar_t* sz) const
722 {
723 SubstrBufFromWC str(ImplStr(sz, npos));
724 if ( str.len == npos )
725 str.len = wxStringStrlen(str.data);
726 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
727 }
728
729 int wxString::compare(size_t nStart, size_t nLen,
730 const char* sz, size_t nCount) const
731 {
732 wxASSERT(nStart <= length());
733 size_type strLen = length() - nStart;
734 nLen = strLen < nLen ? strLen : nLen;
735
736 size_t pos, len;
737 PosLenToImpl(nStart, nLen, &pos, &len);
738
739 SubstrBufFromMB str(ImplStr(sz, nCount));
740 if ( str.len == npos )
741 str.len = wxStringStrlen(str.data);
742
743 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
744 }
745
746 int wxString::compare(size_t nStart, size_t nLen,
747 const wchar_t* sz, size_t nCount) const
748 {
749 wxASSERT(nStart <= length());
750 size_type strLen = length() - nStart;
751 nLen = strLen < nLen ? strLen : nLen;
752
753 size_t pos, len;
754 PosLenToImpl(nStart, nLen, &pos, &len);
755
756 SubstrBufFromWC str(ImplStr(sz, nCount));
757 if ( str.len == npos )
758 str.len = wxStringStrlen(str.data);
759
760 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
761 }
762
763 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
764
765
766 // ---------------------------------------------------------------------------
767 // find_{first,last}_[not]_of functions
768 // ---------------------------------------------------------------------------
769
770 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
771
772 // NB: All these functions are implemented with the argument being wxChar*,
773 // i.e. widechar string in any Unicode build, even though native string
774 // representation is char* in the UTF-8 build. This is because we couldn't
775 // use memchr() to determine if a character is in a set encoded as UTF-8.
776
777 size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
778 {
779 return find_first_of(sz, nStart, wxStrlen(sz));
780 }
781
782 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
783 {
784 return find_first_not_of(sz, nStart, wxStrlen(sz));
785 }
786
787 size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
788 {
789 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
790
791 size_t idx = nStart;
792 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
793 {
794 if ( wxTmemchr(sz, *i, n) )
795 return idx;
796 }
797
798 return npos;
799 }
800
801 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
802 {
803 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
804
805 size_t idx = nStart;
806 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
807 {
808 if ( !wxTmemchr(sz, *i, n) )
809 return idx;
810 }
811
812 return npos;
813 }
814
815
816 size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
817 {
818 return find_last_of(sz, nStart, wxStrlen(sz));
819 }
820
821 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
822 {
823 return find_last_not_of(sz, nStart, wxStrlen(sz));
824 }
825
826 size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
827 {
828 size_t len = length();
829
830 if ( nStart == npos )
831 {
832 nStart = len - 1;
833 }
834 else
835 {
836 wxASSERT_MSG( nStart <= len, _T("invalid index") );
837 }
838
839 size_t idx = nStart;
840 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
841 i != rend(); --idx, ++i )
842 {
843 if ( wxTmemchr(sz, *i, n) )
844 return idx;
845 }
846
847 return npos;
848 }
849
850 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
851 {
852 size_t len = length();
853
854 if ( nStart == npos )
855 {
856 nStart = len - 1;
857 }
858 else
859 {
860 wxASSERT_MSG( nStart <= len, _T("invalid index") );
861 }
862
863 size_t idx = nStart;
864 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
865 i != rend(); --idx, ++i )
866 {
867 if ( !wxTmemchr(sz, *i, n) )
868 return idx;
869 }
870
871 return npos;
872 }
873
874 size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
875 {
876 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
877
878 size_t idx = nStart;
879 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
880 {
881 if ( *i != ch )
882 return idx;
883 }
884
885 return npos;
886 }
887
888 size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
889 {
890 size_t len = length();
891
892 if ( nStart == npos )
893 {
894 nStart = len - 1;
895 }
896 else
897 {
898 wxASSERT_MSG( nStart <= len, _T("invalid index") );
899 }
900
901 size_t idx = nStart;
902 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
903 i != rend(); --idx, ++i )
904 {
905 if ( *i != ch )
906 return idx;
907 }
908
909 return npos;
910 }
911
912 // the functions above were implemented for wchar_t* arguments in Unicode
913 // build and char* in ANSI build; below are implementations for the other
914 // version:
915 #if wxUSE_UNICODE
916 #define wxOtherCharType char
917 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
918 #else
919 #define wxOtherCharType wchar_t
920 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
921 #endif
922
923 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
924 { return find_first_of(STRCONV(sz), nStart); }
925
926 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
927 size_t n) const
928 { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
929 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
930 { return find_last_of(STRCONV(sz), nStart); }
931 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
932 size_t n) const
933 { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
934 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
935 { return find_first_not_of(STRCONV(sz), nStart); }
936 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
937 size_t n) const
938 { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
939 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
940 { return find_last_not_of(STRCONV(sz), nStart); }
941 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
942 size_t n) const
943 { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
944
945 #undef wxOtherCharType
946 #undef STRCONV
947
948 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
949
950 // ===========================================================================
951 // other common string functions
952 // ===========================================================================
953
954 int wxString::CmpNoCase(const wxString& s) const
955 {
956 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
957
958 size_t idx = 0;
959 const_iterator i1 = begin();
960 const_iterator end1 = end();
961 const_iterator i2 = s.begin();
962 const_iterator end2 = s.end();
963
964 for ( ; i1 != end1 && i2 != end2; ++idx, ++i1, ++i2 )
965 {
966 wxUniChar lower1 = (wxChar)wxTolower(*i1);
967 wxUniChar lower2 = (wxChar)wxTolower(*i2);
968 if ( lower1 != lower2 )
969 return lower1 < lower2 ? -1 : 1;
970 }
971
972 size_t len1 = length();
973 size_t len2 = s.length();
974
975 if ( len1 < len2 )
976 return -1;
977 else if ( len1 > len2 )
978 return 1;
979 return 0;
980 }
981
982
983 #if wxUSE_UNICODE
984
985 #ifdef __MWERKS__
986 #ifndef __SCHAR_MAX__
987 #define __SCHAR_MAX__ 127
988 #endif
989 #endif
990
991 wxString wxString::FromAscii(const char *ascii, size_t len)
992 {
993 if (!ascii || len == 0)
994 return wxEmptyString;
995
996 wxString res;
997
998 {
999 wxStringInternalBuffer buf(res, len);
1000 wxStringCharType *dest = buf;
1001
1002 for ( ; len > 0; --len )
1003 {
1004 unsigned char c = (unsigned char)*ascii++;
1005 wxASSERT_MSG( c < 0x80,
1006 _T("Non-ASCII value passed to FromAscii().") );
1007
1008 *dest++ = (wchar_t)c;
1009 }
1010 }
1011
1012 return res;
1013 }
1014
1015 wxString wxString::FromAscii(const char *ascii)
1016 {
1017 return FromAscii(ascii, wxStrlen(ascii));
1018 }
1019
1020 wxString wxString::FromAscii(char ascii)
1021 {
1022 // What do we do with '\0' ?
1023
1024 unsigned char c = (unsigned char)ascii;
1025
1026 wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") );
1027
1028 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1029 return wxString(wxUniChar((wchar_t)c));
1030 }
1031
1032 const wxCharBuffer wxString::ToAscii() const
1033 {
1034 // this will allocate enough space for the terminating NUL too
1035 wxCharBuffer buffer(length());
1036 char *dest = buffer.data();
1037
1038 for ( const_iterator i = begin(); i != end(); ++i )
1039 {
1040 wxUniChar c(*i);
1041 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1042 *dest++ = c.IsAscii() ? (char)c : '_';
1043
1044 // the output string can't have embedded NULs anyhow, so we can safely
1045 // stop at first of them even if we do have any
1046 if ( !c )
1047 break;
1048 }
1049
1050 return buffer;
1051 }
1052
1053 #endif // wxUSE_UNICODE
1054
1055 // extract string of length nCount starting at nFirst
1056 wxString wxString::Mid(size_t nFirst, size_t nCount) const
1057 {
1058 size_t nLen = length();
1059
1060 // default value of nCount is npos and means "till the end"
1061 if ( nCount == npos )
1062 {
1063 nCount = nLen - nFirst;
1064 }
1065
1066 // out-of-bounds requests return sensible things
1067 if ( nFirst + nCount > nLen )
1068 {
1069 nCount = nLen - nFirst;
1070 }
1071
1072 if ( nFirst > nLen )
1073 {
1074 // AllocCopy() will return empty string
1075 return wxEmptyString;
1076 }
1077
1078 wxString dest(*this, nFirst, nCount);
1079 if ( dest.length() != nCount )
1080 {
1081 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1082 }
1083
1084 return dest;
1085 }
1086
1087 // check that the string starts with prefix and return the rest of the string
1088 // in the provided pointer if it is not NULL, otherwise return false
1089 bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
1090 {
1091 if ( compare(0, prefix.length(), prefix) != 0 )
1092 return false;
1093
1094 if ( rest )
1095 {
1096 // put the rest of the string into provided pointer
1097 rest->assign(*this, prefix.length(), npos);
1098 }
1099
1100 return true;
1101 }
1102
1103
1104 // check that the string ends with suffix and return the rest of it in the
1105 // provided pointer if it is not NULL, otherwise return false
1106 bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
1107 {
1108 int start = length() - suffix.length();
1109
1110 if ( start < 0 || compare(start, npos, suffix) != 0 )
1111 return false;
1112
1113 if ( rest )
1114 {
1115 // put the rest of the string into provided pointer
1116 rest->assign(*this, 0, start);
1117 }
1118
1119 return true;
1120 }
1121
1122
1123 // extract nCount last (rightmost) characters
1124 wxString wxString::Right(size_t nCount) const
1125 {
1126 if ( nCount > length() )
1127 nCount = length();
1128
1129 wxString dest(*this, length() - nCount, nCount);
1130 if ( dest.length() != nCount ) {
1131 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1132 }
1133 return dest;
1134 }
1135
1136 // get all characters after the last occurence of ch
1137 // (returns the whole string if ch not found)
1138 wxString wxString::AfterLast(wxUniChar ch) const
1139 {
1140 wxString str;
1141 int iPos = Find(ch, true);
1142 if ( iPos == wxNOT_FOUND )
1143 str = *this;
1144 else
1145 str = wx_str() + iPos + 1;
1146
1147 return str;
1148 }
1149
1150 // extract nCount first (leftmost) characters
1151 wxString wxString::Left(size_t nCount) const
1152 {
1153 if ( nCount > length() )
1154 nCount = length();
1155
1156 wxString dest(*this, 0, nCount);
1157 if ( dest.length() != nCount ) {
1158 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1159 }
1160 return dest;
1161 }
1162
1163 // get all characters before the first occurence of ch
1164 // (returns the whole string if ch not found)
1165 wxString wxString::BeforeFirst(wxUniChar ch) const
1166 {
1167 int iPos = Find(ch);
1168 if ( iPos == wxNOT_FOUND ) iPos = length();
1169 return wxString(*this, 0, iPos);
1170 }
1171
1172 /// get all characters before the last occurence of ch
1173 /// (returns empty string if ch not found)
1174 wxString wxString::BeforeLast(wxUniChar ch) const
1175 {
1176 wxString str;
1177 int iPos = Find(ch, true);
1178 if ( iPos != wxNOT_FOUND && iPos != 0 )
1179 str = wxString(c_str(), iPos);
1180
1181 return str;
1182 }
1183
1184 /// get all characters after the first occurence of ch
1185 /// (returns empty string if ch not found)
1186 wxString wxString::AfterFirst(wxUniChar ch) const
1187 {
1188 wxString str;
1189 int iPos = Find(ch);
1190 if ( iPos != wxNOT_FOUND )
1191 str = wx_str() + iPos + 1;
1192
1193 return str;
1194 }
1195
1196 // replace first (or all) occurences of some substring with another one
1197 size_t wxString::Replace(const wxString& strOld,
1198 const wxString& strNew, bool bReplaceAll)
1199 {
1200 // if we tried to replace an empty string we'd enter an infinite loop below
1201 wxCHECK_MSG( !strOld.empty(), 0,
1202 _T("wxString::Replace(): invalid parameter") );
1203
1204 size_t uiCount = 0; // count of replacements made
1205
1206 size_t uiOldLen = strOld.length();
1207 size_t uiNewLen = strNew.length();
1208
1209 size_t dwPos = 0;
1210
1211 while ( (*this)[dwPos] != wxT('\0') )
1212 {
1213 //DO NOT USE STRSTR HERE
1214 //this string can contain embedded null characters,
1215 //so strstr will function incorrectly
1216 dwPos = find(strOld, dwPos);
1217 if ( dwPos == npos )
1218 break; // exit the loop
1219 else
1220 {
1221 //replace this occurance of the old string with the new one
1222 replace(dwPos, uiOldLen, strNew, uiNewLen);
1223
1224 //move up pos past the string that was replaced
1225 dwPos += uiNewLen;
1226
1227 //increase replace count
1228 ++uiCount;
1229
1230 // stop now?
1231 if ( !bReplaceAll )
1232 break; // exit the loop
1233 }
1234 }
1235
1236 return uiCount;
1237 }
1238
1239 bool wxString::IsAscii() const
1240 {
1241 for ( const_iterator i = begin(); i != end(); ++i )
1242 {
1243 if ( !(*i).IsAscii() )
1244 return false;
1245 }
1246
1247 return true;
1248 }
1249
1250 bool wxString::IsWord() const
1251 {
1252 for ( const_iterator i = begin(); i != end(); ++i )
1253 {
1254 if ( !wxIsalpha(*i) )
1255 return false;
1256 }
1257
1258 return true;
1259 }
1260
1261 bool wxString::IsNumber() const
1262 {
1263 if ( empty() )
1264 return true;
1265
1266 const_iterator i = begin();
1267
1268 if ( *i == _T('-') || *i == _T('+') )
1269 ++i;
1270
1271 for ( ; i != end(); ++i )
1272 {
1273 if ( !wxIsdigit(*i) )
1274 return false;
1275 }
1276
1277 return true;
1278 }
1279
1280 wxString wxString::Strip(stripType w) const
1281 {
1282 wxString s = *this;
1283 if ( w & leading ) s.Trim(false);
1284 if ( w & trailing ) s.Trim(true);
1285 return s;
1286 }
1287
1288 // ---------------------------------------------------------------------------
1289 // case conversion
1290 // ---------------------------------------------------------------------------
1291
1292 wxString& wxString::MakeUpper()
1293 {
1294 for ( iterator it = begin(), en = end(); it != en; ++it )
1295 *it = (wxChar)wxToupper(*it);
1296
1297 return *this;
1298 }
1299
1300 wxString& wxString::MakeLower()
1301 {
1302 for ( iterator it = begin(), en = end(); it != en; ++it )
1303 *it = (wxChar)wxTolower(*it);
1304
1305 return *this;
1306 }
1307
1308 // ---------------------------------------------------------------------------
1309 // trimming and padding
1310 // ---------------------------------------------------------------------------
1311
1312 // some compilers (VC++ 6.0 not to name them) return true for a call to
1313 // isspace('ê') in the C locale which seems to be broken to me, but we have to
1314 // live with this by checking that the character is a 7 bit one - even if this
1315 // may fail to detect some spaces (I don't know if Unicode doesn't have
1316 // space-like symbols somewhere except in the first 128 chars), it is arguably
1317 // still better than trimming away accented letters
1318 inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1319
1320 // trims spaces (in the sense of isspace) from left or right side
1321 wxString& wxString::Trim(bool bFromRight)
1322 {
1323 // first check if we're going to modify the string at all
1324 if ( !empty() &&
1325 (
1326 (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1327 (!bFromRight && wxSafeIsspace(GetChar(0u)))
1328 )
1329 )
1330 {
1331 if ( bFromRight )
1332 {
1333 // find last non-space character
1334 reverse_iterator psz = rbegin();
1335 while ( (psz != rend()) && wxSafeIsspace(*psz) )
1336 psz++;
1337
1338 // truncate at trailing space start
1339 erase(psz.base(), end());
1340 }
1341 else
1342 {
1343 // find first non-space character
1344 iterator psz = begin();
1345 while ( (psz != end()) && wxSafeIsspace(*psz) )
1346 psz++;
1347
1348 // fix up data and length
1349 erase(begin(), psz);
1350 }
1351 }
1352
1353 return *this;
1354 }
1355
1356 // adds nCount characters chPad to the string from either side
1357 wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1358 {
1359 wxString s(chPad, nCount);
1360
1361 if ( bFromRight )
1362 *this += s;
1363 else
1364 {
1365 s += *this;
1366 swap(s);
1367 }
1368
1369 return *this;
1370 }
1371
1372 // truncate the string
1373 wxString& wxString::Truncate(size_t uiLen)
1374 {
1375 if ( uiLen < length() )
1376 {
1377 erase(begin() + uiLen, end());
1378 }
1379 //else: nothing to do, string is already short enough
1380
1381 return *this;
1382 }
1383
1384 // ---------------------------------------------------------------------------
1385 // finding (return wxNOT_FOUND if not found and index otherwise)
1386 // ---------------------------------------------------------------------------
1387
1388 // find a character
1389 int wxString::Find(wxUniChar ch, bool bFromEnd) const
1390 {
1391 size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1392
1393 return (idx == npos) ? wxNOT_FOUND : (int)idx;
1394 }
1395
1396 // ----------------------------------------------------------------------------
1397 // conversion to numbers
1398 // ----------------------------------------------------------------------------
1399
1400 // The implementation of all the functions below is exactly the same so factor
1401 // it out. Note that number extraction works correctly on UTF-8 strings, so
1402 // we can use wxStringCharType and wx_str() for maximum efficiency.
1403
1404 #ifndef __WXWINCE__
1405 #define DO_IF_NOT_WINCE(x) x
1406 #else
1407 #define DO_IF_NOT_WINCE(x)
1408 #endif
1409
1410 #define WX_STRING_TO_INT_TYPE(val, base, func) \
1411 wxCHECK_MSG( val, false, _T("NULL output pointer") ); \
1412 wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") ); \
1413 \
1414 DO_IF_NOT_WINCE( errno = 0; ) \
1415 \
1416 const wxStringCharType *start = wx_str(); \
1417 wxStringCharType *end; \
1418 *val = func(start, &end, base); \
1419 \
1420 /* return true only if scan was stopped by the terminating NUL and */ \
1421 /* if the string was not empty to start with and no under/overflow */ \
1422 /* occurred: */ \
1423 return !*end && (end != start) \
1424 DO_IF_NOT_WINCE( && (errno != ERANGE) )
1425
1426 bool wxString::ToLong(long *val, int base) const
1427 {
1428 WX_STRING_TO_INT_TYPE(val, base, wxStrtol);
1429 }
1430
1431 bool wxString::ToULong(unsigned long *val, int base) const
1432 {
1433 WX_STRING_TO_INT_TYPE(val, base, wxStrtoul);
1434 }
1435
1436 bool wxString::ToLongLong(wxLongLong_t *val, int base) const
1437 {
1438 WX_STRING_TO_INT_TYPE(val, base, wxStrtoll);
1439 }
1440
1441 bool wxString::ToULongLong(wxULongLong_t *val, int base) const
1442 {
1443 WX_STRING_TO_INT_TYPE(val, base, wxStrtoull);
1444 }
1445
1446 bool wxString::ToDouble(double *val) const
1447 {
1448 wxCHECK_MSG( val, false, _T("NULL pointer in wxString::ToDouble") );
1449
1450 #ifndef __WXWINCE__
1451 errno = 0;
1452 #endif
1453
1454 const wxChar *start = c_str();
1455 wxChar *end;
1456 *val = wxStrtod(start, &end);
1457
1458 // return true only if scan was stopped by the terminating NUL and if the
1459 // string was not empty to start with and no under/overflow occurred
1460 return !*end && (end != start)
1461 #ifndef __WXWINCE__
1462 && (errno != ERANGE)
1463 #endif
1464 ;
1465 }
1466
1467 // ---------------------------------------------------------------------------
1468 // formatted output
1469 // ---------------------------------------------------------------------------
1470
1471 #if !wxUSE_UTF8_LOCALE_ONLY
1472 /* static */
1473 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1474 wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
1475 #else
1476 wxString wxString::DoFormatWchar(const wxChar *format, ...)
1477 #endif
1478 {
1479 va_list argptr;
1480 va_start(argptr, format);
1481
1482 wxString s;
1483 s.PrintfV(format, argptr);
1484
1485 va_end(argptr);
1486
1487 return s;
1488 }
1489 #endif // !wxUSE_UTF8_LOCALE_ONLY
1490
#if wxUSE_UNICODE_UTF8
// Build and return a new string from a char printf()-like format and its
// variable arguments; the formatting itself is done by PrintfV().
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
1506
1507 /* static */
1508 wxString wxString::FormatV(const wxString& format, va_list argptr)
1509 {
1510 wxString s;
1511 s.PrintfV(format, argptr);
1512 return s;
1513 }
1514
1515 #if !wxUSE_UTF8_LOCALE_ONLY
1516 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1517 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
1518 #else
1519 int wxString::DoPrintfWchar(const wxChar *format, ...)
1520 #endif
1521 {
1522 va_list argptr;
1523 va_start(argptr, format);
1524
1525 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1526 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1527 // because it's the only cast that works safely for downcasting when
1528 // multiple inheritance is used:
1529 wxString *str = static_cast<wxString*>(this);
1530 #else
1531 wxString *str = this;
1532 #endif
1533
1534 int iLen = str->PrintfV(format, argptr);
1535
1536 va_end(argptr);
1537
1538 return iLen;
1539 }
1540 #endif // !wxUSE_UTF8_LOCALE_ONLY
1541
#if wxUSE_UNICODE_UTF8
// Replace the contents of the string with the result of formatting the
// variable arguments according to a char printf()-like format and return
// whatever PrintfV() returns.
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);
    const int nResult = PrintfV(format, args);
    va_end(args);

    return nResult;
}
#endif // wxUSE_UNICODE_UTF8
1555
1556 #if wxUSE_UNICODE_UTF8
1557 template<typename BufferType>
1558 #else
1559 // we only need one version in non-UTF8 builds and at least two Windows
1560 // compilers have problems with this function template, so use just one
1561 // normal function here
1562 #endif
1563 static int DoStringPrintfV(wxString& str,
1564 const wxString& format, va_list argptr)
1565 {
1566 int size = 1024;
1567
1568 for ( ;; )
1569 {
1570 #if wxUSE_UNICODE_UTF8
1571 BufferType tmp(str, size + 1);
1572 typename BufferType::CharType *buf = tmp;
1573 #else
1574 wxStringBuffer tmp(str, size + 1);
1575 wxChar *buf = tmp;
1576 #endif
1577
1578 if ( !buf )
1579 {
1580 // out of memory
1581
1582 // in UTF-8 build, leaving uninitialized junk in the buffer
1583 // could result in invalid non-empty UTF-8 string, so just
1584 // reset the string to empty on failure:
1585 buf[0] = '\0';
1586 return -1;
1587 }
1588
1589 // wxVsnprintf() may modify the original arg pointer, so pass it
1590 // only a copy
1591 va_list argptrcopy;
1592 wxVaCopy(argptrcopy, argptr);
1593 int len = wxVsnprintf(buf, size, format, argptrcopy);
1594 va_end(argptrcopy);
1595
1596 // some implementations of vsnprintf() don't NUL terminate
1597 // the string if there is not enough space for it so
1598 // always do it manually
1599 buf[size] = _T('\0');
1600
1601 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1602 // total number of characters which would have been written if the
1603 // buffer were large enough (newer standards such as Unix98)
1604 if ( len < 0 )
1605 {
1606 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1607 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1608 // is true if *both* of them use our own implementation,
1609 // otherwise we can't be sure
1610 #if wxUSE_WXVSNPRINTF
1611 // we know that our own implementation of wxVsnprintf() returns -1
1612 // only for a format error - thus there's something wrong with
1613 // the user's format string
1614 buf[0] = '\0';
1615 return -1;
1616 #else // possibly using system version
1617 // assume it only returns error if there is not enough space, but
1618 // as we don't know how much we need, double the current size of
1619 // the buffer
1620 size *= 2;
1621 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1622 }
1623 else if ( len >= size )
1624 {
1625 #if wxUSE_WXVSNPRINTF
1626 // we know that our own implementation of wxVsnprintf() returns
1627 // size+1 when there's not enough space but that's not the size
1628 // of the required buffer!
1629 size *= 2; // so we just double the current size of the buffer
1630 #else
1631 // some vsnprintf() implementations NUL-terminate the buffer and
1632 // some don't in len == size case, to be safe always add 1
1633 size = len + 1;
1634 #endif
1635 }
1636 else // ok, there was enough space
1637 {
1638 break;
1639 }
1640 }
1641
1642 // we could have overshot
1643 str.Shrink();
1644
1645 return str.length();
1646 }
1647
1648 int wxString::PrintfV(const wxString& format, va_list argptr)
1649 {
1650 #if wxUSE_UNICODE_UTF8
1651 #if wxUSE_STL_BASED_WXSTRING
1652 typedef wxStringTypeBuffer<char> Utf8Buffer;
1653 #else
1654 typedef wxStringInternalBuffer Utf8Buffer;
1655 #endif
1656 #endif
1657
1658 #if wxUSE_UTF8_LOCALE_ONLY
1659 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
1660 #else
1661 #if wxUSE_UNICODE_UTF8
1662 if ( wxLocaleIsUtf8 )
1663 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
1664 else
1665 // wxChar* version
1666 return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
1667 #else
1668 return DoStringPrintfV(*this, format, argptr);
1669 #endif // UTF8/WCHAR
1670 #endif
1671 }
1672
1673 // ----------------------------------------------------------------------------
1674 // misc other operations
1675 // ----------------------------------------------------------------------------
1676
1677 // returns true if the string matches the pattern which may contain '*' and
1678 // '?' metacharacters (as usual, '?' matches any character and '*' any number
1679 // of them)
1680 bool wxString::Matches(const wxString& mask) const
1681 {
1682 // I disable this code as it doesn't seem to be faster (in fact, it seems
1683 // to be much slower) than the old, hand-written code below and using it
1684 // here requires always linking with libregex even if the user code doesn't
1685 // use it
1686 #if 0 // wxUSE_REGEX
1687 // first translate the shell-like mask into a regex
1688 wxString pattern;
1689 pattern.reserve(wxStrlen(pszMask));
1690
1691 pattern += _T('^');
1692 while ( *pszMask )
1693 {
1694 switch ( *pszMask )
1695 {
1696 case _T('?'):
1697 pattern += _T('.');
1698 break;
1699
1700 case _T('*'):
1701 pattern += _T(".*");
1702 break;
1703
1704 case _T('^'):
1705 case _T('.'):
1706 case _T('$'):
1707 case _T('('):
1708 case _T(')'):
1709 case _T('|'):
1710 case _T('+'):
1711 case _T('\\'):
1712 // these characters are special in a RE, quote them
1713 // (however note that we don't quote '[' and ']' to allow
1714 // using them for Unix shell like matching)
1715 pattern += _T('\\');
1716 // fall through
1717
1718 default:
1719 pattern += *pszMask;
1720 }
1721
1722 pszMask++;
1723 }
1724 pattern += _T('$');
1725
1726 // and now use it
1727 return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
1728 #else // !wxUSE_REGEX
1729 // TODO: this is, of course, awfully inefficient...
1730
1731 // FIXME-UTF8: implement using iterators, remove #if
1732 #if wxUSE_UNICODE_UTF8
1733 wxWCharBuffer maskBuf = mask.wc_str();
1734 wxWCharBuffer txtBuf = wc_str();
1735 const wxChar *pszMask = maskBuf.data();
1736 const wxChar *pszTxt = txtBuf.data();
1737 #else
1738 const wxChar *pszMask = mask.wx_str();
1739 // the char currently being checked
1740 const wxChar *pszTxt = wx_str();
1741 #endif
1742
1743 // the last location where '*' matched
1744 const wxChar *pszLastStarInText = NULL;
1745 const wxChar *pszLastStarInMask = NULL;
1746
1747 match:
1748 for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
1749 switch ( *pszMask ) {
1750 case wxT('?'):
1751 if ( *pszTxt == wxT('\0') )
1752 return false;
1753
1754 // pszTxt and pszMask will be incremented in the loop statement
1755
1756 break;
1757
1758 case wxT('*'):
1759 {
1760 // remember where we started to be able to backtrack later
1761 pszLastStarInText = pszTxt;
1762 pszLastStarInMask = pszMask;
1763
1764 // ignore special chars immediately following this one
1765 // (should this be an error?)
1766 while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
1767 pszMask++;
1768
1769 // if there is nothing more, match
1770 if ( *pszMask == wxT('\0') )
1771 return true;
1772
1773 // are there any other metacharacters in the mask?
1774 size_t uiLenMask;
1775 const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));
1776
1777 if ( pEndMask != NULL ) {
1778 // we have to match the string between two metachars
1779 uiLenMask = pEndMask - pszMask;
1780 }
1781 else {
1782 // we have to match the remainder of the string
1783 uiLenMask = wxStrlen(pszMask);
1784 }
1785
1786 wxString strToMatch(pszMask, uiLenMask);
1787 const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
1788 if ( pMatch == NULL )
1789 return false;
1790
1791 // -1 to compensate "++" in the loop
1792 pszTxt = pMatch + uiLenMask - 1;
1793 pszMask += uiLenMask - 1;
1794 }
1795 break;
1796
1797 default:
1798 if ( *pszMask != *pszTxt )
1799 return false;
1800 break;
1801 }
1802 }
1803
1804 // match only if nothing left
1805 if ( *pszTxt == wxT('\0') )
1806 return true;
1807
1808 // if we failed to match, backtrack if we can
1809 if ( pszLastStarInText ) {
1810 pszTxt = pszLastStarInText + 1;
1811 pszMask = pszLastStarInMask;
1812
1813 pszLastStarInText = NULL;
1814
1815 // don't bother resetting pszLastStarInMask, it's unnecessary
1816
1817 goto match;
1818 }
1819
1820 return false;
1821 #endif // wxUSE_REGEX/!wxUSE_REGEX
1822 }
1823
1824 // Count the number of chars
1825 int wxString::Freq(wxUniChar ch) const
1826 {
1827 int count = 0;
1828 for ( const_iterator i = begin(); i != end(); ++i )
1829 {
1830 if ( *i == ch )
1831 count ++;
1832 }
1833 return count;
1834 }
1835
1836 // convert to upper case, return the copy of the string
1837 wxString wxString::Upper() const
1838 { wxString s(*this); return s.MakeUpper(); }
1839
1840 // convert to lower case, return the copy of the string
1841 wxString wxString::Lower() const { wxString s(*this); return s.MakeLower(); }
1842
1843 // ----------------------------------------------------------------------------
1844 // wxUTF8StringBuffer
1845 // ----------------------------------------------------------------------------
1846
1847 #if wxUSE_UNICODE_WCHAR
1848 wxUTF8StringBuffer::~wxUTF8StringBuffer()
1849 {
1850 wxMBConvStrictUTF8 conv;
1851 size_t wlen = conv.ToWChar(NULL, 0, m_buf);
1852 wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
1853
1854 wxStringInternalBuffer wbuf(m_str, wlen);
1855 conv.ToWChar(wbuf, wlen, m_buf);
1856 }
1857
1858 wxUTF8StringBufferLength::~wxUTF8StringBufferLength()
1859 {
1860 wxCHECK_RET(m_lenSet, "length not set");
1861
1862 wxMBConvStrictUTF8 conv;
1863 size_t wlen = conv.ToWChar(NULL, 0, m_buf, m_len);
1864 wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
1865
1866 wxStringInternalBufferLength wbuf(m_str, wlen);
1867 conv.ToWChar(wbuf, wlen, m_buf, m_len);
1868 wbuf.SetLength(wlen);
1869 }
1870 #endif // wxUSE_UNICODE_WCHAR