]> git.saurik.com Git - wxWidgets.git/blob - src/common/string.cpp
fixes as per http://thread.gmane.org/gmane.comp.lib.wxwidgets.devel/92325
[wxWidgets.git] / src / common / string.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
5 // Modified by:
6 // Created: 29/01/98
7 // RCS-ID: $Id$
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
12
13 // ===========================================================================
14 // headers, declarations, constants
15 // ===========================================================================
16
17 // For compilers that support precompilation, includes "wx.h".
18 #include "wx/wxprec.h"
19
20 #ifdef __BORLANDC__
21 #pragma hdrstop
22 #endif
23
24 #ifndef WX_PRECOMP
25 #include "wx/string.h"
26 #include "wx/wxcrtvararg.h"
27 #endif
28
29 #include <ctype.h>
30
31 #ifndef __WXWINCE__
32 #include <errno.h>
33 #endif
34
35 #include <string.h>
36 #include <stdlib.h>
37
38 #ifdef __SALFORDC__
39 #include <clib.h>
40 #endif
41
42 #include "wx/hashmap.h"
43
44 // string handling functions used by wxString:
45 #if wxUSE_UNICODE_UTF8
46 #define wxStringMemcpy memcpy
47 #define wxStringMemcmp memcmp
48 #define wxStringMemchr memchr
49 #define wxStringStrlen strlen
50 #else
51 #define wxStringMemcpy wxTmemcpy
52 #define wxStringMemcmp wxTmemcmp
53 #define wxStringMemchr wxTmemchr
54 #define wxStringStrlen wxStrlen
55 #endif
56
57
58 // ---------------------------------------------------------------------------
59 // static class variables definition
60 // ---------------------------------------------------------------------------
61
62 //According to STL _must_ be a -1 size_t
63 const size_t wxString::npos = (size_t) -1;
64
65 // ----------------------------------------------------------------------------
66 // global functions
67 // ----------------------------------------------------------------------------
68
69 #if wxUSE_STD_IOSTREAM
70
71 #include <iostream>
72
73 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
74 {
75 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
76 return os << (const char *)str.AsCharBuf();
77 #else
78 return os << str.AsInternal();
79 #endif
80 }
81
82 wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
83 {
84 return os << str.c_str();
85 }
86
87 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str)
88 {
89 return os << str.data();
90 }
91
92 #ifndef __BORLANDC__
93 wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)
94 {
95 return os << str.data();
96 }
97 #endif
98
99 #if wxUSE_UNICODE
100
101 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxString& str)
102 {
103 return wos << str.wc_str();
104 }
105
106 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxCStrData& str)
107 {
108 return wos << str.AsWChar();
109 }
110
111 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxWCharBuffer& str)
112 {
113 return wos << str.data();
114 }
115
116 #endif // wxUSE_UNICODE
117
118 #endif // wxUSE_STD_IOSTREAM
119
120 // ===========================================================================
121 // wxString class core
122 // ===========================================================================
123
124 #if wxUSE_UNICODE_UTF8
125
126 void wxString::PosLenToImpl(size_t pos, size_t len,
127 size_t *implPos, size_t *implLen) const
128 {
129 if ( pos == npos )
130 *implPos = npos;
131 else
132 {
133 const_iterator i = begin() + pos;
134 *implPos = wxStringImpl::const_iterator(i.impl()) - m_impl.begin();
135 if ( len == npos )
136 *implLen = npos;
137 else
138 {
139 // too large length is interpreted as "to the end of the string"
140 // FIXME-UTF8: verify this is the case in std::string, assert
141 // otherwise
142 if ( pos + len > length() )
143 len = length() - pos;
144
145 *implLen = (i + len).impl() - i.impl();
146 }
147 }
148 }
149
150 #endif // wxUSE_UNICODE_UTF8
151
152 // ----------------------------------------------------------------------------
153 // wxCStrData converted strings caching
154 // ----------------------------------------------------------------------------
155
156 // FIXME-UTF8: temporarily disabled because it doesn't work with global
157 // string objects; re-enable after fixing this bug and benchmarking
158 // performance to see if using a hash is a good idea at all
159 #if 0
160
161 // For backward compatibility reasons, it must be possible to assign the value
162 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
163 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
164 // because the memory would be freed immediately, but it has to be valid as long
165 // as the string is not modified, so that code like this still works:
166 //
167 // const wxChar *s = str.c_str();
168 // while ( s ) { ... }
169
170 // FIXME-UTF8: not thread safe!
171 // FIXME-UTF8: we currently clear the cached conversion only when the string is
172 // destroyed, but we should do it when the string is modified, to
173 // keep memory usage down
174 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
175 // invalidated the cache on every change, we could keep the previous
176 // conversion
177 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
178 // to use mb_str() or wc_str() instead of (const [w]char*)c_str()
179
180 template<typename T>
181 static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
182 {
183 typename T::iterator i = hash.find(wxConstCast(s, wxString));
184 if ( i != hash.end() )
185 {
186 free(i->second);
187 hash.erase(i);
188 }
189 }
190
#if wxUSE_UNICODE
// NB: non-STL implementation doesn't compile with "const wxString*" key type,
//     so we have to use wxString* here and const-cast when used
WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
                    wxStringCharConversionCache);
static wxStringCharConversionCache gs_stringsCharCache;

// Convert the string to char* and remember the result in the global cache.
const char* wxCStrData::AsChar() const
{
    // remove the previously cached value, if any (see FIXMEs above):
    DeleteStringFromConversionCache(gs_stringsCharCache, m_str);

    // convert the string and keep the result in the cache:
    char * const converted = m_str->mb_str().release();
    gs_stringsCharCache[wxConstCast(m_str, wxString)] = converted;

    const char * const s = converted;
    return s + m_offset;
}
#endif // wxUSE_UNICODE
210
211 #if !wxUSE_UNICODE_WCHAR
212 WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
213 wxStringWCharConversionCache);
214 static wxStringWCharConversionCache gs_stringsWCharCache;
215
216 const wchar_t* wxCStrData::AsWChar() const
217 {
218 // remove previously cache value, if any (see FIXMEs above):
219 DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
220
221 // convert the string and keep it:
222 const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
223 m_str->wc_str().release();
224
225 return s + m_offset;
226 }
227 #endif // !wxUSE_UNICODE_WCHAR
228
229 wxString::~wxString()
230 {
231 #if wxUSE_UNICODE
232 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
233 DeleteStringFromConversionCache(gs_stringsCharCache, this);
234 #endif
235 #if !wxUSE_UNICODE_WCHAR
236 DeleteStringFromConversionCache(gs_stringsWCharCache, this);
237 #endif
238 }
239 #endif
240
241 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
242 const char* wxCStrData::AsChar() const
243 {
244 #if wxUSE_UNICODE_UTF8
245 if ( wxLocaleIsUtf8 )
246 return AsInternal();
247 #endif
248 // under non-UTF8 locales, we have to convert the internal UTF-8
249 // representation using wxConvLibc and cache the result
250
251 wxString *str = wxConstCast(m_str, wxString);
252
253 // convert the string:
254 //
255 // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
256 // have it) but it's unfortunately not obvious to implement
257 // because we don't know how big buffer do we need for the
258 // given string length (in case of multibyte encodings, e.g.
259 // ISO-2022-JP or UTF-8 when internal representation is wchar_t)
260 //
261 // One idea would be to store more than just m_convertedToChar
262 // in wxString: then we could record the length of the string
263 // which was converted the last time and try to reuse the same
264 // buffer if the current length is not greater than it (this
265 // could still fail because string could have been modified in
266 // place but it would work most of the time, so we'd do it and
267 // only allocate the new buffer if in-place conversion returned
268 // an error). We could also store a bit saying if the string
269 // was modified since the last conversion (and update it in all
270 // operation modifying the string, of course) to avoid unneeded
271 // consequential conversions. But both of these ideas require
272 // adding more fields to wxString and require profiling results
273 // to be sure that we really gain enough from them to justify
274 // doing it.
275 wxCharBuffer buf(str->mb_str());
276
277 // if it failed, return empty string and not NULL to avoid crashes in code
278 // written with either wxWidgets 2 wxString or std::string behaviour in
279 // mind: neither of them ever returns NULL and so we shouldn't neither
280 if ( !buf )
281 return "";
282
283 if ( str->m_convertedToChar &&
284 strlen(buf) == strlen(str->m_convertedToChar) )
285 {
286 // keep the same buffer for as long as possible, so that several calls
287 // to c_str() in a row still work:
288 strcpy(str->m_convertedToChar, buf);
289 }
290 else
291 {
292 str->m_convertedToChar = buf.release();
293 }
294
295 // and keep it:
296 return str->m_convertedToChar + m_offset;
297 }
298 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
299
300 #if !wxUSE_UNICODE_WCHAR
301 const wchar_t* wxCStrData::AsWChar() const
302 {
303 wxString *str = wxConstCast(m_str, wxString);
304
305 // convert the string:
306 wxWCharBuffer buf(str->wc_str());
307
308 // notice that here, unlike above in AsChar(), conversion can't fail as our
309 // internal UTF-8 is always well-formed -- or the string was corrupted and
310 // all bets are off anyhow
311
312 // FIXME-UTF8: do the conversion in-place in the existing buffer
313 if ( str->m_convertedToWChar &&
314 wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
315 {
316 // keep the same buffer for as long as possible, so that several calls
317 // to c_str() in a row still work:
318 memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
319 }
320 else
321 {
322 str->m_convertedToWChar = buf.release();
323 }
324
325 // and keep it:
326 return str->m_convertedToWChar + m_offset;
327 }
328 #endif // !wxUSE_UNICODE_WCHAR
329
330 // ===========================================================================
331 // wxString class core
332 // ===========================================================================
333
334 // ---------------------------------------------------------------------------
335 // construction and conversion
336 // ---------------------------------------------------------------------------
337
338 #if wxUSE_UNICODE_WCHAR
339 /* static */
340 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
341 const wxMBConv& conv)
342 {
343 // anything to do?
344 if ( !psz || nLength == 0 )
345 return SubstrBufFromMB(L"", 0);
346
347 if ( nLength == npos )
348 nLength = wxNO_LEN;
349
350 size_t wcLen;
351 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
352 if ( !wcLen )
353 return SubstrBufFromMB(_T(""), 0);
354 else
355 return SubstrBufFromMB(wcBuf, wcLen);
356 }
357 #endif // wxUSE_UNICODE_WCHAR
358
359 #if wxUSE_UNICODE_UTF8
360 /* static */
361 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
362 const wxMBConv& conv)
363 {
364 // anything to do?
365 if ( !psz || nLength == 0 )
366 return SubstrBufFromMB("", 0);
367
368 // if psz is already in UTF-8, we don't have to do the roundtrip to
369 // wchar_t* and back:
370 if ( conv.IsUTF8() )
371 {
372 // we need to validate the input because UTF8 iterators assume valid
373 // UTF-8 sequence and psz may be invalid:
374 if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
375 {
376 return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz), nLength);
377 }
378 // else: do the roundtrip through wchar_t*
379 }
380
381 if ( nLength == npos )
382 nLength = wxNO_LEN;
383
384 // first convert to wide string:
385 size_t wcLen;
386 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
387 if ( !wcLen )
388 return SubstrBufFromMB("", 0);
389
390 // and then to UTF-8:
391 SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
392 // widechar -> UTF-8 conversion isn't supposed to ever fail:
393 wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
394
395 return buf;
396 }
397 #endif // wxUSE_UNICODE_UTF8
398
399 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
400 /* static */
401 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
402 const wxMBConv& conv)
403 {
404 // anything to do?
405 if ( !pwz || nLength == 0 )
406 return SubstrBufFromWC("", 0);
407
408 if ( nLength == npos )
409 nLength = wxNO_LEN;
410
411 size_t mbLen;
412 wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
413 if ( !mbLen )
414 return SubstrBufFromWC("", 0);
415 else
416 return SubstrBufFromWC(mbBuf, mbLen);
417 }
418 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
419
420
421 #if wxUSE_UNICODE_WCHAR
422
423 //Convert wxString in Unicode mode to a multi-byte string
424 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
425 {
426 return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL);
427 }
428
429 #elif wxUSE_UNICODE_UTF8
430
431 const wxWCharBuffer wxString::wc_str() const
432 {
433 return wxMBConvStrictUTF8().cMB2WC
434 (
435 m_impl.c_str(),
436 m_impl.length() + 1, // size, not length
437 NULL
438 );
439 }
440
441 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
442 {
443 if ( conv.IsUTF8() )
444 return wxCharBuffer::CreateNonOwned(m_impl.c_str());
445
446 // FIXME-UTF8: use wc_str() here once we have buffers with length
447
448 size_t wcLen;
449 wxWCharBuffer wcBuf(wxMBConvStrictUTF8().cMB2WC
450 (
451 m_impl.c_str(),
452 m_impl.length() + 1, // size
453 &wcLen
454 ));
455 if ( !wcLen )
456 return wxCharBuffer("");
457
458 return conv.cWC2MB(wcBuf, wcLen+1, NULL);
459 }
460
461 #else // ANSI
462
463 //Converts this string to a wide character string if unicode
464 //mode is not enabled and wxUSE_WCHAR_T is enabled
465 const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
466 {
467 return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL);
468 }
469
470 #endif // Unicode/ANSI
471
472 // shrink to minimal size (releasing extra memory)
473 bool wxString::Shrink()
474 {
475 wxString tmp(begin(), end());
476 swap(tmp);
477 return tmp.length() == length();
478 }
479
480 // deprecated compatibility code:
481 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
482 wxStringCharType *wxString::GetWriteBuf(size_t nLen)
483 {
484 return DoGetWriteBuf(nLen);
485 }
486
487 void wxString::UngetWriteBuf()
488 {
489 DoUngetWriteBuf();
490 }
491
492 void wxString::UngetWriteBuf(size_t nLen)
493 {
494 DoUngetWriteBuf(nLen);
495 }
496 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
497
498
499 // ---------------------------------------------------------------------------
500 // data access
501 // ---------------------------------------------------------------------------
502
503 // all functions are inline in string.h
504
505 // ---------------------------------------------------------------------------
506 // concatenation operators
507 // ---------------------------------------------------------------------------
508
509 /*
510 * concatenation functions come in 5 flavours:
511 * string + string
512 * char + string and string + char
513 * C str + string and string + C str
514 */
515
516 wxString operator+(const wxString& str1, const wxString& str2)
517 {
518 #if !wxUSE_STL_BASED_WXSTRING
519 wxASSERT( str1.IsValid() );
520 wxASSERT( str2.IsValid() );
521 #endif
522
523 wxString s = str1;
524 s += str2;
525
526 return s;
527 }
528
529 wxString operator+(const wxString& str, wxUniChar ch)
530 {
531 #if !wxUSE_STL_BASED_WXSTRING
532 wxASSERT( str.IsValid() );
533 #endif
534
535 wxString s = str;
536 s += ch;
537
538 return s;
539 }
540
541 wxString operator+(wxUniChar ch, const wxString& str)
542 {
543 #if !wxUSE_STL_BASED_WXSTRING
544 wxASSERT( str.IsValid() );
545 #endif
546
547 wxString s = ch;
548 s += str;
549
550 return s;
551 }
552
553 wxString operator+(const wxString& str, const char *psz)
554 {
555 #if !wxUSE_STL_BASED_WXSTRING
556 wxASSERT( str.IsValid() );
557 #endif
558
559 wxString s;
560 if ( !s.Alloc(strlen(psz) + str.length()) ) {
561 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
562 }
563 s += str;
564 s += psz;
565
566 return s;
567 }
568
569 wxString operator+(const wxString& str, const wchar_t *pwz)
570 {
571 #if !wxUSE_STL_BASED_WXSTRING
572 wxASSERT( str.IsValid() );
573 #endif
574
575 wxString s;
576 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
577 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
578 }
579 s += str;
580 s += pwz;
581
582 return s;
583 }
584
585 wxString operator+(const char *psz, const wxString& str)
586 {
587 #if !wxUSE_STL_BASED_WXSTRING
588 wxASSERT( str.IsValid() );
589 #endif
590
591 wxString s;
592 if ( !s.Alloc(strlen(psz) + str.length()) ) {
593 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
594 }
595 s = psz;
596 s += str;
597
598 return s;
599 }
600
601 wxString operator+(const wchar_t *pwz, const wxString& str)
602 {
603 #if !wxUSE_STL_BASED_WXSTRING
604 wxASSERT( str.IsValid() );
605 #endif
606
607 wxString s;
608 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
609 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
610 }
611 s = pwz;
612 s += str;
613
614 return s;
615 }
616
617 // ---------------------------------------------------------------------------
618 // string comparison
619 // ---------------------------------------------------------------------------
620
621 bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
622 {
623 return (length() == 1) && (compareWithCase ? GetChar(0u) == c
624 : wxToupper(GetChar(0u)) == wxToupper(c));
625 }
626
627 #ifdef HAVE_STD_STRING_COMPARE
628
629 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
630 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
631 // sort strings in characters code point order by sorting the byte sequence
632 // in byte values order (i.e. what strcmp() and memcmp() do).
633
634 int wxString::compare(const wxString& str) const
635 {
636 return m_impl.compare(str.m_impl);
637 }
638
639 int wxString::compare(size_t nStart, size_t nLen,
640 const wxString& str) const
641 {
642 size_t pos, len;
643 PosLenToImpl(nStart, nLen, &pos, &len);
644 return m_impl.compare(pos, len, str.m_impl);
645 }
646
647 int wxString::compare(size_t nStart, size_t nLen,
648 const wxString& str,
649 size_t nStart2, size_t nLen2) const
650 {
651 size_t pos, len;
652 PosLenToImpl(nStart, nLen, &pos, &len);
653
654 size_t pos2, len2;
655 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
656
657 return m_impl.compare(pos, len, str.m_impl, pos2, len2);
658 }
659
660 int wxString::compare(const char* sz) const
661 {
662 return m_impl.compare(ImplStr(sz));
663 }
664
665 int wxString::compare(const wchar_t* sz) const
666 {
667 return m_impl.compare(ImplStr(sz));
668 }
669
670 int wxString::compare(size_t nStart, size_t nLen,
671 const char* sz, size_t nCount) const
672 {
673 size_t pos, len;
674 PosLenToImpl(nStart, nLen, &pos, &len);
675
676 SubstrBufFromMB str(ImplStr(sz, nCount));
677
678 return m_impl.compare(pos, len, str.data, str.len);
679 }
680
681 int wxString::compare(size_t nStart, size_t nLen,
682 const wchar_t* sz, size_t nCount) const
683 {
684 size_t pos, len;
685 PosLenToImpl(nStart, nLen, &pos, &len);
686
687 SubstrBufFromWC str(ImplStr(sz, nCount));
688
689 return m_impl.compare(pos, len, str.data, str.len);
690 }
691
692 #else // !HAVE_STD_STRING_COMPARE
693
694 static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
695 const wxStringCharType* s2, size_t l2)
696 {
697 if( l1 == l2 )
698 return wxStringMemcmp(s1, s2, l1);
699 else if( l1 < l2 )
700 {
701 int ret = wxStringMemcmp(s1, s2, l1);
702 return ret == 0 ? -1 : ret;
703 }
704 else
705 {
706 int ret = wxStringMemcmp(s1, s2, l2);
707 return ret == 0 ? +1 : ret;
708 }
709 }
710
711 int wxString::compare(const wxString& str) const
712 {
713 return ::wxDoCmp(m_impl.data(), m_impl.length(),
714 str.m_impl.data(), str.m_impl.length());
715 }
716
717 int wxString::compare(size_t nStart, size_t nLen,
718 const wxString& str) const
719 {
720 wxASSERT(nStart <= length());
721 size_type strLen = length() - nStart;
722 nLen = strLen < nLen ? strLen : nLen;
723
724 size_t pos, len;
725 PosLenToImpl(nStart, nLen, &pos, &len);
726
727 return ::wxDoCmp(m_impl.data() + pos, len,
728 str.m_impl.data(), str.m_impl.length());
729 }
730
731 int wxString::compare(size_t nStart, size_t nLen,
732 const wxString& str,
733 size_t nStart2, size_t nLen2) const
734 {
735 wxASSERT(nStart <= length());
736 wxASSERT(nStart2 <= str.length());
737 size_type strLen = length() - nStart,
738 strLen2 = str.length() - nStart2;
739 nLen = strLen < nLen ? strLen : nLen;
740 nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
741
742 size_t pos, len;
743 PosLenToImpl(nStart, nLen, &pos, &len);
744 size_t pos2, len2;
745 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
746
747 return ::wxDoCmp(m_impl.data() + pos, len,
748 str.m_impl.data() + pos2, len2);
749 }
750
751 int wxString::compare(const char* sz) const
752 {
753 SubstrBufFromMB str(ImplStr(sz, npos));
754 if ( str.len == npos )
755 str.len = wxStringStrlen(str.data);
756 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
757 }
758
759 int wxString::compare(const wchar_t* sz) const
760 {
761 SubstrBufFromWC str(ImplStr(sz, npos));
762 if ( str.len == npos )
763 str.len = wxStringStrlen(str.data);
764 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
765 }
766
767 int wxString::compare(size_t nStart, size_t nLen,
768 const char* sz, size_t nCount) const
769 {
770 wxASSERT(nStart <= length());
771 size_type strLen = length() - nStart;
772 nLen = strLen < nLen ? strLen : nLen;
773
774 size_t pos, len;
775 PosLenToImpl(nStart, nLen, &pos, &len);
776
777 SubstrBufFromMB str(ImplStr(sz, nCount));
778 if ( str.len == npos )
779 str.len = wxStringStrlen(str.data);
780
781 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
782 }
783
784 int wxString::compare(size_t nStart, size_t nLen,
785 const wchar_t* sz, size_t nCount) const
786 {
787 wxASSERT(nStart <= length());
788 size_type strLen = length() - nStart;
789 nLen = strLen < nLen ? strLen : nLen;
790
791 size_t pos, len;
792 PosLenToImpl(nStart, nLen, &pos, &len);
793
794 SubstrBufFromWC str(ImplStr(sz, nCount));
795 if ( str.len == npos )
796 str.len = wxStringStrlen(str.data);
797
798 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
799 }
800
801 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
802
803
804 // ---------------------------------------------------------------------------
805 // find_{first,last}_[not]_of functions
806 // ---------------------------------------------------------------------------
807
808 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
809
810 // NB: All these functions are implemented with the argument being wxChar*,
811 // i.e. widechar string in any Unicode build, even though native string
812 // representation is char* in the UTF-8 build. This is because we couldn't
813 // use memchr() to determine if a character is in a set encoded as UTF-8.
814
815 size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
816 {
817 return find_first_of(sz, nStart, wxStrlen(sz));
818 }
819
820 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
821 {
822 return find_first_not_of(sz, nStart, wxStrlen(sz));
823 }
824
825 size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
826 {
827 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
828
829 size_t idx = nStart;
830 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
831 {
832 if ( wxTmemchr(sz, *i, n) )
833 return idx;
834 }
835
836 return npos;
837 }
838
839 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
840 {
841 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
842
843 size_t idx = nStart;
844 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
845 {
846 if ( !wxTmemchr(sz, *i, n) )
847 return idx;
848 }
849
850 return npos;
851 }
852
853
854 size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
855 {
856 return find_last_of(sz, nStart, wxStrlen(sz));
857 }
858
859 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
860 {
861 return find_last_not_of(sz, nStart, wxStrlen(sz));
862 }
863
864 size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
865 {
866 size_t len = length();
867
868 if ( nStart == npos )
869 {
870 nStart = len - 1;
871 }
872 else
873 {
874 wxASSERT_MSG( nStart <= len, _T("invalid index") );
875 }
876
877 size_t idx = nStart;
878 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
879 i != rend(); --idx, ++i )
880 {
881 if ( wxTmemchr(sz, *i, n) )
882 return idx;
883 }
884
885 return npos;
886 }
887
888 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
889 {
890 size_t len = length();
891
892 if ( nStart == npos )
893 {
894 nStart = len - 1;
895 }
896 else
897 {
898 wxASSERT_MSG( nStart <= len, _T("invalid index") );
899 }
900
901 size_t idx = nStart;
902 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
903 i != rend(); --idx, ++i )
904 {
905 if ( !wxTmemchr(sz, *i, n) )
906 return idx;
907 }
908
909 return npos;
910 }
911
912 size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
913 {
914 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
915
916 size_t idx = nStart;
917 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
918 {
919 if ( *i != ch )
920 return idx;
921 }
922
923 return npos;
924 }
925
926 size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
927 {
928 size_t len = length();
929
930 if ( nStart == npos )
931 {
932 nStart = len - 1;
933 }
934 else
935 {
936 wxASSERT_MSG( nStart <= len, _T("invalid index") );
937 }
938
939 size_t idx = nStart;
940 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
941 i != rend(); --idx, ++i )
942 {
943 if ( *i != ch )
944 return idx;
945 }
946
947 return npos;
948 }
949
950 // the functions above were implemented for wchar_t* arguments in Unicode
951 // build and char* in ANSI build; below are implementations for the other
952 // version:
953 #if wxUSE_UNICODE
954 #define wxOtherCharType char
955 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
956 #else
957 #define wxOtherCharType wchar_t
958 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
959 #endif
960
961 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
962 { return find_first_of(STRCONV(sz), nStart); }
963
964 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
965 size_t n) const
966 { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
967 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
968 { return find_last_of(STRCONV(sz), nStart); }
969 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
970 size_t n) const
971 { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
972 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
973 { return find_first_not_of(STRCONV(sz), nStart); }
974 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
975 size_t n) const
976 { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
977 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
978 { return find_last_not_of(STRCONV(sz), nStart); }
979 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
980 size_t n) const
981 { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
982
983 #undef wxOtherCharType
984 #undef STRCONV
985
986 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
987
988 // ===========================================================================
989 // other common string functions
990 // ===========================================================================
991
992 int wxString::CmpNoCase(const wxString& s) const
993 {
994 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
995
996 const_iterator i1 = begin();
997 const_iterator end1 = end();
998 const_iterator i2 = s.begin();
999 const_iterator end2 = s.end();
1000
1001 for ( ; i1 != end1 && i2 != end2; ++i1, ++i2 )
1002 {
1003 wxUniChar lower1 = (wxChar)wxTolower(*i1);
1004 wxUniChar lower2 = (wxChar)wxTolower(*i2);
1005 if ( lower1 != lower2 )
1006 return lower1 < lower2 ? -1 : 1;
1007 }
1008
1009 size_t len1 = length();
1010 size_t len2 = s.length();
1011
1012 if ( len1 < len2 )
1013 return -1;
1014 else if ( len1 > len2 )
1015 return 1;
1016 return 0;
1017 }
1018
1019
1020 #if wxUSE_UNICODE
1021
1022 #ifdef __MWERKS__
1023 #ifndef __SCHAR_MAX__
1024 #define __SCHAR_MAX__ 127
1025 #endif
1026 #endif
1027
1028 wxString wxString::FromAscii(const char *ascii, size_t len)
1029 {
1030 if (!ascii || len == 0)
1031 return wxEmptyString;
1032
1033 wxString res;
1034
1035 {
1036 wxStringInternalBuffer buf(res, len);
1037 wxStringCharType *dest = buf;
1038
1039 for ( ; len > 0; --len )
1040 {
1041 unsigned char c = (unsigned char)*ascii++;
1042 wxASSERT_MSG( c < 0x80,
1043 _T("Non-ASCII value passed to FromAscii().") );
1044
1045 *dest++ = (wchar_t)c;
1046 }
1047 }
1048
1049 return res;
1050 }
1051
1052 wxString wxString::FromAscii(const char *ascii)
1053 {
1054 return FromAscii(ascii, wxStrlen(ascii));
1055 }
1056
1057 wxString wxString::FromAscii(char ascii)
1058 {
1059 // What do we do with '\0' ?
1060
1061 unsigned char c = (unsigned char)ascii;
1062
1063 wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") );
1064
1065 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1066 return wxString(wxUniChar((wchar_t)c));
1067 }
1068
1069 const wxCharBuffer wxString::ToAscii() const
1070 {
1071 // this will allocate enough space for the terminating NUL too
1072 wxCharBuffer buffer(length());
1073 char *dest = buffer.data();
1074
1075 for ( const_iterator i = begin(); i != end(); ++i )
1076 {
1077 wxUniChar c(*i);
1078 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1079 *dest++ = c.IsAscii() ? (char)c : '_';
1080
1081 // the output string can't have embedded NULs anyhow, so we can safely
1082 // stop at first of them even if we do have any
1083 if ( !c )
1084 break;
1085 }
1086
1087 return buffer;
1088 }
1089
1090 #endif // wxUSE_UNICODE
1091
1092 // extract string of length nCount starting at nFirst
1093 wxString wxString::Mid(size_t nFirst, size_t nCount) const
1094 {
1095 size_t nLen = length();
1096
1097 // default value of nCount is npos and means "till the end"
1098 if ( nCount == npos )
1099 {
1100 nCount = nLen - nFirst;
1101 }
1102
1103 // out-of-bounds requests return sensible things
1104 if ( nFirst + nCount > nLen )
1105 {
1106 nCount = nLen - nFirst;
1107 }
1108
1109 if ( nFirst > nLen )
1110 {
1111 // AllocCopy() will return empty string
1112 return wxEmptyString;
1113 }
1114
1115 wxString dest(*this, nFirst, nCount);
1116 if ( dest.length() != nCount )
1117 {
1118 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1119 }
1120
1121 return dest;
1122 }
1123
1124 // check that the string starts with prefix and return the rest of the string
1125 // in the provided pointer if it is not NULL, otherwise return false
1126 bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
1127 {
1128 if ( compare(0, prefix.length(), prefix) != 0 )
1129 return false;
1130
1131 if ( rest )
1132 {
1133 // put the rest of the string into provided pointer
1134 rest->assign(*this, prefix.length(), npos);
1135 }
1136
1137 return true;
1138 }
1139
1140
1141 // check that the string ends with suffix and return the rest of it in the
1142 // provided pointer if it is not NULL, otherwise return false
1143 bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
1144 {
1145 int start = length() - suffix.length();
1146
1147 if ( start < 0 || compare(start, npos, suffix) != 0 )
1148 return false;
1149
1150 if ( rest )
1151 {
1152 // put the rest of the string into provided pointer
1153 rest->assign(*this, 0, start);
1154 }
1155
1156 return true;
1157 }
1158
1159
1160 // extract nCount last (rightmost) characters
1161 wxString wxString::Right(size_t nCount) const
1162 {
1163 if ( nCount > length() )
1164 nCount = length();
1165
1166 wxString dest(*this, length() - nCount, nCount);
1167 if ( dest.length() != nCount ) {
1168 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1169 }
1170 return dest;
1171 }
1172
1173 // get all characters after the last occurence of ch
1174 // (returns the whole string if ch not found)
1175 wxString wxString::AfterLast(wxUniChar ch) const
1176 {
1177 wxString str;
1178 int iPos = Find(ch, true);
1179 if ( iPos == wxNOT_FOUND )
1180 str = *this;
1181 else
1182 str = wx_str() + iPos + 1;
1183
1184 return str;
1185 }
1186
1187 // extract nCount first (leftmost) characters
1188 wxString wxString::Left(size_t nCount) const
1189 {
1190 if ( nCount > length() )
1191 nCount = length();
1192
1193 wxString dest(*this, 0, nCount);
1194 if ( dest.length() != nCount ) {
1195 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1196 }
1197 return dest;
1198 }
1199
1200 // get all characters before the first occurence of ch
1201 // (returns the whole string if ch not found)
1202 wxString wxString::BeforeFirst(wxUniChar ch) const
1203 {
1204 int iPos = Find(ch);
1205 if ( iPos == wxNOT_FOUND ) iPos = length();
1206 return wxString(*this, 0, iPos);
1207 }
1208
1209 /// get all characters before the last occurence of ch
1210 /// (returns empty string if ch not found)
1211 wxString wxString::BeforeLast(wxUniChar ch) const
1212 {
1213 wxString str;
1214 int iPos = Find(ch, true);
1215 if ( iPos != wxNOT_FOUND && iPos != 0 )
1216 str = wxString(c_str(), iPos);
1217
1218 return str;
1219 }
1220
1221 /// get all characters after the first occurence of ch
1222 /// (returns empty string if ch not found)
1223 wxString wxString::AfterFirst(wxUniChar ch) const
1224 {
1225 wxString str;
1226 int iPos = Find(ch);
1227 if ( iPos != wxNOT_FOUND )
1228 str = wx_str() + iPos + 1;
1229
1230 return str;
1231 }
1232
1233 // replace first (or all) occurences of some substring with another one
1234 size_t wxString::Replace(const wxString& strOld,
1235 const wxString& strNew, bool bReplaceAll)
1236 {
1237 // if we tried to replace an empty string we'd enter an infinite loop below
1238 wxCHECK_MSG( !strOld.empty(), 0,
1239 _T("wxString::Replace(): invalid parameter") );
1240
1241 size_t uiCount = 0; // count of replacements made
1242
1243 size_t uiOldLen = strOld.length();
1244 size_t uiNewLen = strNew.length();
1245
1246 size_t dwPos = 0;
1247
1248 while ( (*this)[dwPos] != wxT('\0') )
1249 {
1250 //DO NOT USE STRSTR HERE
1251 //this string can contain embedded null characters,
1252 //so strstr will function incorrectly
1253 dwPos = find(strOld, dwPos);
1254 if ( dwPos == npos )
1255 break; // exit the loop
1256 else
1257 {
1258 //replace this occurance of the old string with the new one
1259 replace(dwPos, uiOldLen, strNew, uiNewLen);
1260
1261 //move up pos past the string that was replaced
1262 dwPos += uiNewLen;
1263
1264 //increase replace count
1265 ++uiCount;
1266
1267 // stop now?
1268 if ( !bReplaceAll )
1269 break; // exit the loop
1270 }
1271 }
1272
1273 return uiCount;
1274 }
1275
1276 bool wxString::IsAscii() const
1277 {
1278 for ( const_iterator i = begin(); i != end(); ++i )
1279 {
1280 if ( !(*i).IsAscii() )
1281 return false;
1282 }
1283
1284 return true;
1285 }
1286
1287 bool wxString::IsWord() const
1288 {
1289 for ( const_iterator i = begin(); i != end(); ++i )
1290 {
1291 if ( !wxIsalpha(*i) )
1292 return false;
1293 }
1294
1295 return true;
1296 }
1297
1298 bool wxString::IsNumber() const
1299 {
1300 if ( empty() )
1301 return true;
1302
1303 const_iterator i = begin();
1304
1305 if ( *i == _T('-') || *i == _T('+') )
1306 ++i;
1307
1308 for ( ; i != end(); ++i )
1309 {
1310 if ( !wxIsdigit(*i) )
1311 return false;
1312 }
1313
1314 return true;
1315 }
1316
1317 wxString wxString::Strip(stripType w) const
1318 {
1319 wxString s = *this;
1320 if ( w & leading ) s.Trim(false);
1321 if ( w & trailing ) s.Trim(true);
1322 return s;
1323 }
1324
1325 // ---------------------------------------------------------------------------
1326 // case conversion
1327 // ---------------------------------------------------------------------------
1328
1329 wxString& wxString::MakeUpper()
1330 {
1331 for ( iterator it = begin(), en = end(); it != en; ++it )
1332 *it = (wxChar)wxToupper(*it);
1333
1334 return *this;
1335 }
1336
1337 wxString& wxString::MakeLower()
1338 {
1339 for ( iterator it = begin(), en = end(); it != en; ++it )
1340 *it = (wxChar)wxTolower(*it);
1341
1342 return *this;
1343 }
1344
1345 // ---------------------------------------------------------------------------
1346 // trimming and padding
1347 // ---------------------------------------------------------------------------
1348
1349 // some compilers (VC++ 6.0 not to name them) return true for a call to
1350 // isspace('ê') in the C locale which seems to be broken to me, but we have to
1351 // live with this by checking that the character is a 7 bit one - even if this
1352 // may fail to detect some spaces (I don't know if Unicode doesn't have
1353 // space-like symbols somewhere except in the first 128 chars), it is arguably
1354 // still better than trimming away accented letters
1355 inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1356
1357 // trims spaces (in the sense of isspace) from left or right side
1358 wxString& wxString::Trim(bool bFromRight)
1359 {
1360 // first check if we're going to modify the string at all
1361 if ( !empty() &&
1362 (
1363 (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1364 (!bFromRight && wxSafeIsspace(GetChar(0u)))
1365 )
1366 )
1367 {
1368 if ( bFromRight )
1369 {
1370 // find last non-space character
1371 reverse_iterator psz = rbegin();
1372 while ( (psz != rend()) && wxSafeIsspace(*psz) )
1373 ++psz;
1374
1375 // truncate at trailing space start
1376 erase(psz.base(), end());
1377 }
1378 else
1379 {
1380 // find first non-space character
1381 iterator psz = begin();
1382 while ( (psz != end()) && wxSafeIsspace(*psz) )
1383 ++psz;
1384
1385 // fix up data and length
1386 erase(begin(), psz);
1387 }
1388 }
1389
1390 return *this;
1391 }
1392
1393 // adds nCount characters chPad to the string from either side
1394 wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1395 {
1396 wxString s(chPad, nCount);
1397
1398 if ( bFromRight )
1399 *this += s;
1400 else
1401 {
1402 s += *this;
1403 swap(s);
1404 }
1405
1406 return *this;
1407 }
1408
1409 // truncate the string
1410 wxString& wxString::Truncate(size_t uiLen)
1411 {
1412 if ( uiLen < length() )
1413 {
1414 erase(begin() + uiLen, end());
1415 }
1416 //else: nothing to do, string is already short enough
1417
1418 return *this;
1419 }
1420
1421 // ---------------------------------------------------------------------------
1422 // finding (return wxNOT_FOUND if not found and index otherwise)
1423 // ---------------------------------------------------------------------------
1424
1425 // find a character
1426 int wxString::Find(wxUniChar ch, bool bFromEnd) const
1427 {
1428 size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1429
1430 return (idx == npos) ? wxNOT_FOUND : (int)idx;
1431 }
1432
1433 // ----------------------------------------------------------------------------
1434 // conversion to numbers
1435 // ----------------------------------------------------------------------------
1436
1437 // The implementation of all the functions below is exactly the same so factor
1438 // it out. Note that number extraction works correctly on UTF-8 strings, so
1439 // we can use wxStringCharType and wx_str() for maximum efficiency.
1440
// DO_IF_NOT_WINCE(x) expands to x unless __WXWINCE__ is defined, in which
// case it expands to nothing. It is used below to leave out every use of
// errno in the __WXWINCE__ build.
#ifndef __WXWINCE__
#define DO_IF_NOT_WINCE(x) x
#else
#define DO_IF_NOT_WINCE(x)
#endif

// WX_STRING_TO_INT_TYPE(val, base, func) expands to the complete body of a
// wxString "convert to integer" method:
//
//  - val is the output pointer; if it is NULL the enclosing function returns
//    false (via wxCHECK_MSG)
//  - base must be 0 or in the 2..36 range, which is only asserted
//  - func is the strtol()-like function called as func(start, &end, base)
//
// The macro ends with a return statement without the trailing semicolon, so
// it must be the last statement of the function and be followed by ';'. It
// also declares locals named start and end.
#define WX_STRING_TO_INT_TYPE(val, base, func)                              \
    wxCHECK_MSG( val, false, _T("NULL output pointer") );                   \
    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );  \
                                                                            \
    DO_IF_NOT_WINCE( errno = 0; )                                           \
                                                                            \
    const wxStringCharType *start = wx_str();                               \
    wxStringCharType *end;                                                  \
    *val = func(start, &end, base);                                         \
                                                                            \
    /* return true only if scan was stopped by the terminating NUL and */   \
    /* if the string was not empty to start with and no under/overflow */   \
    /* occurred: */                                                         \
    return !*end && (end != start)                                          \
           DO_IF_NOT_WINCE( && (errno != ERANGE) )
1462
// Convert the string to a long using wxStrtol() and store the result in
// *val.
//
// Returns true only if the whole non-empty string was consumed by the
// conversion and, except in the __WXWINCE__ build, errno was not set to
// ERANGE. Returns false if val is NULL.
bool wxString::ToLong(long *val, int base) const
{
    // the macro expands to the whole function body, including the return
    WX_STRING_TO_INT_TYPE(val, base, wxStrtol);
}
1467
// Convert the string to an unsigned long using wxStrtoul() and store the
// result in *val.
//
// Returns true only if the whole non-empty string was consumed by the
// conversion and, except in the __WXWINCE__ build, errno was not set to
// ERANGE. Returns false if val is NULL.
bool wxString::ToULong(unsigned long *val, int base) const
{
    // the macro expands to the whole function body, including the return
    WX_STRING_TO_INT_TYPE(val, base, wxStrtoul);
}
1472
// Convert the string to a wxLongLong_t using wxStrtoll() and store the
// result in *val.
//
// Returns true only if the whole non-empty string was consumed by the
// conversion and, except in the __WXWINCE__ build, errno was not set to
// ERANGE. Returns false if val is NULL.
bool wxString::ToLongLong(wxLongLong_t *val, int base) const
{
    // the macro expands to the whole function body, including the return
    WX_STRING_TO_INT_TYPE(val, base, wxStrtoll);
}
1477
// Convert the string to a wxULongLong_t using wxStrtoull() and store the
// result in *val.
//
// Returns true only if the whole non-empty string was consumed by the
// conversion and, except in the __WXWINCE__ build, errno was not set to
// ERANGE. Returns false if val is NULL.
bool wxString::ToULongLong(wxULongLong_t *val, int base) const
{
    // the macro expands to the whole function body, including the return
    WX_STRING_TO_INT_TYPE(val, base, wxStrtoull);
}
1482
1483 bool wxString::ToDouble(double *val) const
1484 {
1485 wxCHECK_MSG( val, false, _T("NULL pointer in wxString::ToDouble") );
1486
1487 #ifndef __WXWINCE__
1488 errno = 0;
1489 #endif
1490
1491 const wxChar *start = c_str();
1492 wxChar *end;
1493 *val = wxStrtod(start, &end);
1494
1495 // return true only if scan was stopped by the terminating NUL and if the
1496 // string was not empty to start with and no under/overflow occurred
1497 return !*end && (end != start)
1498 #ifndef __WXWINCE__
1499 && (errno != ERANGE)
1500 #endif
1501 ;
1502 }
1503
1504 // ---------------------------------------------------------------------------
1505 // formatted output
1506 // ---------------------------------------------------------------------------
1507
1508 #if !wxUSE_UTF8_LOCALE_ONLY
1509 /* static */
1510 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1511 wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
1512 #else
1513 wxString wxString::DoFormatWchar(const wxChar *format, ...)
1514 #endif
1515 {
1516 va_list argptr;
1517 va_start(argptr, format);
1518
1519 wxString s;
1520 s.PrintfV(format, argptr);
1521
1522 va_end(argptr);
1523
1524 return s;
1525 }
1526 #endif // !wxUSE_UTF8_LOCALE_ONLY
1527
#if wxUSE_UNICODE_UTF8
// Return a new string holding the result of formatting the variable
// arguments according to the char format string (printf-like).
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
1543
1544 /* static */
1545 wxString wxString::FormatV(const wxString& format, va_list argptr)
1546 {
1547 wxString s;
1548 s.PrintfV(format, argptr);
1549 return s;
1550 }
1551
1552 #if !wxUSE_UTF8_LOCALE_ONLY
1553 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1554 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
1555 #else
1556 int wxString::DoPrintfWchar(const wxChar *format, ...)
1557 #endif
1558 {
1559 va_list argptr;
1560 va_start(argptr, format);
1561
1562 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1563 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1564 // because it's the only cast that works safely for downcasting when
1565 // multiple inheritance is used:
1566 wxString *str = static_cast<wxString*>(this);
1567 #else
1568 wxString *str = this;
1569 #endif
1570
1571 int iLen = str->PrintfV(format, argptr);
1572
1573 va_end(argptr);
1574
1575 return iLen;
1576 }
1577 #endif // !wxUSE_UTF8_LOCALE_ONLY
1578
#if wxUSE_UNICODE_UTF8
// Replace the contents of the string with the result of formatting the
// variable arguments according to the char format string and return
// whatever PrintfV() returns.
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);
    const int written = PrintfV(format, args);
    va_end(args);

    return written;
}
#endif // wxUSE_UNICODE_UTF8
1592
1593 #if wxUSE_UNICODE_UTF8
1594 template<typename BufferType>
1595 #else
1596 // we only need one version in non-UTF8 builds and at least two Windows
1597 // compilers have problems with this function template, so use just one
1598 // normal function here
1599 #endif
1600 static int DoStringPrintfV(wxString& str,
1601 const wxString& format, va_list argptr)
1602 {
1603 int size = 1024;
1604
1605 for ( ;; )
1606 {
1607 #if wxUSE_UNICODE_UTF8
1608 BufferType tmp(str, size + 1);
1609 typename BufferType::CharType *buf = tmp;
1610 #else
1611 wxStringBuffer tmp(str, size + 1);
1612 wxChar *buf = tmp;
1613 #endif
1614
1615 if ( !buf )
1616 {
1617 // out of memory
1618
1619 // in UTF-8 build, leaving uninitialized junk in the buffer
1620 // could result in invalid non-empty UTF-8 string, so just
1621 // reset the string to empty on failure:
1622 buf[0] = '\0';
1623 return -1;
1624 }
1625
1626 // wxVsnprintf() may modify the original arg pointer, so pass it
1627 // only a copy
1628 va_list argptrcopy;
1629 wxVaCopy(argptrcopy, argptr);
1630 int len = wxVsnprintf(buf, size, format, argptrcopy);
1631 va_end(argptrcopy);
1632
1633 // some implementations of vsnprintf() don't NUL terminate
1634 // the string if there is not enough space for it so
1635 // always do it manually
1636 buf[size] = _T('\0');
1637
1638 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1639 // total number of characters which would have been written if the
1640 // buffer were large enough (newer standards such as Unix98)
1641 if ( len < 0 )
1642 {
1643 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1644 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1645 // is true if *both* of them use our own implementation,
1646 // otherwise we can't be sure
1647 #if wxUSE_WXVSNPRINTF
1648 // we know that our own implementation of wxVsnprintf() returns -1
1649 // only for a format error - thus there's something wrong with
1650 // the user's format string
1651 buf[0] = '\0';
1652 return -1;
1653 #else // possibly using system version
1654 // assume it only returns error if there is not enough space, but
1655 // as we don't know how much we need, double the current size of
1656 // the buffer
1657 size *= 2;
1658 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1659 }
1660 else if ( len >= size )
1661 {
1662 #if wxUSE_WXVSNPRINTF
1663 // we know that our own implementation of wxVsnprintf() returns
1664 // size+1 when there's not enough space but that's not the size
1665 // of the required buffer!
1666 size *= 2; // so we just double the current size of the buffer
1667 #else
1668 // some vsnprintf() implementations NUL-terminate the buffer and
1669 // some don't in len == size case, to be safe always add 1
1670 size = len + 1;
1671 #endif
1672 }
1673 else // ok, there was enough space
1674 {
1675 break;
1676 }
1677 }
1678
1679 // we could have overshot
1680 str.Shrink();
1681
1682 return str.length();
1683 }
1684
1685 int wxString::PrintfV(const wxString& format, va_list argptr)
1686 {
1687 #if wxUSE_UNICODE_UTF8
1688 #if wxUSE_STL_BASED_WXSTRING
1689 typedef wxStringTypeBuffer<char> Utf8Buffer;
1690 #else
1691 typedef wxStringInternalBuffer Utf8Buffer;
1692 #endif
1693 #endif
1694
1695 #if wxUSE_UTF8_LOCALE_ONLY
1696 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
1697 #else
1698 #if wxUSE_UNICODE_UTF8
1699 if ( wxLocaleIsUtf8 )
1700 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
1701 else
1702 // wxChar* version
1703 return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
1704 #else
1705 return DoStringPrintfV(*this, format, argptr);
1706 #endif // UTF8/WCHAR
1707 #endif
1708 }
1709
1710 // ----------------------------------------------------------------------------
1711 // misc other operations
1712 // ----------------------------------------------------------------------------
1713
// Returns true if the string matches the pattern which may contain '*' and
// '?' metacharacters (as usual, '?' matches any character and '*' any number
// of them).
//
// Details of the hand-written matcher used below:
//  - any '*' or '?' immediately following a '*' is skipped, so e.g. "*?"
//    behaves like "*"
//  - the literal text following a '*' is located with wxStrstr()
//  - backtracking to the last '*' only happens when the mask is exhausted
//    while some text remains; a mismatch on a literal character returns
//    false immediately
//    NOTE(review): this looks like some texts which do match the mask could
//    be rejected - confirm before relying on this function for masks with
//    literals or '?' after a '*'
bool wxString::Matches(const wxString& mask) const
{
    // I disable this code as it doesn't seem to be faster (in fact, it seems
    // to be much slower) than the old, hand-written code below and using it
    // here requires always linking with libregex even if the user code doesn't
    // use it
#if 0 // wxUSE_REGEX
    // NOTE(review): this disabled branch refers to pszMask which is only
    // declared in the #else branch, so it would need updating before it
    // could be enabled again

    // first translate the shell-like mask into a regex
    wxString pattern;
    pattern.reserve(wxStrlen(pszMask));

    pattern += _T('^');
    while ( *pszMask )
    {
        switch ( *pszMask )
        {
            case _T('?'):
                pattern += _T('.');
                break;

            case _T('*'):
                pattern += _T(".*");
                break;

            case _T('^'):
            case _T('.'):
            case _T('$'):
            case _T('('):
            case _T(')'):
            case _T('|'):
            case _T('+'):
            case _T('\\'):
                // these characters are special in a RE, quote them
                // (however note that we don't quote '[' and ']' to allow
                // using them for Unix shell like matching)
                pattern += _T('\\');
                // fall through

            default:
                pattern += *pszMask;
        }

        pszMask++;
    }
    pattern += _T('$');

    // and now use it
    return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
#else // !wxUSE_REGEX
    // TODO: this is, of course, awfully inefficient...

    // FIXME-UTF8: implement using iterators, remove #if
#if wxUSE_UNICODE_UTF8
    // work on wchar_t copies of both strings; the buffer objects must stay
    // alive for as long as the pointers into them are used
    wxWCharBuffer maskBuf = mask.wc_str();
    wxWCharBuffer txtBuf = wc_str();
    const wxChar *pszMask = maskBuf.data();
    const wxChar *pszTxt = txtBuf.data();
#else
    const wxChar *pszMask = mask.wx_str();
    // the char currently being checked
    const wxChar *pszTxt = wx_str();
#endif

    // the last location where '*' matched
    const wxChar *pszLastStarInText = NULL;
    const wxChar *pszLastStarInMask = NULL;

match:
    // walk the mask and the text in parallel; the loop has no initialization
    // part, so jumping back to the label resumes from the current pointers
    for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
        switch ( *pszMask ) {
            case wxT('?'):
                // '?' needs one character of text, whatever it is
                if ( *pszTxt == wxT('\0') )
                    return false;

                // pszTxt and pszMask will be incremented in the loop statement

                break;

            case wxT('*'):
                {
                    // remember where we started to be able to backtrack later
                    pszLastStarInText = pszTxt;
                    pszLastStarInMask = pszMask;

                    // ignore special chars immediately following this one
                    // (should this be an error?)
                    while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
                        pszMask++;

                    // if there is nothing more, match
                    if ( *pszMask == wxT('\0') )
                        return true;

                    // are there any other metacharacters in the mask?
                    size_t uiLenMask;
                    const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));

                    if ( pEndMask != NULL ) {
                        // we have to match the string between two metachars
                        uiLenMask = pEndMask - pszMask;
                    }
                    else {
                        // we have to match the remainder of the string
                        uiLenMask = wxStrlen(pszMask);
                    }

                    // look for the literal part of the mask in the text
                    wxString strToMatch(pszMask, uiLenMask);
                    const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
                    if ( pMatch == NULL )
                        return false;

                    // -1 to compensate "++" in the loop
                    pszTxt = pMatch + uiLenMask - 1;
                    pszMask += uiLenMask - 1;
                }
                break;

            default:
                // a literal character must be matched exactly
                if ( *pszMask != *pszTxt )
                    return false;
                break;
        }
    }

    // match only if nothing left
    if ( *pszTxt == wxT('\0') )
        return true;

    // if we failed to match, backtrack if we can
    if ( pszLastStarInText ) {
        // retry from the last '*' in the mask, letting it start one
        // character later in the text
        pszTxt = pszLastStarInText + 1;
        pszMask = pszLastStarInMask;

        pszLastStarInText = NULL;

        // don't bother resetting pszLastStarInMask, it's unnecessary

        goto match;
    }

    return false;
#endif // wxUSE_REGEX/!wxUSE_REGEX
}
1860
1861 // Count the number of chars
1862 int wxString::Freq(wxUniChar ch) const
1863 {
1864 int count = 0;
1865 for ( const_iterator i = begin(); i != end(); ++i )
1866 {
1867 if ( *i == ch )
1868 count ++;
1869 }
1870 return count;
1871 }
1872
1873 // convert to upper case, return the copy of the string
1874 wxString wxString::Upper() const
1875 { wxString s(*this); return s.MakeUpper(); }
1876
1877 // convert to lower case, return the copy of the string
1878 wxString wxString::Lower() const { wxString s(*this); return s.MakeLower(); }
1879
1880 // ----------------------------------------------------------------------------
1881 // wxUTF8StringBuffer
1882 // ----------------------------------------------------------------------------
1883
1884 #if wxUSE_UNICODE_WCHAR
1885 wxUTF8StringBuffer::~wxUTF8StringBuffer()
1886 {
1887 wxMBConvStrictUTF8 conv;
1888 size_t wlen = conv.ToWChar(NULL, 0, m_buf);
1889 wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
1890
1891 wxStringInternalBuffer wbuf(m_str, wlen);
1892 conv.ToWChar(wbuf, wlen, m_buf);
1893 }
1894
1895 wxUTF8StringBufferLength::~wxUTF8StringBufferLength()
1896 {
1897 wxCHECK_RET(m_lenSet, "length not set");
1898
1899 wxMBConvStrictUTF8 conv;
1900 size_t wlen = conv.ToWChar(NULL, 0, m_buf, m_len);
1901 wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
1902
1903 wxStringInternalBufferLength wbuf(m_str, wlen);
1904 conv.ToWChar(wbuf, wlen, m_buf, m_len);
1905 wbuf.SetLength(wlen);
1906 }
1907 #endif // wxUSE_UNICODE_WCHAR