]> git.saurik.com Git - wxWidgets.git/blob - src/common/string.cpp
fixed operator<<(ostream&, wxString) to output string contents and not wchar_t pointe...
[wxWidgets.git] / src / common / string.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
5 // Modified by:
6 // Created: 29/01/98
7 // RCS-ID: $Id$
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
12
13 // ===========================================================================
14 // headers, declarations, constants
15 // ===========================================================================
16
17 // For compilers that support precompilation, includes "wx.h".
18 #include "wx/wxprec.h"
19
20 #ifdef __BORLANDC__
21 #pragma hdrstop
22 #endif
23
24 #ifndef WX_PRECOMP
25 #include "wx/string.h"
26 #include "wx/wxcrtvararg.h"
27 #endif
28
29 #include <ctype.h>
30
31 #ifndef __WXWINCE__
32 #include <errno.h>
33 #endif
34
35 #include <string.h>
36 #include <stdlib.h>
37
38 #ifdef __SALFORDC__
39 #include <clib.h>
40 #endif
41
42 #include "wx/hashmap.h"
43
// string handling functions used by wxString: these macros select the
// low-level routine matching the build configuration
#if wxUSE_UNICODE_UTF8
    // wxUSE_UNICODE_UTF8 build: use the standard C char functions
    #define wxStringMemcpy   memcpy
    #define wxStringMemcmp   memcmp
    #define wxStringMemchr   memchr
    #define wxStringStrlen   strlen
#else
    // all other builds: use the wxTmem*/wxStrlen counterparts
    #define wxStringMemcpy   wxTmemcpy
    #define wxStringMemcmp   wxTmemcmp
    #define wxStringMemchr   wxTmemchr
    #define wxStringStrlen   wxStrlen
#endif
56
57
58 // ---------------------------------------------------------------------------
59 // static class variables definition
60 // ---------------------------------------------------------------------------
61
// According to STL, npos _must_ be (size_t)-1, i.e. the largest value
// representable by size_t
const size_t wxString::npos = (size_t) -1;
64
65 // ----------------------------------------------------------------------------
66 // global functions
67 // ----------------------------------------------------------------------------
68
69 #if wxUSE_STD_IOSTREAM
70
71 #include <iostream>
72
73 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
74 {
75 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
76 return os << (const char *)str.AsCharBuf();
77 #else
78 return os << str.AsInternal();
79 #endif
80 }
81
82 wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
83 {
84 return os << str.c_str();
85 }
86
87 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str)
88 {
89 return os << str.data();
90 }
91
92 #ifndef __BORLANDC__
93 wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)
94 {
95 return os << str.data();
96 }
97 #endif
98
99 #endif // wxUSE_STD_IOSTREAM
100
101 // ===========================================================================
102 // wxString class core
103 // ===========================================================================
104
105 #if wxUSE_UNICODE_UTF8
106
107 void wxString::PosLenToImpl(size_t pos, size_t len,
108 size_t *implPos, size_t *implLen) const
109 {
110 if ( pos == npos )
111 *implPos = npos;
112 else
113 {
114 const_iterator i = begin() + pos;
115 *implPos = wxStringImpl::const_iterator(i.impl()) - m_impl.begin();
116 if ( len == npos )
117 *implLen = npos;
118 else
119 {
120 // too large length is interpreted as "to the end of the string"
121 // FIXME-UTF8: verify this is the case in std::string, assert
122 // otherwise
123 if ( pos + len > length() )
124 len = length() - pos;
125
126 *implLen = (i + len).impl() - i.impl();
127 }
128 }
129 }
130
131 #endif // wxUSE_UNICODE_UTF8
132
133 // ----------------------------------------------------------------------------
134 // wxCStrData converted strings caching
135 // ----------------------------------------------------------------------------
136
137 // FIXME-UTF8: temporarily disabled because it doesn't work with global
138 // string objects; re-enable after fixing this bug and benchmarking
139 // performance to see if using a hash is a good idea at all
140 #if 0
141
142 // For backward compatibility reasons, it must be possible to assign the value
143 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
144 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
145 // because the memory would be freed immediately, but it has to be valid as long
146 // as the string is not modified, so that code like this still works:
147 //
148 // const wxChar *s = str.c_str();
149 // while ( s ) { ... }
150
151 // FIXME-UTF8: not thread safe!
152 // FIXME-UTF8: we currently clear the cached conversion only when the string is
153 // destroyed, but we should do it when the string is modified, to
154 // keep memory usage down
155 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
156 // invalidated the cache on every change, we could keep the previous
157 // conversion
158 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
159 // to use mb_str() or wc_str() instead of (const [w]char*)c_str()
160
161 template<typename T>
162 static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
163 {
164 typename T::iterator i = hash.find(wxConstCast(s, wxString));
165 if ( i != hash.end() )
166 {
167 free(i->second);
168 hash.erase(i);
169 }
170 }
171
#if wxUSE_UNICODE
// NB: non-STL implementation doesn't compile with "const wxString*" key type,
//     so we have to use wxString* here and const-cast when used
WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
                    wxStringCharConversionCache);
static wxStringCharConversionCache gs_stringsCharCache;

// Convert the string to char* and remember the result in the global cache,
// keyed by the string object; returns the converted data offset by m_offset.
const char* wxCStrData::AsChar() const
{
    // remove the previously cached value, if any (see FIXMEs above):
    DeleteStringFromConversionCache(gs_stringsCharCache, m_str);

    // convert the string...
    char * const converted = m_str->mb_str().release();

    // ... and keep the result:
    gs_stringsCharCache[wxConstCast(m_str, wxString)] = converted;

    return converted + m_offset;
}
#endif // wxUSE_UNICODE
191
192 #if !wxUSE_UNICODE_WCHAR
193 WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
194 wxStringWCharConversionCache);
195 static wxStringWCharConversionCache gs_stringsWCharCache;
196
197 const wchar_t* wxCStrData::AsWChar() const
198 {
199 // remove previously cache value, if any (see FIXMEs above):
200 DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
201
202 // convert the string and keep it:
203 const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
204 m_str->wc_str().release();
205
206 return s + m_offset;
207 }
208 #endif // !wxUSE_UNICODE_WCHAR
209
// Destructor (part of the disabled caching code): drops any conversions of
// this string still kept in the global conversion caches.
wxString::~wxString()
{
#if wxUSE_UNICODE
    // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
    DeleteStringFromConversionCache(gs_stringsCharCache, this);
#endif
#if !wxUSE_UNICODE_WCHAR
    // the wchar_t cache only exists when wchar_t is not the native
    // representation
    DeleteStringFromConversionCache(gs_stringsWCharCache, this);
#endif
}
220 #endif
221
222 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
223 const char* wxCStrData::AsChar() const
224 {
225 #if wxUSE_UNICODE_UTF8
226 if ( wxLocaleIsUtf8 )
227 return AsInternal();
228 #endif
229 // under non-UTF8 locales, we have to convert the internal UTF-8
230 // representation using wxConvLibc and cache the result
231
232 wxString *str = wxConstCast(m_str, wxString);
233
234 // convert the string:
235 //
236 // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
237 // have it) but it's unfortunately not obvious to implement
238 // because we don't know how big buffer do we need for the
239 // given string length (in case of multibyte encodings, e.g.
240 // ISO-2022-JP or UTF-8 when internal representation is wchar_t)
241 //
242 // One idea would be to store more than just m_convertedToChar
243 // in wxString: then we could record the length of the string
244 // which was converted the last time and try to reuse the same
245 // buffer if the current length is not greater than it (this
246 // could still fail because string could have been modified in
247 // place but it would work most of the time, so we'd do it and
248 // only allocate the new buffer if in-place conversion returned
249 // an error). We could also store a bit saying if the string
250 // was modified since the last conversion (and update it in all
251 // operation modifying the string, of course) to avoid unneeded
252 // consequential conversions. But both of these ideas require
253 // adding more fields to wxString and require profiling results
254 // to be sure that we really gain enough from them to justify
255 // doing it.
256 wxCharBuffer buf(str->mb_str());
257
258 // if it failed, return empty string and not NULL to avoid crashes in code
259 // written with either wxWidgets 2 wxString or std::string behaviour in
260 // mind: neither of them ever returns NULL and so we shouldn't neither
261 if ( !buf )
262 return "";
263
264 if ( str->m_convertedToChar &&
265 strlen(buf) == strlen(str->m_convertedToChar) )
266 {
267 // keep the same buffer for as long as possible, so that several calls
268 // to c_str() in a row still work:
269 strcpy(str->m_convertedToChar, buf);
270 }
271 else
272 {
273 str->m_convertedToChar = buf.release();
274 }
275
276 // and keep it:
277 return str->m_convertedToChar + m_offset;
278 }
279 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
280
281 #if !wxUSE_UNICODE_WCHAR
282 const wchar_t* wxCStrData::AsWChar() const
283 {
284 wxString *str = wxConstCast(m_str, wxString);
285
286 // convert the string:
287 wxWCharBuffer buf(str->wc_str());
288
289 // notice that here, unlike above in AsChar(), conversion can't fail as our
290 // internal UTF-8 is always well-formed -- or the string was corrupted and
291 // all bets are off anyhow
292
293 // FIXME-UTF8: do the conversion in-place in the existing buffer
294 if ( str->m_convertedToWChar &&
295 wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
296 {
297 // keep the same buffer for as long as possible, so that several calls
298 // to c_str() in a row still work:
299 memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
300 }
301 else
302 {
303 str->m_convertedToWChar = buf.release();
304 }
305
306 // and keep it:
307 return str->m_convertedToWChar + m_offset;
308 }
309 #endif // !wxUSE_UNICODE_WCHAR
310
311 // ===========================================================================
312 // wxString class core
313 // ===========================================================================
314
315 // ---------------------------------------------------------------------------
316 // construction and conversion
317 // ---------------------------------------------------------------------------
318
319 #if wxUSE_UNICODE_WCHAR
320 /* static */
321 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
322 const wxMBConv& conv)
323 {
324 // anything to do?
325 if ( !psz || nLength == 0 )
326 return SubstrBufFromMB(L"", 0);
327
328 if ( nLength == npos )
329 nLength = wxNO_LEN;
330
331 size_t wcLen;
332 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
333 if ( !wcLen )
334 return SubstrBufFromMB(_T(""), 0);
335 else
336 return SubstrBufFromMB(wcBuf, wcLen);
337 }
338 #endif // wxUSE_UNICODE_WCHAR
339
340 #if wxUSE_UNICODE_UTF8
341 /* static */
342 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
343 const wxMBConv& conv)
344 {
345 // anything to do?
346 if ( !psz || nLength == 0 )
347 return SubstrBufFromMB("", 0);
348
349 // if psz is already in UTF-8, we don't have to do the roundtrip to
350 // wchar_t* and back:
351 if ( conv.IsUTF8() )
352 {
353 // we need to validate the input because UTF8 iterators assume valid
354 // UTF-8 sequence and psz may be invalid:
355 if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
356 {
357 return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz), nLength);
358 }
359 // else: do the roundtrip through wchar_t*
360 }
361
362 if ( nLength == npos )
363 nLength = wxNO_LEN;
364
365 // first convert to wide string:
366 size_t wcLen;
367 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
368 if ( !wcLen )
369 return SubstrBufFromMB("", 0);
370
371 // and then to UTF-8:
372 SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
373 // widechar -> UTF-8 conversion isn't supposed to ever fail:
374 wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
375
376 return buf;
377 }
378 #endif // wxUSE_UNICODE_UTF8
379
380 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
381 /* static */
382 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
383 const wxMBConv& conv)
384 {
385 // anything to do?
386 if ( !pwz || nLength == 0 )
387 return SubstrBufFromWC("", 0);
388
389 if ( nLength == npos )
390 nLength = wxNO_LEN;
391
392 size_t mbLen;
393 wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
394 if ( !mbLen )
395 return SubstrBufFromWC("", 0);
396 else
397 return SubstrBufFromWC(mbBuf, mbLen);
398 }
399 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
400
401
402 #if wxUSE_UNICODE_WCHAR
403
404 //Convert wxString in Unicode mode to a multi-byte string
405 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
406 {
407 return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL);
408 }
409
410 #elif wxUSE_UNICODE_UTF8
411
412 const wxWCharBuffer wxString::wc_str() const
413 {
414 return wxMBConvStrictUTF8().cMB2WC
415 (
416 m_impl.c_str(),
417 m_impl.length() + 1, // size, not length
418 NULL
419 );
420 }
421
422 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
423 {
424 if ( conv.IsUTF8() )
425 return wxCharBuffer::CreateNonOwned(m_impl.c_str());
426
427 // FIXME-UTF8: use wc_str() here once we have buffers with length
428
429 size_t wcLen;
430 wxWCharBuffer wcBuf(wxMBConvStrictUTF8().cMB2WC
431 (
432 m_impl.c_str(),
433 m_impl.length() + 1, // size
434 &wcLen
435 ));
436 if ( !wcLen )
437 return wxCharBuffer("");
438
439 return conv.cWC2MB(wcBuf, wcLen+1, NULL);
440 }
441
442 #else // ANSI
443
444 //Converts this string to a wide character string if unicode
445 //mode is not enabled and wxUSE_WCHAR_T is enabled
446 const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
447 {
448 return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL);
449 }
450
451 #endif // Unicode/ANSI
452
453 // shrink to minimal size (releasing extra memory)
454 bool wxString::Shrink()
455 {
456 wxString tmp(begin(), end());
457 swap(tmp);
458 return tmp.length() == length();
459 }
460
461 // deprecated compatibility code:
462 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
463 wxStringCharType *wxString::GetWriteBuf(size_t nLen)
464 {
465 return DoGetWriteBuf(nLen);
466 }
467
468 void wxString::UngetWriteBuf()
469 {
470 DoUngetWriteBuf();
471 }
472
473 void wxString::UngetWriteBuf(size_t nLen)
474 {
475 DoUngetWriteBuf(nLen);
476 }
477 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
478
479
480 // ---------------------------------------------------------------------------
481 // data access
482 // ---------------------------------------------------------------------------
483
484 // all functions are inline in string.h
485
486 // ---------------------------------------------------------------------------
487 // concatenation operators
488 // ---------------------------------------------------------------------------
489
490 /*
491 * concatenation functions come in 5 flavours:
492 * string + string
493 * char + string and string + char
494 * C str + string and string + C str
495 */
496
497 wxString operator+(const wxString& str1, const wxString& str2)
498 {
499 #if !wxUSE_STL_BASED_WXSTRING
500 wxASSERT( str1.IsValid() );
501 wxASSERT( str2.IsValid() );
502 #endif
503
504 wxString s = str1;
505 s += str2;
506
507 return s;
508 }
509
510 wxString operator+(const wxString& str, wxUniChar ch)
511 {
512 #if !wxUSE_STL_BASED_WXSTRING
513 wxASSERT( str.IsValid() );
514 #endif
515
516 wxString s = str;
517 s += ch;
518
519 return s;
520 }
521
522 wxString operator+(wxUniChar ch, const wxString& str)
523 {
524 #if !wxUSE_STL_BASED_WXSTRING
525 wxASSERT( str.IsValid() );
526 #endif
527
528 wxString s = ch;
529 s += str;
530
531 return s;
532 }
533
534 wxString operator+(const wxString& str, const char *psz)
535 {
536 #if !wxUSE_STL_BASED_WXSTRING
537 wxASSERT( str.IsValid() );
538 #endif
539
540 wxString s;
541 if ( !s.Alloc(strlen(psz) + str.length()) ) {
542 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
543 }
544 s += str;
545 s += psz;
546
547 return s;
548 }
549
550 wxString operator+(const wxString& str, const wchar_t *pwz)
551 {
552 #if !wxUSE_STL_BASED_WXSTRING
553 wxASSERT( str.IsValid() );
554 #endif
555
556 wxString s;
557 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
558 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
559 }
560 s += str;
561 s += pwz;
562
563 return s;
564 }
565
566 wxString operator+(const char *psz, const wxString& str)
567 {
568 #if !wxUSE_STL_BASED_WXSTRING
569 wxASSERT( str.IsValid() );
570 #endif
571
572 wxString s;
573 if ( !s.Alloc(strlen(psz) + str.length()) ) {
574 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
575 }
576 s = psz;
577 s += str;
578
579 return s;
580 }
581
582 wxString operator+(const wchar_t *pwz, const wxString& str)
583 {
584 #if !wxUSE_STL_BASED_WXSTRING
585 wxASSERT( str.IsValid() );
586 #endif
587
588 wxString s;
589 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
590 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
591 }
592 s = pwz;
593 s += str;
594
595 return s;
596 }
597
598 // ---------------------------------------------------------------------------
599 // string comparison
600 // ---------------------------------------------------------------------------
601
602 bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
603 {
604 return (length() == 1) && (compareWithCase ? GetChar(0u) == c
605 : wxToupper(GetChar(0u)) == wxToupper(c));
606 }
607
608 #ifdef HAVE_STD_STRING_COMPARE
609
610 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
611 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
612 // sort strings in characters code point order by sorting the byte sequence
613 // in byte values order (i.e. what strcmp() and memcmp() do).
614
615 int wxString::compare(const wxString& str) const
616 {
617 return m_impl.compare(str.m_impl);
618 }
619
620 int wxString::compare(size_t nStart, size_t nLen,
621 const wxString& str) const
622 {
623 size_t pos, len;
624 PosLenToImpl(nStart, nLen, &pos, &len);
625 return m_impl.compare(pos, len, str.m_impl);
626 }
627
628 int wxString::compare(size_t nStart, size_t nLen,
629 const wxString& str,
630 size_t nStart2, size_t nLen2) const
631 {
632 size_t pos, len;
633 PosLenToImpl(nStart, nLen, &pos, &len);
634
635 size_t pos2, len2;
636 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
637
638 return m_impl.compare(pos, len, str.m_impl, pos2, len2);
639 }
640
641 int wxString::compare(const char* sz) const
642 {
643 return m_impl.compare(ImplStr(sz));
644 }
645
646 int wxString::compare(const wchar_t* sz) const
647 {
648 return m_impl.compare(ImplStr(sz));
649 }
650
651 int wxString::compare(size_t nStart, size_t nLen,
652 const char* sz, size_t nCount) const
653 {
654 size_t pos, len;
655 PosLenToImpl(nStart, nLen, &pos, &len);
656
657 SubstrBufFromMB str(ImplStr(sz, nCount));
658
659 return m_impl.compare(pos, len, str.data, str.len);
660 }
661
662 int wxString::compare(size_t nStart, size_t nLen,
663 const wchar_t* sz, size_t nCount) const
664 {
665 size_t pos, len;
666 PosLenToImpl(nStart, nLen, &pos, &len);
667
668 SubstrBufFromWC str(ImplStr(sz, nCount));
669
670 return m_impl.compare(pos, len, str.data, str.len);
671 }
672
673 #else // !HAVE_STD_STRING_COMPARE
674
675 static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
676 const wxStringCharType* s2, size_t l2)
677 {
678 if( l1 == l2 )
679 return wxStringMemcmp(s1, s2, l1);
680 else if( l1 < l2 )
681 {
682 int ret = wxStringMemcmp(s1, s2, l1);
683 return ret == 0 ? -1 : ret;
684 }
685 else
686 {
687 int ret = wxStringMemcmp(s1, s2, l2);
688 return ret == 0 ? +1 : ret;
689 }
690 }
691
692 int wxString::compare(const wxString& str) const
693 {
694 return ::wxDoCmp(m_impl.data(), m_impl.length(),
695 str.m_impl.data(), str.m_impl.length());
696 }
697
698 int wxString::compare(size_t nStart, size_t nLen,
699 const wxString& str) const
700 {
701 wxASSERT(nStart <= length());
702 size_type strLen = length() - nStart;
703 nLen = strLen < nLen ? strLen : nLen;
704
705 size_t pos, len;
706 PosLenToImpl(nStart, nLen, &pos, &len);
707
708 return ::wxDoCmp(m_impl.data() + pos, len,
709 str.m_impl.data(), str.m_impl.length());
710 }
711
712 int wxString::compare(size_t nStart, size_t nLen,
713 const wxString& str,
714 size_t nStart2, size_t nLen2) const
715 {
716 wxASSERT(nStart <= length());
717 wxASSERT(nStart2 <= str.length());
718 size_type strLen = length() - nStart,
719 strLen2 = str.length() - nStart2;
720 nLen = strLen < nLen ? strLen : nLen;
721 nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
722
723 size_t pos, len;
724 PosLenToImpl(nStart, nLen, &pos, &len);
725 size_t pos2, len2;
726 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
727
728 return ::wxDoCmp(m_impl.data() + pos, len,
729 str.m_impl.data() + pos2, len2);
730 }
731
732 int wxString::compare(const char* sz) const
733 {
734 SubstrBufFromMB str(ImplStr(sz, npos));
735 if ( str.len == npos )
736 str.len = wxStringStrlen(str.data);
737 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
738 }
739
740 int wxString::compare(const wchar_t* sz) const
741 {
742 SubstrBufFromWC str(ImplStr(sz, npos));
743 if ( str.len == npos )
744 str.len = wxStringStrlen(str.data);
745 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
746 }
747
748 int wxString::compare(size_t nStart, size_t nLen,
749 const char* sz, size_t nCount) const
750 {
751 wxASSERT(nStart <= length());
752 size_type strLen = length() - nStart;
753 nLen = strLen < nLen ? strLen : nLen;
754
755 size_t pos, len;
756 PosLenToImpl(nStart, nLen, &pos, &len);
757
758 SubstrBufFromMB str(ImplStr(sz, nCount));
759 if ( str.len == npos )
760 str.len = wxStringStrlen(str.data);
761
762 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
763 }
764
765 int wxString::compare(size_t nStart, size_t nLen,
766 const wchar_t* sz, size_t nCount) const
767 {
768 wxASSERT(nStart <= length());
769 size_type strLen = length() - nStart;
770 nLen = strLen < nLen ? strLen : nLen;
771
772 size_t pos, len;
773 PosLenToImpl(nStart, nLen, &pos, &len);
774
775 SubstrBufFromWC str(ImplStr(sz, nCount));
776 if ( str.len == npos )
777 str.len = wxStringStrlen(str.data);
778
779 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
780 }
781
782 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
783
784
785 // ---------------------------------------------------------------------------
786 // find_{first,last}_[not]_of functions
787 // ---------------------------------------------------------------------------
788
789 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
790
791 // NB: All these functions are implemented with the argument being wxChar*,
792 // i.e. widechar string in any Unicode build, even though native string
793 // representation is char* in the UTF-8 build. This is because we couldn't
794 // use memchr() to determine if a character is in a set encoded as UTF-8.
795
796 size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
797 {
798 return find_first_of(sz, nStart, wxStrlen(sz));
799 }
800
801 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
802 {
803 return find_first_not_of(sz, nStart, wxStrlen(sz));
804 }
805
806 size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
807 {
808 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
809
810 size_t idx = nStart;
811 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
812 {
813 if ( wxTmemchr(sz, *i, n) )
814 return idx;
815 }
816
817 return npos;
818 }
819
820 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
821 {
822 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
823
824 size_t idx = nStart;
825 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
826 {
827 if ( !wxTmemchr(sz, *i, n) )
828 return idx;
829 }
830
831 return npos;
832 }
833
834
835 size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
836 {
837 return find_last_of(sz, nStart, wxStrlen(sz));
838 }
839
840 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
841 {
842 return find_last_not_of(sz, nStart, wxStrlen(sz));
843 }
844
845 size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
846 {
847 size_t len = length();
848
849 if ( nStart == npos )
850 {
851 nStart = len - 1;
852 }
853 else
854 {
855 wxASSERT_MSG( nStart <= len, _T("invalid index") );
856 }
857
858 size_t idx = nStart;
859 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
860 i != rend(); --idx, ++i )
861 {
862 if ( wxTmemchr(sz, *i, n) )
863 return idx;
864 }
865
866 return npos;
867 }
868
869 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
870 {
871 size_t len = length();
872
873 if ( nStart == npos )
874 {
875 nStart = len - 1;
876 }
877 else
878 {
879 wxASSERT_MSG( nStart <= len, _T("invalid index") );
880 }
881
882 size_t idx = nStart;
883 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
884 i != rend(); --idx, ++i )
885 {
886 if ( !wxTmemchr(sz, *i, n) )
887 return idx;
888 }
889
890 return npos;
891 }
892
893 size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
894 {
895 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
896
897 size_t idx = nStart;
898 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
899 {
900 if ( *i != ch )
901 return idx;
902 }
903
904 return npos;
905 }
906
907 size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
908 {
909 size_t len = length();
910
911 if ( nStart == npos )
912 {
913 nStart = len - 1;
914 }
915 else
916 {
917 wxASSERT_MSG( nStart <= len, _T("invalid index") );
918 }
919
920 size_t idx = nStart;
921 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
922 i != rend(); --idx, ++i )
923 {
924 if ( *i != ch )
925 return idx;
926 }
927
928 return npos;
929 }
930
931 // the functions above were implemented for wchar_t* arguments in Unicode
932 // build and char* in ANSI build; below are implementations for the other
933 // version:
934 #if wxUSE_UNICODE
935 #define wxOtherCharType char
936 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
937 #else
938 #define wxOtherCharType wchar_t
939 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
940 #endif
941
942 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
943 { return find_first_of(STRCONV(sz), nStart); }
944
945 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
946 size_t n) const
947 { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
948 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
949 { return find_last_of(STRCONV(sz), nStart); }
950 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
951 size_t n) const
952 { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
953 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
954 { return find_first_not_of(STRCONV(sz), nStart); }
955 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
956 size_t n) const
957 { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
958 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
959 { return find_last_not_of(STRCONV(sz), nStart); }
960 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
961 size_t n) const
962 { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
963
964 #undef wxOtherCharType
965 #undef STRCONV
966
967 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
968
969 // ===========================================================================
970 // other common string functions
971 // ===========================================================================
972
973 int wxString::CmpNoCase(const wxString& s) const
974 {
975 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
976
977 const_iterator i1 = begin();
978 const_iterator end1 = end();
979 const_iterator i2 = s.begin();
980 const_iterator end2 = s.end();
981
982 for ( ; i1 != end1 && i2 != end2; ++i1, ++i2 )
983 {
984 wxUniChar lower1 = (wxChar)wxTolower(*i1);
985 wxUniChar lower2 = (wxChar)wxTolower(*i2);
986 if ( lower1 != lower2 )
987 return lower1 < lower2 ? -1 : 1;
988 }
989
990 size_t len1 = length();
991 size_t len2 = s.length();
992
993 if ( len1 < len2 )
994 return -1;
995 else if ( len1 > len2 )
996 return 1;
997 return 0;
998 }
999
1000
1001 #if wxUSE_UNICODE
1002
// When __MWERKS__ is defined, provide a fallback definition of __SCHAR_MAX__
// if there is none yet
// NOTE(review): presumably needed by some header when building with
//               Metrowerks CodeWarrior -- confirm
#ifdef __MWERKS__
#ifndef __SCHAR_MAX__
    #define __SCHAR_MAX__ 127
#endif
#endif
1008
1009 wxString wxString::FromAscii(const char *ascii, size_t len)
1010 {
1011 if (!ascii || len == 0)
1012 return wxEmptyString;
1013
1014 wxString res;
1015
1016 {
1017 wxStringInternalBuffer buf(res, len);
1018 wxStringCharType *dest = buf;
1019
1020 for ( ; len > 0; --len )
1021 {
1022 unsigned char c = (unsigned char)*ascii++;
1023 wxASSERT_MSG( c < 0x80,
1024 _T("Non-ASCII value passed to FromAscii().") );
1025
1026 *dest++ = (wchar_t)c;
1027 }
1028 }
1029
1030 return res;
1031 }
1032
1033 wxString wxString::FromAscii(const char *ascii)
1034 {
1035 return FromAscii(ascii, wxStrlen(ascii));
1036 }
1037
1038 wxString wxString::FromAscii(char ascii)
1039 {
1040 // What do we do with '\0' ?
1041
1042 unsigned char c = (unsigned char)ascii;
1043
1044 wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") );
1045
1046 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1047 return wxString(wxUniChar((wchar_t)c));
1048 }
1049
1050 const wxCharBuffer wxString::ToAscii() const
1051 {
1052 // this will allocate enough space for the terminating NUL too
1053 wxCharBuffer buffer(length());
1054 char *dest = buffer.data();
1055
1056 for ( const_iterator i = begin(); i != end(); ++i )
1057 {
1058 wxUniChar c(*i);
1059 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1060 *dest++ = c.IsAscii() ? (char)c : '_';
1061
1062 // the output string can't have embedded NULs anyhow, so we can safely
1063 // stop at first of them even if we do have any
1064 if ( !c )
1065 break;
1066 }
1067
1068 return buffer;
1069 }
1070
1071 #endif // wxUSE_UNICODE
1072
1073 // extract string of length nCount starting at nFirst
1074 wxString wxString::Mid(size_t nFirst, size_t nCount) const
1075 {
1076 size_t nLen = length();
1077
1078 // default value of nCount is npos and means "till the end"
1079 if ( nCount == npos )
1080 {
1081 nCount = nLen - nFirst;
1082 }
1083
1084 // out-of-bounds requests return sensible things
1085 if ( nFirst + nCount > nLen )
1086 {
1087 nCount = nLen - nFirst;
1088 }
1089
1090 if ( nFirst > nLen )
1091 {
1092 // AllocCopy() will return empty string
1093 return wxEmptyString;
1094 }
1095
1096 wxString dest(*this, nFirst, nCount);
1097 if ( dest.length() != nCount )
1098 {
1099 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1100 }
1101
1102 return dest;
1103 }
1104
1105 // check that the string starts with prefix and return the rest of the string
1106 // in the provided pointer if it is not NULL, otherwise return false
1107 bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
1108 {
1109 if ( compare(0, prefix.length(), prefix) != 0 )
1110 return false;
1111
1112 if ( rest )
1113 {
1114 // put the rest of the string into provided pointer
1115 rest->assign(*this, prefix.length(), npos);
1116 }
1117
1118 return true;
1119 }
1120
1121
1122 // check that the string ends with suffix and return the rest of it in the
1123 // provided pointer if it is not NULL, otherwise return false
1124 bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
1125 {
1126 int start = length() - suffix.length();
1127
1128 if ( start < 0 || compare(start, npos, suffix) != 0 )
1129 return false;
1130
1131 if ( rest )
1132 {
1133 // put the rest of the string into provided pointer
1134 rest->assign(*this, 0, start);
1135 }
1136
1137 return true;
1138 }
1139
1140
1141 // extract nCount last (rightmost) characters
1142 wxString wxString::Right(size_t nCount) const
1143 {
1144 if ( nCount > length() )
1145 nCount = length();
1146
1147 wxString dest(*this, length() - nCount, nCount);
1148 if ( dest.length() != nCount ) {
1149 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1150 }
1151 return dest;
1152 }
1153
1154 // get all characters after the last occurence of ch
1155 // (returns the whole string if ch not found)
1156 wxString wxString::AfterLast(wxUniChar ch) const
1157 {
1158 wxString str;
1159 int iPos = Find(ch, true);
1160 if ( iPos == wxNOT_FOUND )
1161 str = *this;
1162 else
1163 str = wx_str() + iPos + 1;
1164
1165 return str;
1166 }
1167
1168 // extract nCount first (leftmost) characters
1169 wxString wxString::Left(size_t nCount) const
1170 {
1171 if ( nCount > length() )
1172 nCount = length();
1173
1174 wxString dest(*this, 0, nCount);
1175 if ( dest.length() != nCount ) {
1176 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1177 }
1178 return dest;
1179 }
1180
1181 // get all characters before the first occurence of ch
1182 // (returns the whole string if ch not found)
1183 wxString wxString::BeforeFirst(wxUniChar ch) const
1184 {
1185 int iPos = Find(ch);
1186 if ( iPos == wxNOT_FOUND ) iPos = length();
1187 return wxString(*this, 0, iPos);
1188 }
1189
1190 /// get all characters before the last occurence of ch
1191 /// (returns empty string if ch not found)
1192 wxString wxString::BeforeLast(wxUniChar ch) const
1193 {
1194 wxString str;
1195 int iPos = Find(ch, true);
1196 if ( iPos != wxNOT_FOUND && iPos != 0 )
1197 str = wxString(c_str(), iPos);
1198
1199 return str;
1200 }
1201
1202 /// get all characters after the first occurence of ch
1203 /// (returns empty string if ch not found)
1204 wxString wxString::AfterFirst(wxUniChar ch) const
1205 {
1206 wxString str;
1207 int iPos = Find(ch);
1208 if ( iPos != wxNOT_FOUND )
1209 str = wx_str() + iPos + 1;
1210
1211 return str;
1212 }
1213
1214 // replace first (or all) occurences of some substring with another one
1215 size_t wxString::Replace(const wxString& strOld,
1216 const wxString& strNew, bool bReplaceAll)
1217 {
1218 // if we tried to replace an empty string we'd enter an infinite loop below
1219 wxCHECK_MSG( !strOld.empty(), 0,
1220 _T("wxString::Replace(): invalid parameter") );
1221
1222 size_t uiCount = 0; // count of replacements made
1223
1224 size_t uiOldLen = strOld.length();
1225 size_t uiNewLen = strNew.length();
1226
1227 size_t dwPos = 0;
1228
1229 while ( (*this)[dwPos] != wxT('\0') )
1230 {
1231 //DO NOT USE STRSTR HERE
1232 //this string can contain embedded null characters,
1233 //so strstr will function incorrectly
1234 dwPos = find(strOld, dwPos);
1235 if ( dwPos == npos )
1236 break; // exit the loop
1237 else
1238 {
1239 //replace this occurance of the old string with the new one
1240 replace(dwPos, uiOldLen, strNew, uiNewLen);
1241
1242 //move up pos past the string that was replaced
1243 dwPos += uiNewLen;
1244
1245 //increase replace count
1246 ++uiCount;
1247
1248 // stop now?
1249 if ( !bReplaceAll )
1250 break; // exit the loop
1251 }
1252 }
1253
1254 return uiCount;
1255 }
1256
1257 bool wxString::IsAscii() const
1258 {
1259 for ( const_iterator i = begin(); i != end(); ++i )
1260 {
1261 if ( !(*i).IsAscii() )
1262 return false;
1263 }
1264
1265 return true;
1266 }
1267
1268 bool wxString::IsWord() const
1269 {
1270 for ( const_iterator i = begin(); i != end(); ++i )
1271 {
1272 if ( !wxIsalpha(*i) )
1273 return false;
1274 }
1275
1276 return true;
1277 }
1278
1279 bool wxString::IsNumber() const
1280 {
1281 if ( empty() )
1282 return true;
1283
1284 const_iterator i = begin();
1285
1286 if ( *i == _T('-') || *i == _T('+') )
1287 ++i;
1288
1289 for ( ; i != end(); ++i )
1290 {
1291 if ( !wxIsdigit(*i) )
1292 return false;
1293 }
1294
1295 return true;
1296 }
1297
1298 wxString wxString::Strip(stripType w) const
1299 {
1300 wxString s = *this;
1301 if ( w & leading ) s.Trim(false);
1302 if ( w & trailing ) s.Trim(true);
1303 return s;
1304 }
1305
1306 // ---------------------------------------------------------------------------
1307 // case conversion
1308 // ---------------------------------------------------------------------------
1309
1310 wxString& wxString::MakeUpper()
1311 {
1312 for ( iterator it = begin(), en = end(); it != en; ++it )
1313 *it = (wxChar)wxToupper(*it);
1314
1315 return *this;
1316 }
1317
1318 wxString& wxString::MakeLower()
1319 {
1320 for ( iterator it = begin(), en = end(); it != en; ++it )
1321 *it = (wxChar)wxTolower(*it);
1322
1323 return *this;
1324 }
1325
1326 // ---------------------------------------------------------------------------
1327 // trimming and padding
1328 // ---------------------------------------------------------------------------
1329
1330 // some compilers (VC++ 6.0 not to name them) return true for a call to
1331 // isspace('ê') in the C locale which seems to be broken to me, but we have to
1332 // live with this by checking that the character is a 7 bit one - even if this
1333 // may fail to detect some spaces (I don't know if Unicode doesn't have
1334 // space-like symbols somewhere except in the first 128 chars), it is arguably
1335 // still better than trimming away accented letters
1336 inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1337
1338 // trims spaces (in the sense of isspace) from left or right side
1339 wxString& wxString::Trim(bool bFromRight)
1340 {
1341 // first check if we're going to modify the string at all
1342 if ( !empty() &&
1343 (
1344 (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1345 (!bFromRight && wxSafeIsspace(GetChar(0u)))
1346 )
1347 )
1348 {
1349 if ( bFromRight )
1350 {
1351 // find last non-space character
1352 reverse_iterator psz = rbegin();
1353 while ( (psz != rend()) && wxSafeIsspace(*psz) )
1354 ++psz;
1355
1356 // truncate at trailing space start
1357 erase(psz.base(), end());
1358 }
1359 else
1360 {
1361 // find first non-space character
1362 iterator psz = begin();
1363 while ( (psz != end()) && wxSafeIsspace(*psz) )
1364 ++psz;
1365
1366 // fix up data and length
1367 erase(begin(), psz);
1368 }
1369 }
1370
1371 return *this;
1372 }
1373
1374 // adds nCount characters chPad to the string from either side
1375 wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1376 {
1377 wxString s(chPad, nCount);
1378
1379 if ( bFromRight )
1380 *this += s;
1381 else
1382 {
1383 s += *this;
1384 swap(s);
1385 }
1386
1387 return *this;
1388 }
1389
1390 // truncate the string
1391 wxString& wxString::Truncate(size_t uiLen)
1392 {
1393 if ( uiLen < length() )
1394 {
1395 erase(begin() + uiLen, end());
1396 }
1397 //else: nothing to do, string is already short enough
1398
1399 return *this;
1400 }
1401
1402 // ---------------------------------------------------------------------------
1403 // finding (return wxNOT_FOUND if not found and index otherwise)
1404 // ---------------------------------------------------------------------------
1405
1406 // find a character
1407 int wxString::Find(wxUniChar ch, bool bFromEnd) const
1408 {
1409 size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1410
1411 return (idx == npos) ? wxNOT_FOUND : (int)idx;
1412 }
1413
1414 // ----------------------------------------------------------------------------
1415 // conversion to numbers
1416 // ----------------------------------------------------------------------------
1417
1418 // The implementation of all the functions below is exactly the same so factor
1419 // it out. Note that number extraction works correctly on UTF-8 strings, so
1420 // we can use wxStringCharType and wx_str() for maximum efficiency.
1421
// DO_IF_NOT_WINCE(x) expands to x everywhere except under Windows CE, where
// it expands to nothing
#ifdef __WXWINCE__
    #define DO_IF_NOT_WINCE(x)
#else
    #define DO_IF_NOT_WINCE(x) x
#endif
1427
// Expands to the complete body of a "string to integer" conversion method:
// checks that the output pointer val is not NULL (returning false if it is),
// asserts that base is either 0 or in the 2..36 range, parses the string
// returned by wx_str() with func (a strtol()-like function taking the
// string, an end pointer and the base) and stores the result in *val.
//
// As the expansion ends with a "return", it must be the last statement of
// the function using it.
1428 #define WX_STRING_TO_INT_TYPE(val, base, func) \
1429 wxCHECK_MSG( val, false, _T("NULL output pointer") ); \
1430 wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") ); \
1431 \
1432 DO_IF_NOT_WINCE( errno = 0; ) \
1433 \
1434 const wxStringCharType *start = wx_str(); \
1435 wxStringCharType *end; \
1436 *val = func(start, &end, base); \
1437 \
1438 /* return true only if scan was stopped by the terminating NUL and */ \
1439 /* if the string was not empty to start with and no under/overflow */ \
1440 /* occurred: */ \
1441 return !*end && (end != start) \
1442 DO_IF_NOT_WINCE( && (errno != ERANGE) )
1443
// Convert the string to a long using wxStrtol() with the given base and
// store the result in *val.
//
// Returns true only if val is not NULL, the string is not empty, all of it
// was consumed by the conversion and (except under Windows CE, where errno
// is not checked) no range error was reported.
1444 bool wxString::ToLong(long *val, int base) const
1445 {
1446 WX_STRING_TO_INT_TYPE(val, base, wxStrtol);
1447 }
1448
// Convert the string to an unsigned long using wxStrtoul() with the given
// base and store the result in *val.
//
// Returns true only if val is not NULL, the string is not empty, all of it
// was consumed by the conversion and (except under Windows CE, where errno
// is not checked) no range error was reported.
1449 bool wxString::ToULong(unsigned long *val, int base) const
1450 {
1451 WX_STRING_TO_INT_TYPE(val, base, wxStrtoul);
1452 }
1453
// Convert the string to a wxLongLong_t using wxStrtoll() with the given base
// and store the result in *val.
//
// Returns true only if val is not NULL, the string is not empty, all of it
// was consumed by the conversion and (except under Windows CE, where errno
// is not checked) no range error was reported.
1454 bool wxString::ToLongLong(wxLongLong_t *val, int base) const
1455 {
1456 WX_STRING_TO_INT_TYPE(val, base, wxStrtoll);
1457 }
1458
// Convert the string to a wxULongLong_t using wxStrtoull() with the given
// base and store the result in *val.
//
// Returns true only if val is not NULL, the string is not empty, all of it
// was consumed by the conversion and (except under Windows CE, where errno
// is not checked) no range error was reported.
1459 bool wxString::ToULongLong(wxULongLong_t *val, int base) const
1460 {
1461 WX_STRING_TO_INT_TYPE(val, base, wxStrtoull);
1462 }
1463
1464 bool wxString::ToDouble(double *val) const
1465 {
1466 wxCHECK_MSG( val, false, _T("NULL pointer in wxString::ToDouble") );
1467
1468 #ifndef __WXWINCE__
1469 errno = 0;
1470 #endif
1471
1472 const wxChar *start = c_str();
1473 wxChar *end;
1474 *val = wxStrtod(start, &end);
1475
1476 // return true only if scan was stopped by the terminating NUL and if the
1477 // string was not empty to start with and no under/overflow occurred
1478 return !*end && (end != start)
1479 #ifndef __WXWINCE__
1480 && (errno != ERANGE)
1481 #endif
1482 ;
1483 }
1484
1485 // ---------------------------------------------------------------------------
1486 // formatted output
1487 // ---------------------------------------------------------------------------
1488
1489 #if !wxUSE_UTF8_LOCALE_ONLY
1490 /* static */
1491 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1492 wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
1493 #else
1494 wxString wxString::DoFormatWchar(const wxChar *format, ...)
1495 #endif
1496 {
1497 va_list argptr;
1498 va_start(argptr, format);
1499
1500 wxString s;
1501 s.PrintfV(format, argptr);
1502
1503 va_end(argptr);
1504
1505 return s;
1506 }
1507 #endif // !wxUSE_UTF8_LOCALE_ONLY
1508
#if wxUSE_UNICODE_UTF8
// Return a new string containing the result of formatting the variable
// arguments according to the char format string.
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
1524
1525 /* static */
1526 wxString wxString::FormatV(const wxString& format, va_list argptr)
1527 {
1528 wxString s;
1529 s.PrintfV(format, argptr);
1530 return s;
1531 }
1532
1533 #if !wxUSE_UTF8_LOCALE_ONLY
1534 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1535 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
1536 #else
1537 int wxString::DoPrintfWchar(const wxChar *format, ...)
1538 #endif
1539 {
1540 va_list argptr;
1541 va_start(argptr, format);
1542
1543 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1544 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1545 // because it's the only cast that works safely for downcasting when
1546 // multiple inheritance is used:
1547 wxString *str = static_cast<wxString*>(this);
1548 #else
1549 wxString *str = this;
1550 #endif
1551
1552 int iLen = str->PrintfV(format, argptr);
1553
1554 va_end(argptr);
1555
1556 return iLen;
1557 }
1558 #endif // !wxUSE_UTF8_LOCALE_ONLY
1559
#if wxUSE_UNICODE_UTF8
// Format the variable arguments according to the char format string into
// this string and return whatever PrintfV() returns.
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);
    const int nWritten = PrintfV(format, args);
    va_end(args);

    return nWritten;
}
#endif // wxUSE_UNICODE_UTF8
1573
1574 #if wxUSE_UNICODE_UTF8
1575 template<typename BufferType>
1576 #else
1577 // we only need one version in non-UTF8 builds and at least two Windows
1578 // compilers have problems with this function template, so use just one
1579 // normal function here
1580 #endif
1581 static int DoStringPrintfV(wxString& str,
1582 const wxString& format, va_list argptr)
1583 {
1584 int size = 1024;
1585
1586 for ( ;; )
1587 {
1588 #if wxUSE_UNICODE_UTF8
1589 BufferType tmp(str, size + 1);
1590 typename BufferType::CharType *buf = tmp;
1591 #else
1592 wxStringBuffer tmp(str, size + 1);
1593 wxChar *buf = tmp;
1594 #endif
1595
1596 if ( !buf )
1597 {
1598 // out of memory
1599
1600 // in UTF-8 build, leaving uninitialized junk in the buffer
1601 // could result in invalid non-empty UTF-8 string, so just
1602 // reset the string to empty on failure:
1603 buf[0] = '\0';
1604 return -1;
1605 }
1606
1607 // wxVsnprintf() may modify the original arg pointer, so pass it
1608 // only a copy
1609 va_list argptrcopy;
1610 wxVaCopy(argptrcopy, argptr);
1611 int len = wxVsnprintf(buf, size, format, argptrcopy);
1612 va_end(argptrcopy);
1613
1614 // some implementations of vsnprintf() don't NUL terminate
1615 // the string if there is not enough space for it so
1616 // always do it manually
1617 buf[size] = _T('\0');
1618
1619 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1620 // total number of characters which would have been written if the
1621 // buffer were large enough (newer standards such as Unix98)
1622 if ( len < 0 )
1623 {
1624 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1625 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1626 // is true if *both* of them use our own implementation,
1627 // otherwise we can't be sure
1628 #if wxUSE_WXVSNPRINTF
1629 // we know that our own implementation of wxVsnprintf() returns -1
1630 // only for a format error - thus there's something wrong with
1631 // the user's format string
1632 buf[0] = '\0';
1633 return -1;
1634 #else // possibly using system version
1635 // assume it only returns error if there is not enough space, but
1636 // as we don't know how much we need, double the current size of
1637 // the buffer
1638 size *= 2;
1639 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1640 }
1641 else if ( len >= size )
1642 {
1643 #if wxUSE_WXVSNPRINTF
1644 // we know that our own implementation of wxVsnprintf() returns
1645 // size+1 when there's not enough space but that's not the size
1646 // of the required buffer!
1647 size *= 2; // so we just double the current size of the buffer
1648 #else
1649 // some vsnprintf() implementations NUL-terminate the buffer and
1650 // some don't in len == size case, to be safe always add 1
1651 size = len + 1;
1652 #endif
1653 }
1654 else // ok, there was enough space
1655 {
1656 break;
1657 }
1658 }
1659
1660 // we could have overshot
1661 str.Shrink();
1662
1663 return str.length();
1664 }
1665
1666 int wxString::PrintfV(const wxString& format, va_list argptr)
1667 {
1668 #if wxUSE_UNICODE_UTF8
1669 #if wxUSE_STL_BASED_WXSTRING
1670 typedef wxStringTypeBuffer<char> Utf8Buffer;
1671 #else
1672 typedef wxStringInternalBuffer Utf8Buffer;
1673 #endif
1674 #endif
1675
1676 #if wxUSE_UTF8_LOCALE_ONLY
1677 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
1678 #else
1679 #if wxUSE_UNICODE_UTF8
1680 if ( wxLocaleIsUtf8 )
1681 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
1682 else
1683 // wxChar* version
1684 return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
1685 #else
1686 return DoStringPrintfV(*this, format, argptr);
1687 #endif // UTF8/WCHAR
1688 #endif
1689 }
1690
1691 // ----------------------------------------------------------------------------
1692 // misc other operations
1693 // ----------------------------------------------------------------------------
1694
1695 // returns true if the string matches the pattern which may contain '*' and
1696 // '?' metacharacters (as usual, '?' matches any character and '*' any number
1697 // of them)
//
// The matching is done by walking the mask and the text in parallel; when a
// '*' is met, the literal run following it in the mask is searched for in
// the text with wxStrstr() and, if the match fails later on, the scan is
// restarted one text character after the position at which that '*' was
// seen.
1698 bool wxString::Matches(const wxString& mask) const
1699 {
1700 // I disable this code as it doesn't seem to be faster (in fact, it seems
1701 // to be much slower) than the old, hand-written code below and using it
1702 // here requires always linking with libregex even if the user code doesn't
1703 // use it
1704 #if 0 // wxUSE_REGEX
1705 // first translate the shell-like mask into a regex
1706 wxString pattern;
1707 pattern.reserve(wxStrlen(pszMask));
1708
1709 pattern += _T('^');
1710 while ( *pszMask )
1711 {
1712 switch ( *pszMask )
1713 {
1714 case _T('?'):
1715 pattern += _T('.');
1716 break;
1717
1718 case _T('*'):
1719 pattern += _T(".*");
1720 break;
1721
1722 case _T('^'):
1723 case _T('.'):
1724 case _T('$'):
1725 case _T('('):
1726 case _T(')'):
1727 case _T('|'):
1728 case _T('+'):
1729 case _T('\\'):
1730 // these characters are special in a RE, quote them
1731 // (however note that we don't quote '[' and ']' to allow
1732 // using them for Unix shell like matching)
1733 pattern += _T('\\');
1734 // fall through
1735
1736 default:
1737 pattern += *pszMask;
1738 }
1739
1740 pszMask++;
1741 }
1742 pattern += _T('$');
1743
1744 // and now use it
1745 return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
1746 #else // !wxUSE_REGEX
1747 // TODO: this is, of course, awfully inefficient...
1748
1749 // FIXME-UTF8: implement using iterators, remove #if
1750 #if wxUSE_UNICODE_UTF8
// in this build both the mask and the text are first converted to wchar_t
// buffers which are then scanned
1751 wxWCharBuffer maskBuf = mask.wc_str();
1752 wxWCharBuffer txtBuf = wc_str();
1753 const wxChar *pszMask = maskBuf.data();
1754 const wxChar *pszTxt = txtBuf.data();
1755 #else
1756 const wxChar *pszMask = mask.wx_str();
1757 // the char currently being checked
1758 const wxChar *pszTxt = wx_str();
1759 #endif
1760
1761 // the last location where '*' matched
1762 const wxChar *pszLastStarInText = NULL;
1763 const wxChar *pszLastStarInMask = NULL;
1764
1765 match:
1766 for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
1767 switch ( *pszMask ) {
1768 case wxT('?'):
// '?' needs a character to match, so fail if the text is exhausted
1769 if ( *pszTxt == wxT('\0') )
1770 return false;
1771
1772 // pszTxt and pszMask will be incremented in the loop statement
1773
1774 break;
1775
1776 case wxT('*'):
1777 {
1778 // remember where we started to be able to backtrack later
1779 pszLastStarInText = pszTxt;
1780 pszLastStarInMask = pszMask;
1781
1782 // ignore special chars immediately following this one
1783 // (should this be an error?)
// NOTE(review): a '?' directly following the '*' is skipped here without
// consuming any text character
1784 while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
1785 pszMask++;
1786
1787 // if there is nothing more, match
1788 if ( *pszMask == wxT('\0') )
1789 return true;
1790
1791 // are there any other metacharacters in the mask?
1792 size_t uiLenMask;
1793 const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));
1794
1795 if ( pEndMask != NULL ) {
1796 // we have to match the string between two metachars
1797 uiLenMask = pEndMask - pszMask;
1798 }
1799 else {
1800 // we have to match the remainder of the string
1801 uiLenMask = wxStrlen(pszMask);
1802 }
1803
// look for the literal part of the mask anywhere in the remaining text
1804 wxString strToMatch(pszMask, uiLenMask);
1805 const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
1806 if ( pMatch == NULL )
1807 return false;
1808
1809 // -1 to compensate "++" in the loop
1810 pszTxt = pMatch + uiLenMask - 1;
1811 pszMask += uiLenMask - 1;
1812 }
1813 break;
1814
1815 default:
// any other mask character must be present literally in the text
1816 if ( *pszMask != *pszTxt )
1817 return false;
1818 break;
1819 }
1820 }
1821
1822 // match only if nothing left
1823 if ( *pszTxt == wxT('\0') )
1824 return true;
1825
1826 // if we failed to match, backtrack if we can
1827 if ( pszLastStarInText ) {
// retry from the character following the one at which the last '*' was
// seen, with the mask rewound to that '*'
1828 pszTxt = pszLastStarInText + 1;
1829 pszMask = pszLastStarInMask;
1830
1831 pszLastStarInText = NULL;
1832
1833 // don't bother resetting pszLastStarInMask, it's unnecessary
1834
1835 goto match;
1836 }
1837
1838 return false;
1839 #endif // wxUSE_REGEX/!wxUSE_REGEX
1840 }
1841
1842 // Count the number of chars
1843 int wxString::Freq(wxUniChar ch) const
1844 {
1845 int count = 0;
1846 for ( const_iterator i = begin(); i != end(); ++i )
1847 {
1848 if ( *i == ch )
1849 count ++;
1850 }
1851 return count;
1852 }
1853
1854 // convert to upper case, return the copy of the string
1855 wxString wxString::Upper() const
1856 { wxString s(*this); return s.MakeUpper(); }
1857
1858 // convert to lower case, return the copy of the string
1859 wxString wxString::Lower() const { wxString s(*this); return s.MakeLower(); }
1860
1861 // ----------------------------------------------------------------------------
1862 // wxUTF8StringBuffer
1863 // ----------------------------------------------------------------------------
1864
1865 #if wxUSE_UNICODE_WCHAR
1866 wxUTF8StringBuffer::~wxUTF8StringBuffer()
1867 {
1868 wxMBConvStrictUTF8 conv;
1869 size_t wlen = conv.ToWChar(NULL, 0, m_buf);
1870 wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
1871
1872 wxStringInternalBuffer wbuf(m_str, wlen);
1873 conv.ToWChar(wbuf, wlen, m_buf);
1874 }
1875
1876 wxUTF8StringBufferLength::~wxUTF8StringBufferLength()
1877 {
1878 wxCHECK_RET(m_lenSet, "length not set");
1879
1880 wxMBConvStrictUTF8 conv;
1881 size_t wlen = conv.ToWChar(NULL, 0, m_buf, m_len);
1882 wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
1883
1884 wxStringInternalBufferLength wbuf(m_str, wlen);
1885 conv.ToWChar(wbuf, wlen, m_buf, m_len);
1886 wbuf.SetLength(wlen);
1887 }
1888 #endif // wxUSE_UNICODE_WCHAR