]> git.saurik.com Git - wxWidgets.git/blob - src/common/string.cpp
added missing wxUSE_RICHEDIT test
[wxWidgets.git] / src / common / string.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
5 // Modified by:
6 // Created: 29/01/98
7 // RCS-ID: $Id$
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
12
13 // ===========================================================================
14 // headers, declarations, constants
15 // ===========================================================================
16
17 // For compilers that support precompilation, includes "wx.h".
18 #include "wx/wxprec.h"
19
20 #ifdef __BORLANDC__
21 #pragma hdrstop
22 #endif
23
24 #ifndef WX_PRECOMP
25 #include "wx/string.h"
26 #include "wx/wxcrtvararg.h"
27 #endif
28
29 #include <ctype.h>
30
31 #ifndef __WXWINCE__
32 #include <errno.h>
33 #endif
34
35 #include <string.h>
36 #include <stdlib.h>
37
38 #ifdef __SALFORDC__
39 #include <clib.h>
40 #endif
41
42 #include "wx/hashmap.h"
43
44 // string handling functions used by wxString:
45 #if wxUSE_UNICODE_UTF8
46 #define wxStringMemcpy memcpy
47 #define wxStringMemcmp memcmp
48 #define wxStringMemchr memchr
49 #define wxStringStrlen strlen
50 #else
51 #define wxStringMemcpy wxTmemcpy
52 #define wxStringMemcmp wxTmemcmp
53 #define wxStringMemchr wxTmemchr
54 #define wxStringStrlen wxStrlen
55 #endif
56
57
58 // ---------------------------------------------------------------------------
59 // static class variables definition
60 // ---------------------------------------------------------------------------
61
62 //According to STL _must_ be a -1 size_t
63 const size_t wxString::npos = (size_t) -1;
64
65 // ----------------------------------------------------------------------------
66 // global functions
67 // ----------------------------------------------------------------------------
68
69 #if wxUSE_STD_IOSTREAM
70
71 #include <iostream>
72
73 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
74 {
75 // FIXME-UTF8: always, not only if wxUSE_UNICODE
76 #if wxUSE_UNICODE && !defined(__BORLANDC__)
77 return os << (const wchar_t*)str.AsWCharBuf();
78 #else
79 return os << (const char*)str.AsCharBuf();
80 #endif
81 }
82
83 wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
84 {
85 return os << str.c_str();
86 }
87
88 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str)
89 {
90 return os << str.data();
91 }
92
93 #ifndef __BORLANDC__
94 wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)
95 {
96 return os << str.data();
97 }
98 #endif
99
100 #endif // wxUSE_STD_IOSTREAM
101
102 // ===========================================================================
103 // wxString class core
104 // ===========================================================================
105
106 #if wxUSE_UNICODE_UTF8
107
108 void wxString::PosLenToImpl(size_t pos, size_t len,
109 size_t *implPos, size_t *implLen) const
110 {
111 if ( pos == npos )
112 *implPos = npos;
113 else
114 {
115 const_iterator i = begin() + pos;
116 *implPos = wxStringImpl::const_iterator(i.impl()) - m_impl.begin();
117 if ( len == npos )
118 *implLen = npos;
119 else
120 {
121 // too large length is interpreted as "to the end of the string"
122 // FIXME-UTF8: verify this is the case in std::string, assert
123 // otherwise
124 if ( pos + len > length() )
125 len = length() - pos;
126
127 *implLen = (i + len).impl() - i.impl();
128 }
129 }
130 }
131
132 #endif // wxUSE_UNICODE_UTF8
133
134 // ----------------------------------------------------------------------------
135 // wxCStrData converted strings caching
136 // ----------------------------------------------------------------------------
137
138 // FIXME-UTF8: temporarily disabled because it doesn't work with global
139 // string objects; re-enable after fixing this bug and benchmarking
140 // performance to see if using a hash is a good idea at all
141 #if 0
142
143 // For backward compatibility reasons, it must be possible to assign the value
144 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
145 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
146 // because the memory would be freed immediately, but it has to be valid as long
147 // as the string is not modified, so that code like this still works:
148 //
149 // const wxChar *s = str.c_str();
150 // while ( s ) { ... }
151
152 // FIXME-UTF8: not thread safe!
153 // FIXME-UTF8: we currently clear the cached conversion only when the string is
154 // destroyed, but we should do it when the string is modified, to
155 // keep memory usage down
156 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
157 // invalidated the cache on every change, we could keep the previous
158 // conversion
159 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
160 // to use mb_str() or wc_str() instead of (const [w]char*)c_str()
161
162 template<typename T>
163 static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
164 {
165 typename T::iterator i = hash.find(wxConstCast(s, wxString));
166 if ( i != hash.end() )
167 {
168 free(i->second);
169 hash.erase(i);
170 }
171 }
172
173 #if wxUSE_UNICODE
174 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
175 // so we have to use wxString* here and const-cast when used
176 WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
177 wxStringCharConversionCache);
178 static wxStringCharConversionCache gs_stringsCharCache;
179
180 const char* wxCStrData::AsChar() const
181 {
182 // remove previously cache value, if any (see FIXMEs above):
183 DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
184
185 // convert the string and keep it:
186 const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
187 m_str->mb_str().release();
188
189 return s + m_offset;
190 }
191 #endif // wxUSE_UNICODE
192
193 #if !wxUSE_UNICODE_WCHAR
194 WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
195 wxStringWCharConversionCache);
196 static wxStringWCharConversionCache gs_stringsWCharCache;
197
198 const wchar_t* wxCStrData::AsWChar() const
199 {
200 // remove previously cache value, if any (see FIXMEs above):
201 DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
202
203 // convert the string and keep it:
204 const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
205 m_str->wc_str().release();
206
207 return s + m_offset;
208 }
209 #endif // !wxUSE_UNICODE_WCHAR
210
211 wxString::~wxString()
212 {
213 #if wxUSE_UNICODE
214 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
215 DeleteStringFromConversionCache(gs_stringsCharCache, this);
216 #endif
217 #if !wxUSE_UNICODE_WCHAR
218 DeleteStringFromConversionCache(gs_stringsWCharCache, this);
219 #endif
220 }
221 #endif
222
223 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
224 const char* wxCStrData::AsChar() const
225 {
226 #if wxUSE_UNICODE_UTF8
227 if ( wxLocaleIsUtf8 )
228 return AsInternal();
229 #endif
230 // under non-UTF8 locales, we have to convert the internal UTF-8
231 // representation using wxConvLibc and cache the result
232
233 wxString *str = wxConstCast(m_str, wxString);
234
235 // convert the string:
236 //
237 // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
238 // have it) but it's unfortunately not obvious to implement
239 // because we don't know how big buffer do we need for the
240 // given string length (in case of multibyte encodings, e.g.
241 // ISO-2022-JP or UTF-8 when internal representation is wchar_t)
242 //
243 // One idea would be to store more than just m_convertedToChar
244 // in wxString: then we could record the length of the string
245 // which was converted the last time and try to reuse the same
246 // buffer if the current length is not greater than it (this
247 // could still fail because string could have been modified in
248 // place but it would work most of the time, so we'd do it and
249 // only allocate the new buffer if in-place conversion returned
250 // an error). We could also store a bit saying if the string
251 // was modified since the last conversion (and update it in all
252 // operation modifying the string, of course) to avoid unneeded
253 // consequential conversions. But both of these ideas require
254 // adding more fields to wxString and require profiling results
255 // to be sure that we really gain enough from them to justify
256 // doing it.
257 wxCharBuffer buf(str->mb_str());
258
259 // if it failed, return empty string and not NULL to avoid crashes in code
260 // written with either wxWidgets 2 wxString or std::string behaviour in
261 // mind: neither of them ever returns NULL and so we shouldn't neither
262 if ( !buf )
263 return "";
264
265 if ( str->m_convertedToChar &&
266 strlen(buf) == strlen(str->m_convertedToChar) )
267 {
268 // keep the same buffer for as long as possible, so that several calls
269 // to c_str() in a row still work:
270 strcpy(str->m_convertedToChar, buf);
271 }
272 else
273 {
274 str->m_convertedToChar = buf.release();
275 }
276
277 // and keep it:
278 return str->m_convertedToChar + m_offset;
279 }
280 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
281
282 #if !wxUSE_UNICODE_WCHAR
283 const wchar_t* wxCStrData::AsWChar() const
284 {
285 wxString *str = wxConstCast(m_str, wxString);
286
287 // convert the string:
288 wxWCharBuffer buf(str->wc_str());
289
290 // notice that here, unlike above in AsChar(), conversion can't fail as our
291 // internal UTF-8 is always well-formed -- or the string was corrupted and
292 // all bets are off anyhow
293
294 // FIXME-UTF8: do the conversion in-place in the existing buffer
295 if ( str->m_convertedToWChar &&
296 wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
297 {
298 // keep the same buffer for as long as possible, so that several calls
299 // to c_str() in a row still work:
300 memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
301 }
302 else
303 {
304 str->m_convertedToWChar = buf.release();
305 }
306
307 // and keep it:
308 return str->m_convertedToWChar + m_offset;
309 }
310 #endif // !wxUSE_UNICODE_WCHAR
311
312 // ===========================================================================
313 // wxString class core
314 // ===========================================================================
315
316 // ---------------------------------------------------------------------------
317 // construction and conversion
318 // ---------------------------------------------------------------------------
319
320 #if wxUSE_UNICODE_WCHAR
321 /* static */
322 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
323 const wxMBConv& conv)
324 {
325 // anything to do?
326 if ( !psz || nLength == 0 )
327 return SubstrBufFromMB(L"", 0);
328
329 if ( nLength == npos )
330 nLength = wxNO_LEN;
331
332 size_t wcLen;
333 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
334 if ( !wcLen )
335 return SubstrBufFromMB(_T(""), 0);
336 else
337 return SubstrBufFromMB(wcBuf, wcLen);
338 }
339 #endif // wxUSE_UNICODE_WCHAR
340
341 #if wxUSE_UNICODE_UTF8
342 /* static */
343 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
344 const wxMBConv& conv)
345 {
346 // anything to do?
347 if ( !psz || nLength == 0 )
348 return SubstrBufFromMB("", 0);
349
350 // if psz is already in UTF-8, we don't have to do the roundtrip to
351 // wchar_t* and back:
352 if ( conv.IsUTF8() )
353 {
354 // we need to validate the input because UTF8 iterators assume valid
355 // UTF-8 sequence and psz may be invalid:
356 if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
357 {
358 return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz), nLength);
359 }
360 // else: do the roundtrip through wchar_t*
361 }
362
363 if ( nLength == npos )
364 nLength = wxNO_LEN;
365
366 // first convert to wide string:
367 size_t wcLen;
368 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
369 if ( !wcLen )
370 return SubstrBufFromMB("", 0);
371
372 // and then to UTF-8:
373 SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
374 // widechar -> UTF-8 conversion isn't supposed to ever fail:
375 wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
376
377 return buf;
378 }
379 #endif // wxUSE_UNICODE_UTF8
380
381 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
382 /* static */
383 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
384 const wxMBConv& conv)
385 {
386 // anything to do?
387 if ( !pwz || nLength == 0 )
388 return SubstrBufFromWC("", 0);
389
390 if ( nLength == npos )
391 nLength = wxNO_LEN;
392
393 size_t mbLen;
394 wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
395 if ( !mbLen )
396 return SubstrBufFromWC("", 0);
397 else
398 return SubstrBufFromWC(mbBuf, mbLen);
399 }
400 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
401
402
403 #if wxUSE_UNICODE_WCHAR
404
405 //Convert wxString in Unicode mode to a multi-byte string
406 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
407 {
408 return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL);
409 }
410
411 #elif wxUSE_UNICODE_UTF8
412
413 const wxWCharBuffer wxString::wc_str() const
414 {
415 return wxMBConvStrictUTF8().cMB2WC
416 (
417 m_impl.c_str(),
418 m_impl.length() + 1, // size, not length
419 NULL
420 );
421 }
422
423 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
424 {
425 if ( conv.IsUTF8() )
426 return wxCharBuffer::CreateNonOwned(m_impl.c_str());
427
428 // FIXME-UTF8: use wc_str() here once we have buffers with length
429
430 size_t wcLen;
431 wxWCharBuffer wcBuf(wxMBConvStrictUTF8().cMB2WC
432 (
433 m_impl.c_str(),
434 m_impl.length() + 1, // size
435 &wcLen
436 ));
437 if ( !wcLen )
438 return wxCharBuffer("");
439
440 return conv.cWC2MB(wcBuf, wcLen+1, NULL);
441 }
442
443 #else // ANSI
444
445 //Converts this string to a wide character string if unicode
446 //mode is not enabled and wxUSE_WCHAR_T is enabled
447 const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
448 {
449 return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL);
450 }
451
452 #endif // Unicode/ANSI
453
454 // shrink to minimal size (releasing extra memory)
455 bool wxString::Shrink()
456 {
457 wxString tmp(begin(), end());
458 swap(tmp);
459 return tmp.length() == length();
460 }
461
462 // deprecated compatibility code:
463 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
464 wxStringCharType *wxString::GetWriteBuf(size_t nLen)
465 {
466 return DoGetWriteBuf(nLen);
467 }
468
469 void wxString::UngetWriteBuf()
470 {
471 DoUngetWriteBuf();
472 }
473
474 void wxString::UngetWriteBuf(size_t nLen)
475 {
476 DoUngetWriteBuf(nLen);
477 }
478 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
479
480
481 // ---------------------------------------------------------------------------
482 // data access
483 // ---------------------------------------------------------------------------
484
485 // all functions are inline in string.h
486
487 // ---------------------------------------------------------------------------
488 // concatenation operators
489 // ---------------------------------------------------------------------------
490
491 /*
492 * concatenation functions come in 5 flavours:
493 * string + string
494 * char + string and string + char
495 * C str + string and string + C str
496 */
497
498 wxString operator+(const wxString& str1, const wxString& str2)
499 {
500 #if !wxUSE_STL_BASED_WXSTRING
501 wxASSERT( str1.IsValid() );
502 wxASSERT( str2.IsValid() );
503 #endif
504
505 wxString s = str1;
506 s += str2;
507
508 return s;
509 }
510
511 wxString operator+(const wxString& str, wxUniChar ch)
512 {
513 #if !wxUSE_STL_BASED_WXSTRING
514 wxASSERT( str.IsValid() );
515 #endif
516
517 wxString s = str;
518 s += ch;
519
520 return s;
521 }
522
523 wxString operator+(wxUniChar ch, const wxString& str)
524 {
525 #if !wxUSE_STL_BASED_WXSTRING
526 wxASSERT( str.IsValid() );
527 #endif
528
529 wxString s = ch;
530 s += str;
531
532 return s;
533 }
534
535 wxString operator+(const wxString& str, const char *psz)
536 {
537 #if !wxUSE_STL_BASED_WXSTRING
538 wxASSERT( str.IsValid() );
539 #endif
540
541 wxString s;
542 if ( !s.Alloc(strlen(psz) + str.length()) ) {
543 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
544 }
545 s += str;
546 s += psz;
547
548 return s;
549 }
550
551 wxString operator+(const wxString& str, const wchar_t *pwz)
552 {
553 #if !wxUSE_STL_BASED_WXSTRING
554 wxASSERT( str.IsValid() );
555 #endif
556
557 wxString s;
558 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
559 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
560 }
561 s += str;
562 s += pwz;
563
564 return s;
565 }
566
567 wxString operator+(const char *psz, const wxString& str)
568 {
569 #if !wxUSE_STL_BASED_WXSTRING
570 wxASSERT( str.IsValid() );
571 #endif
572
573 wxString s;
574 if ( !s.Alloc(strlen(psz) + str.length()) ) {
575 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
576 }
577 s = psz;
578 s += str;
579
580 return s;
581 }
582
583 wxString operator+(const wchar_t *pwz, const wxString& str)
584 {
585 #if !wxUSE_STL_BASED_WXSTRING
586 wxASSERT( str.IsValid() );
587 #endif
588
589 wxString s;
590 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
591 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
592 }
593 s = pwz;
594 s += str;
595
596 return s;
597 }
598
599 // ---------------------------------------------------------------------------
600 // string comparison
601 // ---------------------------------------------------------------------------
602
603 bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
604 {
605 return (length() == 1) && (compareWithCase ? GetChar(0u) == c
606 : wxToupper(GetChar(0u)) == wxToupper(c));
607 }
608
609 #ifdef HAVE_STD_STRING_COMPARE
610
611 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
612 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
613 // sort strings in characters code point order by sorting the byte sequence
614 // in byte values order (i.e. what strcmp() and memcmp() do).
615
616 int wxString::compare(const wxString& str) const
617 {
618 return m_impl.compare(str.m_impl);
619 }
620
621 int wxString::compare(size_t nStart, size_t nLen,
622 const wxString& str) const
623 {
624 size_t pos, len;
625 PosLenToImpl(nStart, nLen, &pos, &len);
626 return m_impl.compare(pos, len, str.m_impl);
627 }
628
629 int wxString::compare(size_t nStart, size_t nLen,
630 const wxString& str,
631 size_t nStart2, size_t nLen2) const
632 {
633 size_t pos, len;
634 PosLenToImpl(nStart, nLen, &pos, &len);
635
636 size_t pos2, len2;
637 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
638
639 return m_impl.compare(pos, len, str.m_impl, pos2, len2);
640 }
641
642 int wxString::compare(const char* sz) const
643 {
644 return m_impl.compare(ImplStr(sz));
645 }
646
647 int wxString::compare(const wchar_t* sz) const
648 {
649 return m_impl.compare(ImplStr(sz));
650 }
651
652 int wxString::compare(size_t nStart, size_t nLen,
653 const char* sz, size_t nCount) const
654 {
655 size_t pos, len;
656 PosLenToImpl(nStart, nLen, &pos, &len);
657
658 SubstrBufFromMB str(ImplStr(sz, nCount));
659
660 return m_impl.compare(pos, len, str.data, str.len);
661 }
662
663 int wxString::compare(size_t nStart, size_t nLen,
664 const wchar_t* sz, size_t nCount) const
665 {
666 size_t pos, len;
667 PosLenToImpl(nStart, nLen, &pos, &len);
668
669 SubstrBufFromWC str(ImplStr(sz, nCount));
670
671 return m_impl.compare(pos, len, str.data, str.len);
672 }
673
674 #else // !HAVE_STD_STRING_COMPARE
675
676 static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
677 const wxStringCharType* s2, size_t l2)
678 {
679 if( l1 == l2 )
680 return wxStringMemcmp(s1, s2, l1);
681 else if( l1 < l2 )
682 {
683 int ret = wxStringMemcmp(s1, s2, l1);
684 return ret == 0 ? -1 : ret;
685 }
686 else
687 {
688 int ret = wxStringMemcmp(s1, s2, l2);
689 return ret == 0 ? +1 : ret;
690 }
691 }
692
693 int wxString::compare(const wxString& str) const
694 {
695 return ::wxDoCmp(m_impl.data(), m_impl.length(),
696 str.m_impl.data(), str.m_impl.length());
697 }
698
699 int wxString::compare(size_t nStart, size_t nLen,
700 const wxString& str) const
701 {
702 wxASSERT(nStart <= length());
703 size_type strLen = length() - nStart;
704 nLen = strLen < nLen ? strLen : nLen;
705
706 size_t pos, len;
707 PosLenToImpl(nStart, nLen, &pos, &len);
708
709 return ::wxDoCmp(m_impl.data() + pos, len,
710 str.m_impl.data(), str.m_impl.length());
711 }
712
713 int wxString::compare(size_t nStart, size_t nLen,
714 const wxString& str,
715 size_t nStart2, size_t nLen2) const
716 {
717 wxASSERT(nStart <= length());
718 wxASSERT(nStart2 <= str.length());
719 size_type strLen = length() - nStart,
720 strLen2 = str.length() - nStart2;
721 nLen = strLen < nLen ? strLen : nLen;
722 nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
723
724 size_t pos, len;
725 PosLenToImpl(nStart, nLen, &pos, &len);
726 size_t pos2, len2;
727 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
728
729 return ::wxDoCmp(m_impl.data() + pos, len,
730 str.m_impl.data() + pos2, len2);
731 }
732
733 int wxString::compare(const char* sz) const
734 {
735 SubstrBufFromMB str(ImplStr(sz, npos));
736 if ( str.len == npos )
737 str.len = wxStringStrlen(str.data);
738 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
739 }
740
741 int wxString::compare(const wchar_t* sz) const
742 {
743 SubstrBufFromWC str(ImplStr(sz, npos));
744 if ( str.len == npos )
745 str.len = wxStringStrlen(str.data);
746 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
747 }
748
749 int wxString::compare(size_t nStart, size_t nLen,
750 const char* sz, size_t nCount) const
751 {
752 wxASSERT(nStart <= length());
753 size_type strLen = length() - nStart;
754 nLen = strLen < nLen ? strLen : nLen;
755
756 size_t pos, len;
757 PosLenToImpl(nStart, nLen, &pos, &len);
758
759 SubstrBufFromMB str(ImplStr(sz, nCount));
760 if ( str.len == npos )
761 str.len = wxStringStrlen(str.data);
762
763 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
764 }
765
766 int wxString::compare(size_t nStart, size_t nLen,
767 const wchar_t* sz, size_t nCount) const
768 {
769 wxASSERT(nStart <= length());
770 size_type strLen = length() - nStart;
771 nLen = strLen < nLen ? strLen : nLen;
772
773 size_t pos, len;
774 PosLenToImpl(nStart, nLen, &pos, &len);
775
776 SubstrBufFromWC str(ImplStr(sz, nCount));
777 if ( str.len == npos )
778 str.len = wxStringStrlen(str.data);
779
780 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
781 }
782
783 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
784
785
786 // ---------------------------------------------------------------------------
787 // find_{first,last}_[not]_of functions
788 // ---------------------------------------------------------------------------
789
790 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
791
792 // NB: All these functions are implemented with the argument being wxChar*,
793 // i.e. widechar string in any Unicode build, even though native string
794 // representation is char* in the UTF-8 build. This is because we couldn't
795 // use memchr() to determine if a character is in a set encoded as UTF-8.
796
797 size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
798 {
799 return find_first_of(sz, nStart, wxStrlen(sz));
800 }
801
802 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
803 {
804 return find_first_not_of(sz, nStart, wxStrlen(sz));
805 }
806
807 size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
808 {
809 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
810
811 size_t idx = nStart;
812 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
813 {
814 if ( wxTmemchr(sz, *i, n) )
815 return idx;
816 }
817
818 return npos;
819 }
820
821 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
822 {
823 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
824
825 size_t idx = nStart;
826 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
827 {
828 if ( !wxTmemchr(sz, *i, n) )
829 return idx;
830 }
831
832 return npos;
833 }
834
835
836 size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
837 {
838 return find_last_of(sz, nStart, wxStrlen(sz));
839 }
840
841 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
842 {
843 return find_last_not_of(sz, nStart, wxStrlen(sz));
844 }
845
846 size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
847 {
848 size_t len = length();
849
850 if ( nStart == npos )
851 {
852 nStart = len - 1;
853 }
854 else
855 {
856 wxASSERT_MSG( nStart <= len, _T("invalid index") );
857 }
858
859 size_t idx = nStart;
860 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
861 i != rend(); --idx, ++i )
862 {
863 if ( wxTmemchr(sz, *i, n) )
864 return idx;
865 }
866
867 return npos;
868 }
869
870 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
871 {
872 size_t len = length();
873
874 if ( nStart == npos )
875 {
876 nStart = len - 1;
877 }
878 else
879 {
880 wxASSERT_MSG( nStart <= len, _T("invalid index") );
881 }
882
883 size_t idx = nStart;
884 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
885 i != rend(); --idx, ++i )
886 {
887 if ( !wxTmemchr(sz, *i, n) )
888 return idx;
889 }
890
891 return npos;
892 }
893
894 size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
895 {
896 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
897
898 size_t idx = nStart;
899 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
900 {
901 if ( *i != ch )
902 return idx;
903 }
904
905 return npos;
906 }
907
908 size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
909 {
910 size_t len = length();
911
912 if ( nStart == npos )
913 {
914 nStart = len - 1;
915 }
916 else
917 {
918 wxASSERT_MSG( nStart <= len, _T("invalid index") );
919 }
920
921 size_t idx = nStart;
922 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
923 i != rend(); --idx, ++i )
924 {
925 if ( *i != ch )
926 return idx;
927 }
928
929 return npos;
930 }
931
932 // the functions above were implemented for wchar_t* arguments in Unicode
933 // build and char* in ANSI build; below are implementations for the other
934 // version:
935 #if wxUSE_UNICODE
936 #define wxOtherCharType char
937 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
938 #else
939 #define wxOtherCharType wchar_t
940 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
941 #endif
942
943 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
944 { return find_first_of(STRCONV(sz), nStart); }
945
946 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
947 size_t n) const
948 { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
949 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
950 { return find_last_of(STRCONV(sz), nStart); }
951 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
952 size_t n) const
953 { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
954 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
955 { return find_first_not_of(STRCONV(sz), nStart); }
956 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
957 size_t n) const
958 { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
959 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
960 { return find_last_not_of(STRCONV(sz), nStart); }
961 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
962 size_t n) const
963 { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
964
965 #undef wxOtherCharType
966 #undef STRCONV
967
968 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
969
970 // ===========================================================================
971 // other common string functions
972 // ===========================================================================
973
974 int wxString::CmpNoCase(const wxString& s) const
975 {
976 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
977
978 size_t idx = 0;
979 const_iterator i1 = begin();
980 const_iterator end1 = end();
981 const_iterator i2 = s.begin();
982 const_iterator end2 = s.end();
983
984 for ( ; i1 != end1 && i2 != end2; ++idx, ++i1, ++i2 )
985 {
986 wxUniChar lower1 = (wxChar)wxTolower(*i1);
987 wxUniChar lower2 = (wxChar)wxTolower(*i2);
988 if ( lower1 != lower2 )
989 return lower1 < lower2 ? -1 : 1;
990 }
991
992 size_t len1 = length();
993 size_t len2 = s.length();
994
995 if ( len1 < len2 )
996 return -1;
997 else if ( len1 > len2 )
998 return 1;
999 return 0;
1000 }
1001
1002
1003 #if wxUSE_UNICODE
1004
1005 #ifdef __MWERKS__
1006 #ifndef __SCHAR_MAX__
1007 #define __SCHAR_MAX__ 127
1008 #endif
1009 #endif
1010
1011 wxString wxString::FromAscii(const char *ascii, size_t len)
1012 {
1013 if (!ascii || len == 0)
1014 return wxEmptyString;
1015
1016 wxString res;
1017
1018 {
1019 wxStringInternalBuffer buf(res, len);
1020 wxStringCharType *dest = buf;
1021
1022 for ( ; len > 0; --len )
1023 {
1024 unsigned char c = (unsigned char)*ascii++;
1025 wxASSERT_MSG( c < 0x80,
1026 _T("Non-ASCII value passed to FromAscii().") );
1027
1028 *dest++ = (wchar_t)c;
1029 }
1030 }
1031
1032 return res;
1033 }
1034
1035 wxString wxString::FromAscii(const char *ascii)
1036 {
1037 return FromAscii(ascii, wxStrlen(ascii));
1038 }
1039
1040 wxString wxString::FromAscii(char ascii)
1041 {
1042 // What do we do with '\0' ?
1043
1044 unsigned char c = (unsigned char)ascii;
1045
1046 wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") );
1047
1048 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1049 return wxString(wxUniChar((wchar_t)c));
1050 }
1051
1052 const wxCharBuffer wxString::ToAscii() const
1053 {
1054 // this will allocate enough space for the terminating NUL too
1055 wxCharBuffer buffer(length());
1056 char *dest = buffer.data();
1057
1058 for ( const_iterator i = begin(); i != end(); ++i )
1059 {
1060 wxUniChar c(*i);
1061 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1062 *dest++ = c.IsAscii() ? (char)c : '_';
1063
1064 // the output string can't have embedded NULs anyhow, so we can safely
1065 // stop at first of them even if we do have any
1066 if ( !c )
1067 break;
1068 }
1069
1070 return buffer;
1071 }
1072
1073 #endif // wxUSE_UNICODE
1074
1075 // extract string of length nCount starting at nFirst
1076 wxString wxString::Mid(size_t nFirst, size_t nCount) const
1077 {
1078 size_t nLen = length();
1079
1080 // default value of nCount is npos and means "till the end"
1081 if ( nCount == npos )
1082 {
1083 nCount = nLen - nFirst;
1084 }
1085
1086 // out-of-bounds requests return sensible things
1087 if ( nFirst + nCount > nLen )
1088 {
1089 nCount = nLen - nFirst;
1090 }
1091
1092 if ( nFirst > nLen )
1093 {
1094 // AllocCopy() will return empty string
1095 return wxEmptyString;
1096 }
1097
1098 wxString dest(*this, nFirst, nCount);
1099 if ( dest.length() != nCount )
1100 {
1101 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1102 }
1103
1104 return dest;
1105 }
1106
1107 // check that the string starts with prefix and return the rest of the string
1108 // in the provided pointer if it is not NULL, otherwise return false
1109 bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
1110 {
1111 if ( compare(0, prefix.length(), prefix) != 0 )
1112 return false;
1113
1114 if ( rest )
1115 {
1116 // put the rest of the string into provided pointer
1117 rest->assign(*this, prefix.length(), npos);
1118 }
1119
1120 return true;
1121 }
1122
1123
1124 // check that the string ends with suffix and return the rest of it in the
1125 // provided pointer if it is not NULL, otherwise return false
1126 bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
1127 {
1128 int start = length() - suffix.length();
1129
1130 if ( start < 0 || compare(start, npos, suffix) != 0 )
1131 return false;
1132
1133 if ( rest )
1134 {
1135 // put the rest of the string into provided pointer
1136 rest->assign(*this, 0, start);
1137 }
1138
1139 return true;
1140 }
1141
1142
1143 // extract nCount last (rightmost) characters
1144 wxString wxString::Right(size_t nCount) const
1145 {
1146 if ( nCount > length() )
1147 nCount = length();
1148
1149 wxString dest(*this, length() - nCount, nCount);
1150 if ( dest.length() != nCount ) {
1151 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1152 }
1153 return dest;
1154 }
1155
1156 // get all characters after the last occurence of ch
1157 // (returns the whole string if ch not found)
1158 wxString wxString::AfterLast(wxUniChar ch) const
1159 {
1160 wxString str;
1161 int iPos = Find(ch, true);
1162 if ( iPos == wxNOT_FOUND )
1163 str = *this;
1164 else
1165 str = wx_str() + iPos + 1;
1166
1167 return str;
1168 }
1169
1170 // extract nCount first (leftmost) characters
1171 wxString wxString::Left(size_t nCount) const
1172 {
1173 if ( nCount > length() )
1174 nCount = length();
1175
1176 wxString dest(*this, 0, nCount);
1177 if ( dest.length() != nCount ) {
1178 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1179 }
1180 return dest;
1181 }
1182
1183 // get all characters before the first occurence of ch
1184 // (returns the whole string if ch not found)
1185 wxString wxString::BeforeFirst(wxUniChar ch) const
1186 {
1187 int iPos = Find(ch);
1188 if ( iPos == wxNOT_FOUND ) iPos = length();
1189 return wxString(*this, 0, iPos);
1190 }
1191
1192 /// get all characters before the last occurence of ch
1193 /// (returns empty string if ch not found)
1194 wxString wxString::BeforeLast(wxUniChar ch) const
1195 {
1196 wxString str;
1197 int iPos = Find(ch, true);
1198 if ( iPos != wxNOT_FOUND && iPos != 0 )
1199 str = wxString(c_str(), iPos);
1200
1201 return str;
1202 }
1203
1204 /// get all characters after the first occurence of ch
1205 /// (returns empty string if ch not found)
1206 wxString wxString::AfterFirst(wxUniChar ch) const
1207 {
1208 wxString str;
1209 int iPos = Find(ch);
1210 if ( iPos != wxNOT_FOUND )
1211 str = wx_str() + iPos + 1;
1212
1213 return str;
1214 }
1215
1216 // replace first (or all) occurences of some substring with another one
1217 size_t wxString::Replace(const wxString& strOld,
1218 const wxString& strNew, bool bReplaceAll)
1219 {
1220 // if we tried to replace an empty string we'd enter an infinite loop below
1221 wxCHECK_MSG( !strOld.empty(), 0,
1222 _T("wxString::Replace(): invalid parameter") );
1223
1224 size_t uiCount = 0; // count of replacements made
1225
1226 size_t uiOldLen = strOld.length();
1227 size_t uiNewLen = strNew.length();
1228
1229 size_t dwPos = 0;
1230
1231 while ( (*this)[dwPos] != wxT('\0') )
1232 {
1233 //DO NOT USE STRSTR HERE
1234 //this string can contain embedded null characters,
1235 //so strstr will function incorrectly
1236 dwPos = find(strOld, dwPos);
1237 if ( dwPos == npos )
1238 break; // exit the loop
1239 else
1240 {
1241 //replace this occurance of the old string with the new one
1242 replace(dwPos, uiOldLen, strNew, uiNewLen);
1243
1244 //move up pos past the string that was replaced
1245 dwPos += uiNewLen;
1246
1247 //increase replace count
1248 ++uiCount;
1249
1250 // stop now?
1251 if ( !bReplaceAll )
1252 break; // exit the loop
1253 }
1254 }
1255
1256 return uiCount;
1257 }
1258
1259 bool wxString::IsAscii() const
1260 {
1261 for ( const_iterator i = begin(); i != end(); ++i )
1262 {
1263 if ( !(*i).IsAscii() )
1264 return false;
1265 }
1266
1267 return true;
1268 }
1269
1270 bool wxString::IsWord() const
1271 {
1272 for ( const_iterator i = begin(); i != end(); ++i )
1273 {
1274 if ( !wxIsalpha(*i) )
1275 return false;
1276 }
1277
1278 return true;
1279 }
1280
1281 bool wxString::IsNumber() const
1282 {
1283 if ( empty() )
1284 return true;
1285
1286 const_iterator i = begin();
1287
1288 if ( *i == _T('-') || *i == _T('+') )
1289 ++i;
1290
1291 for ( ; i != end(); ++i )
1292 {
1293 if ( !wxIsdigit(*i) )
1294 return false;
1295 }
1296
1297 return true;
1298 }
1299
1300 wxString wxString::Strip(stripType w) const
1301 {
1302 wxString s = *this;
1303 if ( w & leading ) s.Trim(false);
1304 if ( w & trailing ) s.Trim(true);
1305 return s;
1306 }
1307
1308 // ---------------------------------------------------------------------------
1309 // case conversion
1310 // ---------------------------------------------------------------------------
1311
1312 wxString& wxString::MakeUpper()
1313 {
1314 for ( iterator it = begin(), en = end(); it != en; ++it )
1315 *it = (wxChar)wxToupper(*it);
1316
1317 return *this;
1318 }
1319
1320 wxString& wxString::MakeLower()
1321 {
1322 for ( iterator it = begin(), en = end(); it != en; ++it )
1323 *it = (wxChar)wxTolower(*it);
1324
1325 return *this;
1326 }
1327
1328 // ---------------------------------------------------------------------------
1329 // trimming and padding
1330 // ---------------------------------------------------------------------------
1331
1332 // some compilers (VC++ 6.0 not to name them) return true for a call to
1333 // isspace('ê') in the C locale which seems to be broken to me, but we have to
1334 // live with this by checking that the character is a 7 bit one - even if this
1335 // may fail to detect some spaces (I don't know if Unicode doesn't have
1336 // space-like symbols somewhere except in the first 128 chars), it is arguably
1337 // still better than trimming away accented letters
1338 inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1339
1340 // trims spaces (in the sense of isspace) from left or right side
1341 wxString& wxString::Trim(bool bFromRight)
1342 {
1343 // first check if we're going to modify the string at all
1344 if ( !empty() &&
1345 (
1346 (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1347 (!bFromRight && wxSafeIsspace(GetChar(0u)))
1348 )
1349 )
1350 {
1351 if ( bFromRight )
1352 {
1353 // find last non-space character
1354 reverse_iterator psz = rbegin();
1355 while ( (psz != rend()) && wxSafeIsspace(*psz) )
1356 psz++;
1357
1358 // truncate at trailing space start
1359 erase(psz.base(), end());
1360 }
1361 else
1362 {
1363 // find first non-space character
1364 iterator psz = begin();
1365 while ( (psz != end()) && wxSafeIsspace(*psz) )
1366 psz++;
1367
1368 // fix up data and length
1369 erase(begin(), psz);
1370 }
1371 }
1372
1373 return *this;
1374 }
1375
1376 // adds nCount characters chPad to the string from either side
1377 wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1378 {
1379 wxString s(chPad, nCount);
1380
1381 if ( bFromRight )
1382 *this += s;
1383 else
1384 {
1385 s += *this;
1386 swap(s);
1387 }
1388
1389 return *this;
1390 }
1391
1392 // truncate the string
1393 wxString& wxString::Truncate(size_t uiLen)
1394 {
1395 if ( uiLen < length() )
1396 {
1397 erase(begin() + uiLen, end());
1398 }
1399 //else: nothing to do, string is already short enough
1400
1401 return *this;
1402 }
1403
1404 // ---------------------------------------------------------------------------
1405 // finding (return wxNOT_FOUND if not found and index otherwise)
1406 // ---------------------------------------------------------------------------
1407
1408 // find a character
1409 int wxString::Find(wxUniChar ch, bool bFromEnd) const
1410 {
1411 size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1412
1413 return (idx == npos) ? wxNOT_FOUND : (int)idx;
1414 }
1415
1416 // ----------------------------------------------------------------------------
1417 // conversion to numbers
1418 // ----------------------------------------------------------------------------
1419
1420 // The implementation of all the functions below is exactly the same so factor
1421 // it out. Note that number extraction works correctly on UTF-8 strings, so
1422 // we can use wxStringCharType and wx_str() for maximum efficiency.
1423
// DO_IF_NOT_WINCE(x) expands to its argument unless __WXWINCE__ is defined,
// in which case it expands to nothing; it is used below to wrap the code
// using errno, whose header is not included when __WXWINCE__ is defined.
#ifdef __WXWINCE__
    #define DO_IF_NOT_WINCE(x)
#else
    #define DO_IF_NOT_WINCE(x) x
#endif
1429
// Expands to the complete body of a wxString integer conversion function:
//  - val:  the output pointer, the generated code returns false if it's NULL
//  - base: the numeric base, asserted to be either 0 or in the 2..36 range
//  - func: the strtol()-like function called as func(start, &end, base)
//
// NB: the expansion ends with a return statement without the terminating
//     semicolon, so it must be provided where the macro is used.
#define WX_STRING_TO_INT_TYPE(val, base, func)                              \
    wxCHECK_MSG( val, false, _T("NULL output pointer") );                   \
    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );  \
                                                                            \
    DO_IF_NOT_WINCE( errno = 0; )                                           \
                                                                            \
    const wxStringCharType *strBegin = wx_str();                            \
    wxStringCharType *strEnd;                                               \
    *val = func(strBegin, &strEnd, base);                                   \
                                                                            \
    /* succeed only if the string was not empty to start with, the scan */  \
    /* was stopped by the terminating NUL and no under/overflow occurred */ \
    return (strEnd != strBegin) && !*strEnd                                 \
           DO_IF_NOT_WINCE( && (errno != ERANGE) )
1445
1446 bool wxString::ToLong(long *val, int base) const
1447 {
1448 WX_STRING_TO_INT_TYPE(val, base, wxStrtol);
1449 }
1450
1451 bool wxString::ToULong(unsigned long *val, int base) const
1452 {
1453 WX_STRING_TO_INT_TYPE(val, base, wxStrtoul);
1454 }
1455
1456 bool wxString::ToLongLong(wxLongLong_t *val, int base) const
1457 {
1458 WX_STRING_TO_INT_TYPE(val, base, wxStrtoll);
1459 }
1460
1461 bool wxString::ToULongLong(wxULongLong_t *val, int base) const
1462 {
1463 WX_STRING_TO_INT_TYPE(val, base, wxStrtoull);
1464 }
1465
1466 bool wxString::ToDouble(double *val) const
1467 {
1468 wxCHECK_MSG( val, false, _T("NULL pointer in wxString::ToDouble") );
1469
1470 #ifndef __WXWINCE__
1471 errno = 0;
1472 #endif
1473
1474 const wxChar *start = c_str();
1475 wxChar *end;
1476 *val = wxStrtod(start, &end);
1477
1478 // return true only if scan was stopped by the terminating NUL and if the
1479 // string was not empty to start with and no under/overflow occurred
1480 return !*end && (end != start)
1481 #ifndef __WXWINCE__
1482 && (errno != ERANGE)
1483 #endif
1484 ;
1485 }
1486
1487 // ---------------------------------------------------------------------------
1488 // formatted output
1489 // ---------------------------------------------------------------------------
1490
1491 #if !wxUSE_UTF8_LOCALE_ONLY
1492 /* static */
1493 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1494 wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
1495 #else
1496 wxString wxString::DoFormatWchar(const wxChar *format, ...)
1497 #endif
1498 {
1499 va_list argptr;
1500 va_start(argptr, format);
1501
1502 wxString s;
1503 s.PrintfV(format, argptr);
1504
1505 va_end(argptr);
1506
1507 return s;
1508 }
1509 #endif // !wxUSE_UTF8_LOCALE_ONLY
1510
#if wxUSE_UNICODE_UTF8
// Return a new string formatted by PrintfV() from the char format string and
// the variable arguments following it.
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
1526
1527 /* static */
1528 wxString wxString::FormatV(const wxString& format, va_list argptr)
1529 {
1530 wxString s;
1531 s.PrintfV(format, argptr);
1532 return s;
1533 }
1534
1535 #if !wxUSE_UTF8_LOCALE_ONLY
1536 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1537 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
1538 #else
1539 int wxString::DoPrintfWchar(const wxChar *format, ...)
1540 #endif
1541 {
1542 va_list argptr;
1543 va_start(argptr, format);
1544
1545 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1546 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1547 // because it's the only cast that works safely for downcasting when
1548 // multiple inheritance is used:
1549 wxString *str = static_cast<wxString*>(this);
1550 #else
1551 wxString *str = this;
1552 #endif
1553
1554 int iLen = str->PrintfV(format, argptr);
1555
1556 va_end(argptr);
1557
1558 return iLen;
1559 }
1560 #endif // !wxUSE_UTF8_LOCALE_ONLY
1561
#if wxUSE_UNICODE_UTF8
// Replace the string contents with the result of formatting the variable
// arguments according to the char format string and return whatever
// PrintfV() returned.
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);

    const int iLen = PrintfV(format, args);

    va_end(args);

    return iLen;
}
#endif // wxUSE_UNICODE_UTF8
1575
1576 #if wxUSE_UNICODE_UTF8
1577 template<typename BufferType>
1578 #else
1579 // we only need one version in non-UTF8 builds and at least two Windows
1580 // compilers have problems with this function template, so use just one
1581 // normal function here
1582 #endif
1583 static int DoStringPrintfV(wxString& str,
1584 const wxString& format, va_list argptr)
1585 {
1586 int size = 1024;
1587
1588 for ( ;; )
1589 {
1590 #if wxUSE_UNICODE_UTF8
1591 BufferType tmp(str, size + 1);
1592 typename BufferType::CharType *buf = tmp;
1593 #else
1594 wxStringBuffer tmp(str, size + 1);
1595 wxChar *buf = tmp;
1596 #endif
1597
1598 if ( !buf )
1599 {
1600 // out of memory
1601
1602 // in UTF-8 build, leaving uninitialized junk in the buffer
1603 // could result in invalid non-empty UTF-8 string, so just
1604 // reset the string to empty on failure:
1605 buf[0] = '\0';
1606 return -1;
1607 }
1608
1609 // wxVsnprintf() may modify the original arg pointer, so pass it
1610 // only a copy
1611 va_list argptrcopy;
1612 wxVaCopy(argptrcopy, argptr);
1613 int len = wxVsnprintf(buf, size, format, argptrcopy);
1614 va_end(argptrcopy);
1615
1616 // some implementations of vsnprintf() don't NUL terminate
1617 // the string if there is not enough space for it so
1618 // always do it manually
1619 buf[size] = _T('\0');
1620
1621 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1622 // total number of characters which would have been written if the
1623 // buffer were large enough (newer standards such as Unix98)
1624 if ( len < 0 )
1625 {
1626 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1627 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1628 // is true if *both* of them use our own implementation,
1629 // otherwise we can't be sure
1630 #if wxUSE_WXVSNPRINTF
1631 // we know that our own implementation of wxVsnprintf() returns -1
1632 // only for a format error - thus there's something wrong with
1633 // the user's format string
1634 buf[0] = '\0';
1635 return -1;
1636 #else // possibly using system version
1637 // assume it only returns error if there is not enough space, but
1638 // as we don't know how much we need, double the current size of
1639 // the buffer
1640 size *= 2;
1641 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1642 }
1643 else if ( len >= size )
1644 {
1645 #if wxUSE_WXVSNPRINTF
1646 // we know that our own implementation of wxVsnprintf() returns
1647 // size+1 when there's not enough space but that's not the size
1648 // of the required buffer!
1649 size *= 2; // so we just double the current size of the buffer
1650 #else
1651 // some vsnprintf() implementations NUL-terminate the buffer and
1652 // some don't in len == size case, to be safe always add 1
1653 size = len + 1;
1654 #endif
1655 }
1656 else // ok, there was enough space
1657 {
1658 break;
1659 }
1660 }
1661
1662 // we could have overshot
1663 str.Shrink();
1664
1665 return str.length();
1666 }
1667
1668 int wxString::PrintfV(const wxString& format, va_list argptr)
1669 {
1670 #if wxUSE_UNICODE_UTF8
1671 #if wxUSE_STL_BASED_WXSTRING
1672 typedef wxStringTypeBuffer<char> Utf8Buffer;
1673 #else
1674 typedef wxStringInternalBuffer Utf8Buffer;
1675 #endif
1676 #endif
1677
1678 #if wxUSE_UTF8_LOCALE_ONLY
1679 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
1680 #else
1681 #if wxUSE_UNICODE_UTF8
1682 if ( wxLocaleIsUtf8 )
1683 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
1684 else
1685 // wxChar* version
1686 return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
1687 #else
1688 return DoStringPrintfV(*this, format, argptr);
1689 #endif // UTF8/WCHAR
1690 #endif
1691 }
1692
1693 // ----------------------------------------------------------------------------
1694 // misc other operations
1695 // ----------------------------------------------------------------------------
1696
1697 // returns true if the string matches the pattern which may contain '*' and
1698 // '?' metacharacters (as usual, '?' matches any character and '*' any number
1699 // of them)
1700 bool wxString::Matches(const wxString& mask) const
1701 {
1702 // I disable this code as it doesn't seem to be faster (in fact, it seems
1703 // to be much slower) than the old, hand-written code below and using it
1704 // here requires always linking with libregex even if the user code doesn't
1705 // use it
1706 #if 0 // wxUSE_REGEX
1707 // first translate the shell-like mask into a regex
1708 wxString pattern;
1709 pattern.reserve(wxStrlen(pszMask));
1710
1711 pattern += _T('^');
1712 while ( *pszMask )
1713 {
1714 switch ( *pszMask )
1715 {
1716 case _T('?'):
1717 pattern += _T('.');
1718 break;
1719
1720 case _T('*'):
1721 pattern += _T(".*");
1722 break;
1723
1724 case _T('^'):
1725 case _T('.'):
1726 case _T('$'):
1727 case _T('('):
1728 case _T(')'):
1729 case _T('|'):
1730 case _T('+'):
1731 case _T('\\'):
1732 // these characters are special in a RE, quote them
1733 // (however note that we don't quote '[' and ']' to allow
1734 // using them for Unix shell like matching)
1735 pattern += _T('\\');
1736 // fall through
1737
1738 default:
1739 pattern += *pszMask;
1740 }
1741
1742 pszMask++;
1743 }
1744 pattern += _T('$');
1745
1746 // and now use it
1747 return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
1748 #else // !wxUSE_REGEX
1749 // TODO: this is, of course, awfully inefficient...
1750
1751 // FIXME-UTF8: implement using iterators, remove #if
1752 #if wxUSE_UNICODE_UTF8
1753 wxWCharBuffer maskBuf = mask.wc_str();
1754 wxWCharBuffer txtBuf = wc_str();
1755 const wxChar *pszMask = maskBuf.data();
1756 const wxChar *pszTxt = txtBuf.data();
1757 #else
1758 const wxChar *pszMask = mask.wx_str();
1759 // the char currently being checked
1760 const wxChar *pszTxt = wx_str();
1761 #endif
1762
1763 // the last location where '*' matched
1764 const wxChar *pszLastStarInText = NULL;
1765 const wxChar *pszLastStarInMask = NULL;
1766
1767 match:
1768 for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
1769 switch ( *pszMask ) {
1770 case wxT('?'):
1771 if ( *pszTxt == wxT('\0') )
1772 return false;
1773
1774 // pszTxt and pszMask will be incremented in the loop statement
1775
1776 break;
1777
1778 case wxT('*'):
1779 {
1780 // remember where we started to be able to backtrack later
1781 pszLastStarInText = pszTxt;
1782 pszLastStarInMask = pszMask;
1783
1784 // ignore special chars immediately following this one
1785 // (should this be an error?)
1786 while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
1787 pszMask++;
1788
1789 // if there is nothing more, match
1790 if ( *pszMask == wxT('\0') )
1791 return true;
1792
1793 // are there any other metacharacters in the mask?
1794 size_t uiLenMask;
1795 const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));
1796
1797 if ( pEndMask != NULL ) {
1798 // we have to match the string between two metachars
1799 uiLenMask = pEndMask - pszMask;
1800 }
1801 else {
1802 // we have to match the remainder of the string
1803 uiLenMask = wxStrlen(pszMask);
1804 }
1805
1806 wxString strToMatch(pszMask, uiLenMask);
1807 const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
1808 if ( pMatch == NULL )
1809 return false;
1810
1811 // -1 to compensate "++" in the loop
1812 pszTxt = pMatch + uiLenMask - 1;
1813 pszMask += uiLenMask - 1;
1814 }
1815 break;
1816
1817 default:
1818 if ( *pszMask != *pszTxt )
1819 return false;
1820 break;
1821 }
1822 }
1823
1824 // match only if nothing left
1825 if ( *pszTxt == wxT('\0') )
1826 return true;
1827
1828 // if we failed to match, backtrack if we can
1829 if ( pszLastStarInText ) {
1830 pszTxt = pszLastStarInText + 1;
1831 pszMask = pszLastStarInMask;
1832
1833 pszLastStarInText = NULL;
1834
1835 // don't bother resetting pszLastStarInMask, it's unnecessary
1836
1837 goto match;
1838 }
1839
1840 return false;
1841 #endif // wxUSE_REGEX/!wxUSE_REGEX
1842 }
1843
1844 // Count the number of chars
1845 int wxString::Freq(wxUniChar ch) const
1846 {
1847 int count = 0;
1848 for ( const_iterator i = begin(); i != end(); ++i )
1849 {
1850 if ( *i == ch )
1851 count ++;
1852 }
1853 return count;
1854 }
1855
1856 // convert to upper case, return the copy of the string
1857 wxString wxString::Upper() const
1858 { wxString s(*this); return s.MakeUpper(); }
1859
1860 // convert to lower case, return the copy of the string
1861 wxString wxString::Lower() const { wxString s(*this); return s.MakeLower(); }
1862
1863 // ----------------------------------------------------------------------------
1864 // wxUTF8StringBuffer
1865 // ----------------------------------------------------------------------------
1866
1867 #if wxUSE_UNICODE_WCHAR
1868 wxUTF8StringBuffer::~wxUTF8StringBuffer()
1869 {
1870 wxMBConvStrictUTF8 conv;
1871 size_t wlen = conv.ToWChar(NULL, 0, m_buf);
1872 wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
1873
1874 wxStringInternalBuffer wbuf(m_str, wlen);
1875 conv.ToWChar(wbuf, wlen, m_buf);
1876 }
1877
1878 wxUTF8StringBufferLength::~wxUTF8StringBufferLength()
1879 {
1880 wxCHECK_RET(m_lenSet, "length not set");
1881
1882 wxMBConvStrictUTF8 conv;
1883 size_t wlen = conv.ToWChar(NULL, 0, m_buf, m_len);
1884 wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
1885
1886 wxStringInternalBufferLength wbuf(m_str, wlen);
1887 conv.ToWChar(wbuf, wlen, m_buf, m_len);
1888 wbuf.SetLength(wlen);
1889 }
1890 #endif // wxUSE_UNICODE_WCHAR