]> git.saurik.com Git - wxWidgets.git/blob - src/common/string.cpp
cleanup
[wxWidgets.git] / src / common / string.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
5 // Modified by:
6 // Created: 29/01/98
7 // RCS-ID: $Id$
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
12
13 // ===========================================================================
14 // headers, declarations, constants
15 // ===========================================================================
16
17 // For compilers that support precompilation, includes "wx.h".
18 #include "wx/wxprec.h"
19
20 #ifdef __BORLANDC__
21 #pragma hdrstop
22 #endif
23
24 #ifndef WX_PRECOMP
25 #include "wx/string.h"
26 #include "wx/wxcrtvararg.h"
27 #endif
28
29 #include <ctype.h>
30
31 #ifndef __WXWINCE__
32 #include <errno.h>
33 #endif
34
35 #include <string.h>
36 #include <stdlib.h>
37
38 #ifdef __SALFORDC__
39 #include <clib.h>
40 #endif
41
42 #include "wx/hashmap.h"
43
44 // string handling functions used by wxString:
45 #if wxUSE_UNICODE_UTF8
46 #define wxStringMemcpy memcpy
47 #define wxStringMemcmp memcmp
48 #define wxStringMemchr memchr
49 #define wxStringStrlen strlen
50 #else
51 #define wxStringMemcpy wxTmemcpy
52 #define wxStringMemcmp wxTmemcmp
53 #define wxStringMemchr wxTmemchr
54 #define wxStringStrlen wxStrlen
55 #endif
56
57
58 // ---------------------------------------------------------------------------
59 // static class variables definition
60 // ---------------------------------------------------------------------------
61
62 //According to STL _must_ be a -1 size_t
63 const size_t wxString::npos = (size_t) -1;
64
65 // ----------------------------------------------------------------------------
66 // global functions
67 // ----------------------------------------------------------------------------
68
69 #if wxUSE_STD_IOSTREAM
70
71 #include <iostream>
72
73 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
74 {
75 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
76 return os << (const char *)str.AsCharBuf();
77 #else
78 return os << str.AsInternal();
79 #endif
80 }
81
82 wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
83 {
84 return os << str.c_str();
85 }
86
87 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str)
88 {
89 return os << str.data();
90 }
91
92 #ifndef __BORLANDC__
93 wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)
94 {
95 return os << str.data();
96 }
97 #endif
98
99 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
100
101 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxString& str)
102 {
103 return wos << str.wc_str();
104 }
105
106 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxCStrData& str)
107 {
108 return wos << str.AsWChar();
109 }
110
111 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxWCharBuffer& str)
112 {
113 return wos << str.data();
114 }
115
116 #endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
117
118 #endif // wxUSE_STD_IOSTREAM
119
120 // ===========================================================================
121 // wxString class core
122 // ===========================================================================
123
124 #if wxUSE_UNICODE_UTF8
125
126 void wxString::PosLenToImpl(size_t pos, size_t len,
127 size_t *implPos, size_t *implLen) const
128 {
129 if ( pos == npos )
130 *implPos = npos;
131 else
132 {
133 const_iterator i = begin() + pos;
134 *implPos = wxStringImpl::const_iterator(i.impl()) - m_impl.begin();
135 if ( len == npos )
136 *implLen = npos;
137 else
138 {
139 // too large length is interpreted as "to the end of the string"
140 // FIXME-UTF8: verify this is the case in std::string, assert
141 // otherwise
142 if ( pos + len > length() )
143 len = length() - pos;
144
145 *implLen = (i + len).impl() - i.impl();
146 }
147 }
148 }
149
150 #endif // wxUSE_UNICODE_UTF8
151
152 // ----------------------------------------------------------------------------
153 // wxCStrData converted strings caching
154 // ----------------------------------------------------------------------------
155
156 // FIXME-UTF8: temporarily disabled because it doesn't work with global
157 // string objects; re-enable after fixing this bug and benchmarking
158 // performance to see if using a hash is a good idea at all
159 #if 0
160
161 // For backward compatibility reasons, it must be possible to assign the value
162 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
163 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
164 // because the memory would be freed immediately, but it has to be valid as long
165 // as the string is not modified, so that code like this still works:
166 //
167 // const wxChar *s = str.c_str();
168 // while ( s ) { ... }
169
170 // FIXME-UTF8: not thread safe!
171 // FIXME-UTF8: we currently clear the cached conversion only when the string is
172 // destroyed, but we should do it when the string is modified, to
173 // keep memory usage down
174 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
175 // invalidated the cache on every change, we could keep the previous
176 // conversion
177 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
178 // to use mb_str() or wc_str() instead of (const [w]char*)c_str()
179
180 template<typename T>
181 static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
182 {
183 typename T::iterator i = hash.find(wxConstCast(s, wxString));
184 if ( i != hash.end() )
185 {
186 free(i->second);
187 hash.erase(i);
188 }
189 }
190
191 #if wxUSE_UNICODE
192 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
193 // so we have to use wxString* here and const-cast when used
194 WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
195 wxStringCharConversionCache);
196 static wxStringCharConversionCache gs_stringsCharCache;
197
198 const char* wxCStrData::AsChar() const
199 {
200 // remove previously cache value, if any (see FIXMEs above):
201 DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
202
203 // convert the string and keep it:
204 const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
205 m_str->mb_str().release();
206
207 return s + m_offset;
208 }
209 #endif // wxUSE_UNICODE
210
211 #if !wxUSE_UNICODE_WCHAR
212 WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
213 wxStringWCharConversionCache);
214 static wxStringWCharConversionCache gs_stringsWCharCache;
215
216 const wchar_t* wxCStrData::AsWChar() const
217 {
218 // remove previously cache value, if any (see FIXMEs above):
219 DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
220
221 // convert the string and keep it:
222 const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
223 m_str->wc_str().release();
224
225 return s + m_offset;
226 }
227 #endif // !wxUSE_UNICODE_WCHAR
228
229 wxString::~wxString()
230 {
231 #if wxUSE_UNICODE
232 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
233 DeleteStringFromConversionCache(gs_stringsCharCache, this);
234 #endif
235 #if !wxUSE_UNICODE_WCHAR
236 DeleteStringFromConversionCache(gs_stringsWCharCache, this);
237 #endif
238 }
239 #endif
240
241 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
242 const char* wxCStrData::AsChar() const
243 {
244 #if wxUSE_UNICODE_UTF8
245 if ( wxLocaleIsUtf8 )
246 return AsInternal();
247 #endif
248 // under non-UTF8 locales, we have to convert the internal UTF-8
249 // representation using wxConvLibc and cache the result
250
251 wxString *str = wxConstCast(m_str, wxString);
252
253 // convert the string:
254 //
255 // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
256 // have it) but it's unfortunately not obvious to implement
257 // because we don't know how big buffer do we need for the
258 // given string length (in case of multibyte encodings, e.g.
259 // ISO-2022-JP or UTF-8 when internal representation is wchar_t)
260 //
261 // One idea would be to store more than just m_convertedToChar
262 // in wxString: then we could record the length of the string
263 // which was converted the last time and try to reuse the same
264 // buffer if the current length is not greater than it (this
265 // could still fail because string could have been modified in
266 // place but it would work most of the time, so we'd do it and
267 // only allocate the new buffer if in-place conversion returned
268 // an error). We could also store a bit saying if the string
269 // was modified since the last conversion (and update it in all
270 // operation modifying the string, of course) to avoid unneeded
271 // consequential conversions. But both of these ideas require
272 // adding more fields to wxString and require profiling results
273 // to be sure that we really gain enough from them to justify
274 // doing it.
275 wxCharBuffer buf(str->mb_str());
276
277 // if it failed, return empty string and not NULL to avoid crashes in code
278 // written with either wxWidgets 2 wxString or std::string behaviour in
279 // mind: neither of them ever returns NULL and so we shouldn't neither
280 if ( !buf )
281 return "";
282
283 if ( str->m_convertedToChar &&
284 strlen(buf) == strlen(str->m_convertedToChar) )
285 {
286 // keep the same buffer for as long as possible, so that several calls
287 // to c_str() in a row still work:
288 strcpy(str->m_convertedToChar, buf);
289 }
290 else
291 {
292 str->m_convertedToChar = buf.release();
293 }
294
295 // and keep it:
296 return str->m_convertedToChar + m_offset;
297 }
298 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
299
300 #if !wxUSE_UNICODE_WCHAR
301 const wchar_t* wxCStrData::AsWChar() const
302 {
303 wxString *str = wxConstCast(m_str, wxString);
304
305 // convert the string:
306 wxWCharBuffer buf(str->wc_str());
307
308 // notice that here, unlike above in AsChar(), conversion can't fail as our
309 // internal UTF-8 is always well-formed -- or the string was corrupted and
310 // all bets are off anyhow
311
312 // FIXME-UTF8: do the conversion in-place in the existing buffer
313 if ( str->m_convertedToWChar &&
314 wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
315 {
316 // keep the same buffer for as long as possible, so that several calls
317 // to c_str() in a row still work:
318 memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
319 }
320 else
321 {
322 str->m_convertedToWChar = buf.release();
323 }
324
325 // and keep it:
326 return str->m_convertedToWChar + m_offset;
327 }
328 #endif // !wxUSE_UNICODE_WCHAR
329
330 // ===========================================================================
331 // wxString class core
332 // ===========================================================================
333
334 // ---------------------------------------------------------------------------
335 // construction and conversion
336 // ---------------------------------------------------------------------------
337
338 #if wxUSE_UNICODE_WCHAR
339 /* static */
340 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
341 const wxMBConv& conv)
342 {
343 // anything to do?
344 if ( !psz || nLength == 0 )
345 return SubstrBufFromMB(L"", 0);
346
347 if ( nLength == npos )
348 nLength = wxNO_LEN;
349
350 size_t wcLen;
351 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
352 if ( !wcLen )
353 return SubstrBufFromMB(_T(""), 0);
354 else
355 return SubstrBufFromMB(wcBuf, wcLen);
356 }
357 #endif // wxUSE_UNICODE_WCHAR
358
359 #if wxUSE_UNICODE_UTF8
360 /* static */
361 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
362 const wxMBConv& conv)
363 {
364 // anything to do?
365 if ( !psz || nLength == 0 )
366 return SubstrBufFromMB("", 0);
367
368 // if psz is already in UTF-8, we don't have to do the roundtrip to
369 // wchar_t* and back:
370 if ( conv.IsUTF8() )
371 {
372 // we need to validate the input because UTF8 iterators assume valid
373 // UTF-8 sequence and psz may be invalid:
374 if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
375 {
376 return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz), nLength);
377 }
378 // else: do the roundtrip through wchar_t*
379 }
380
381 if ( nLength == npos )
382 nLength = wxNO_LEN;
383
384 // first convert to wide string:
385 size_t wcLen;
386 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
387 if ( !wcLen )
388 return SubstrBufFromMB("", 0);
389
390 // and then to UTF-8:
391 SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
392 // widechar -> UTF-8 conversion isn't supposed to ever fail:
393 wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
394
395 return buf;
396 }
397 #endif // wxUSE_UNICODE_UTF8
398
399 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
400 /* static */
401 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
402 const wxMBConv& conv)
403 {
404 // anything to do?
405 if ( !pwz || nLength == 0 )
406 return SubstrBufFromWC("", 0);
407
408 if ( nLength == npos )
409 nLength = wxNO_LEN;
410
411 size_t mbLen;
412 wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
413 if ( !mbLen )
414 return SubstrBufFromWC("", 0);
415 else
416 return SubstrBufFromWC(mbBuf, mbLen);
417 }
418 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
419
420
421 #if wxUSE_UNICODE_WCHAR
422
423 //Convert wxString in Unicode mode to a multi-byte string
424 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
425 {
426 return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL);
427 }
428
429 #elif wxUSE_UNICODE_UTF8
430
431 const wxWCharBuffer wxString::wc_str() const
432 {
433 return wxMBConvStrictUTF8().cMB2WC
434 (
435 m_impl.c_str(),
436 m_impl.length() + 1, // size, not length
437 NULL
438 );
439 }
440
441 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
442 {
443 if ( conv.IsUTF8() )
444 return wxCharBuffer::CreateNonOwned(m_impl.c_str());
445
446 // FIXME-UTF8: use wc_str() here once we have buffers with length
447
448 size_t wcLen;
449 wxWCharBuffer wcBuf(wxMBConvStrictUTF8().cMB2WC
450 (
451 m_impl.c_str(),
452 m_impl.length() + 1, // size
453 &wcLen
454 ));
455 if ( !wcLen )
456 return wxCharBuffer("");
457
458 return conv.cWC2MB(wcBuf, wcLen+1, NULL);
459 }
460
461 #else // ANSI
462
463 //Converts this string to a wide character string if unicode
464 //mode is not enabled and wxUSE_WCHAR_T is enabled
465 const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
466 {
467 return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL);
468 }
469
470 #endif // Unicode/ANSI
471
472 // shrink to minimal size (releasing extra memory)
473 bool wxString::Shrink()
474 {
475 wxString tmp(begin(), end());
476 swap(tmp);
477 return tmp.length() == length();
478 }
479
480 // deprecated compatibility code:
481 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
482 wxStringCharType *wxString::GetWriteBuf(size_t nLen)
483 {
484 return DoGetWriteBuf(nLen);
485 }
486
487 void wxString::UngetWriteBuf()
488 {
489 DoUngetWriteBuf();
490 }
491
492 void wxString::UngetWriteBuf(size_t nLen)
493 {
494 DoUngetWriteBuf(nLen);
495 }
496 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
497
498
499 // ---------------------------------------------------------------------------
500 // data access
501 // ---------------------------------------------------------------------------
502
503 // all functions are inline in string.h
504
505 // ---------------------------------------------------------------------------
506 // concatenation operators
507 // ---------------------------------------------------------------------------
508
509 /*
510 * concatenation functions come in 5 flavours:
511 * string + string
512 * char + string and string + char
513 * C str + string and string + C str
514 */
515
516 wxString operator+(const wxString& str1, const wxString& str2)
517 {
518 #if !wxUSE_STL_BASED_WXSTRING
519 wxASSERT( str1.IsValid() );
520 wxASSERT( str2.IsValid() );
521 #endif
522
523 wxString s = str1;
524 s += str2;
525
526 return s;
527 }
528
529 wxString operator+(const wxString& str, wxUniChar ch)
530 {
531 #if !wxUSE_STL_BASED_WXSTRING
532 wxASSERT( str.IsValid() );
533 #endif
534
535 wxString s = str;
536 s += ch;
537
538 return s;
539 }
540
541 wxString operator+(wxUniChar ch, const wxString& str)
542 {
543 #if !wxUSE_STL_BASED_WXSTRING
544 wxASSERT( str.IsValid() );
545 #endif
546
547 wxString s = ch;
548 s += str;
549
550 return s;
551 }
552
553 wxString operator+(const wxString& str, const char *psz)
554 {
555 #if !wxUSE_STL_BASED_WXSTRING
556 wxASSERT( str.IsValid() );
557 #endif
558
559 wxString s;
560 if ( !s.Alloc(strlen(psz) + str.length()) ) {
561 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
562 }
563 s += str;
564 s += psz;
565
566 return s;
567 }
568
569 wxString operator+(const wxString& str, const wchar_t *pwz)
570 {
571 #if !wxUSE_STL_BASED_WXSTRING
572 wxASSERT( str.IsValid() );
573 #endif
574
575 wxString s;
576 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
577 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
578 }
579 s += str;
580 s += pwz;
581
582 return s;
583 }
584
585 wxString operator+(const char *psz, const wxString& str)
586 {
587 #if !wxUSE_STL_BASED_WXSTRING
588 wxASSERT( str.IsValid() );
589 #endif
590
591 wxString s;
592 if ( !s.Alloc(strlen(psz) + str.length()) ) {
593 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
594 }
595 s = psz;
596 s += str;
597
598 return s;
599 }
600
601 wxString operator+(const wchar_t *pwz, const wxString& str)
602 {
603 #if !wxUSE_STL_BASED_WXSTRING
604 wxASSERT( str.IsValid() );
605 #endif
606
607 wxString s;
608 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
609 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
610 }
611 s = pwz;
612 s += str;
613
614 return s;
615 }
616
617 // ---------------------------------------------------------------------------
618 // string comparison
619 // ---------------------------------------------------------------------------
620
621 bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
622 {
623 return (length() == 1) && (compareWithCase ? GetChar(0u) == c
624 : wxToupper(GetChar(0u)) == wxToupper(c));
625 }
626
627 #ifdef HAVE_STD_STRING_COMPARE
628
629 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
630 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
631 // sort strings in characters code point order by sorting the byte sequence
632 // in byte values order (i.e. what strcmp() and memcmp() do).
633
634 int wxString::compare(const wxString& str) const
635 {
636 return m_impl.compare(str.m_impl);
637 }
638
639 int wxString::compare(size_t nStart, size_t nLen,
640 const wxString& str) const
641 {
642 size_t pos, len;
643 PosLenToImpl(nStart, nLen, &pos, &len);
644 return m_impl.compare(pos, len, str.m_impl);
645 }
646
647 int wxString::compare(size_t nStart, size_t nLen,
648 const wxString& str,
649 size_t nStart2, size_t nLen2) const
650 {
651 size_t pos, len;
652 PosLenToImpl(nStart, nLen, &pos, &len);
653
654 size_t pos2, len2;
655 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
656
657 return m_impl.compare(pos, len, str.m_impl, pos2, len2);
658 }
659
660 int wxString::compare(const char* sz) const
661 {
662 return m_impl.compare(ImplStr(sz));
663 }
664
665 int wxString::compare(const wchar_t* sz) const
666 {
667 return m_impl.compare(ImplStr(sz));
668 }
669
670 int wxString::compare(size_t nStart, size_t nLen,
671 const char* sz, size_t nCount) const
672 {
673 size_t pos, len;
674 PosLenToImpl(nStart, nLen, &pos, &len);
675
676 SubstrBufFromMB str(ImplStr(sz, nCount));
677
678 return m_impl.compare(pos, len, str.data, str.len);
679 }
680
681 int wxString::compare(size_t nStart, size_t nLen,
682 const wchar_t* sz, size_t nCount) const
683 {
684 size_t pos, len;
685 PosLenToImpl(nStart, nLen, &pos, &len);
686
687 SubstrBufFromWC str(ImplStr(sz, nCount));
688
689 return m_impl.compare(pos, len, str.data, str.len);
690 }
691
692 #else // !HAVE_STD_STRING_COMPARE
693
694 static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
695 const wxStringCharType* s2, size_t l2)
696 {
697 if( l1 == l2 )
698 return wxStringMemcmp(s1, s2, l1);
699 else if( l1 < l2 )
700 {
701 int ret = wxStringMemcmp(s1, s2, l1);
702 return ret == 0 ? -1 : ret;
703 }
704 else
705 {
706 int ret = wxStringMemcmp(s1, s2, l2);
707 return ret == 0 ? +1 : ret;
708 }
709 }
710
711 int wxString::compare(const wxString& str) const
712 {
713 return ::wxDoCmp(m_impl.data(), m_impl.length(),
714 str.m_impl.data(), str.m_impl.length());
715 }
716
717 int wxString::compare(size_t nStart, size_t nLen,
718 const wxString& str) const
719 {
720 wxASSERT(nStart <= length());
721 size_type strLen = length() - nStart;
722 nLen = strLen < nLen ? strLen : nLen;
723
724 size_t pos, len;
725 PosLenToImpl(nStart, nLen, &pos, &len);
726
727 return ::wxDoCmp(m_impl.data() + pos, len,
728 str.m_impl.data(), str.m_impl.length());
729 }
730
731 int wxString::compare(size_t nStart, size_t nLen,
732 const wxString& str,
733 size_t nStart2, size_t nLen2) const
734 {
735 wxASSERT(nStart <= length());
736 wxASSERT(nStart2 <= str.length());
737 size_type strLen = length() - nStart,
738 strLen2 = str.length() - nStart2;
739 nLen = strLen < nLen ? strLen : nLen;
740 nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
741
742 size_t pos, len;
743 PosLenToImpl(nStart, nLen, &pos, &len);
744 size_t pos2, len2;
745 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
746
747 return ::wxDoCmp(m_impl.data() + pos, len,
748 str.m_impl.data() + pos2, len2);
749 }
750
751 int wxString::compare(const char* sz) const
752 {
753 SubstrBufFromMB str(ImplStr(sz, npos));
754 if ( str.len == npos )
755 str.len = wxStringStrlen(str.data);
756 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
757 }
758
759 int wxString::compare(const wchar_t* sz) const
760 {
761 SubstrBufFromWC str(ImplStr(sz, npos));
762 if ( str.len == npos )
763 str.len = wxStringStrlen(str.data);
764 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
765 }
766
767 int wxString::compare(size_t nStart, size_t nLen,
768 const char* sz, size_t nCount) const
769 {
770 wxASSERT(nStart <= length());
771 size_type strLen = length() - nStart;
772 nLen = strLen < nLen ? strLen : nLen;
773
774 size_t pos, len;
775 PosLenToImpl(nStart, nLen, &pos, &len);
776
777 SubstrBufFromMB str(ImplStr(sz, nCount));
778 if ( str.len == npos )
779 str.len = wxStringStrlen(str.data);
780
781 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
782 }
783
784 int wxString::compare(size_t nStart, size_t nLen,
785 const wchar_t* sz, size_t nCount) const
786 {
787 wxASSERT(nStart <= length());
788 size_type strLen = length() - nStart;
789 nLen = strLen < nLen ? strLen : nLen;
790
791 size_t pos, len;
792 PosLenToImpl(nStart, nLen, &pos, &len);
793
794 SubstrBufFromWC str(ImplStr(sz, nCount));
795 if ( str.len == npos )
796 str.len = wxStringStrlen(str.data);
797
798 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
799 }
800
801 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
802
803
804 // ---------------------------------------------------------------------------
805 // find_{first,last}_[not]_of functions
806 // ---------------------------------------------------------------------------
807
808 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
809
810 // NB: All these functions are implemented with the argument being wxChar*,
811 // i.e. widechar string in any Unicode build, even though native string
812 // representation is char* in the UTF-8 build. This is because we couldn't
813 // use memchr() to determine if a character is in a set encoded as UTF-8.
814
815 size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
816 {
817 return find_first_of(sz, nStart, wxStrlen(sz));
818 }
819
820 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
821 {
822 return find_first_not_of(sz, nStart, wxStrlen(sz));
823 }
824
825 size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
826 {
827 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
828
829 size_t idx = nStart;
830 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
831 {
832 if ( wxTmemchr(sz, *i, n) )
833 return idx;
834 }
835
836 return npos;
837 }
838
839 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
840 {
841 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
842
843 size_t idx = nStart;
844 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
845 {
846 if ( !wxTmemchr(sz, *i, n) )
847 return idx;
848 }
849
850 return npos;
851 }
852
853
854 size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
855 {
856 return find_last_of(sz, nStart, wxStrlen(sz));
857 }
858
859 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
860 {
861 return find_last_not_of(sz, nStart, wxStrlen(sz));
862 }
863
864 size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
865 {
866 size_t len = length();
867
868 if ( nStart == npos )
869 {
870 nStart = len - 1;
871 }
872 else
873 {
874 wxASSERT_MSG( nStart <= len, _T("invalid index") );
875 }
876
877 size_t idx = nStart;
878 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
879 i != rend(); --idx, ++i )
880 {
881 if ( wxTmemchr(sz, *i, n) )
882 return idx;
883 }
884
885 return npos;
886 }
887
888 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
889 {
890 size_t len = length();
891
892 if ( nStart == npos )
893 {
894 nStart = len - 1;
895 }
896 else
897 {
898 wxASSERT_MSG( nStart <= len, _T("invalid index") );
899 }
900
901 size_t idx = nStart;
902 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
903 i != rend(); --idx, ++i )
904 {
905 if ( !wxTmemchr(sz, *i, n) )
906 return idx;
907 }
908
909 return npos;
910 }
911
912 size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
913 {
914 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
915
916 size_t idx = nStart;
917 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
918 {
919 if ( *i != ch )
920 return idx;
921 }
922
923 return npos;
924 }
925
926 size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
927 {
928 size_t len = length();
929
930 if ( nStart == npos )
931 {
932 nStart = len - 1;
933 }
934 else
935 {
936 wxASSERT_MSG( nStart <= len, _T("invalid index") );
937 }
938
939 size_t idx = nStart;
940 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
941 i != rend(); --idx, ++i )
942 {
943 if ( *i != ch )
944 return idx;
945 }
946
947 return npos;
948 }
949
950 // the functions above were implemented for wchar_t* arguments in Unicode
951 // build and char* in ANSI build; below are implementations for the other
952 // version:
953 #if wxUSE_UNICODE
954 #define wxOtherCharType char
955 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
956 #else
957 #define wxOtherCharType wchar_t
958 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
959 #endif
960
961 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
962 { return find_first_of(STRCONV(sz), nStart); }
963
964 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
965 size_t n) const
966 { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
967 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
968 { return find_last_of(STRCONV(sz), nStart); }
969 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
970 size_t n) const
971 { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
972 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
973 { return find_first_not_of(STRCONV(sz), nStart); }
974 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
975 size_t n) const
976 { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
977 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
978 { return find_last_not_of(STRCONV(sz), nStart); }
979 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
980 size_t n) const
981 { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
982
983 #undef wxOtherCharType
984 #undef STRCONV
985
986 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
987
988 // ===========================================================================
989 // other common string functions
990 // ===========================================================================
991
992 int wxString::CmpNoCase(const wxString& s) const
993 {
994 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
995
996 const_iterator i1 = begin();
997 const_iterator end1 = end();
998 const_iterator i2 = s.begin();
999 const_iterator end2 = s.end();
1000
1001 for ( ; i1 != end1 && i2 != end2; ++i1, ++i2 )
1002 {
1003 wxUniChar lower1 = (wxChar)wxTolower(*i1);
1004 wxUniChar lower2 = (wxChar)wxTolower(*i2);
1005 if ( lower1 != lower2 )
1006 return lower1 < lower2 ? -1 : 1;
1007 }
1008
1009 size_t len1 = length();
1010 size_t len2 = s.length();
1011
1012 if ( len1 < len2 )
1013 return -1;
1014 else if ( len1 > len2 )
1015 return 1;
1016 return 0;
1017 }
1018
1019
1020 #if wxUSE_UNICODE
1021
1022 #ifdef __MWERKS__
1023 #ifndef __SCHAR_MAX__
1024 #define __SCHAR_MAX__ 127
1025 #endif
1026 #endif
1027
1028 wxString wxString::FromAscii(const char *ascii, size_t len)
1029 {
1030 if (!ascii || len == 0)
1031 return wxEmptyString;
1032
1033 wxString res;
1034
1035 {
1036 wxStringInternalBuffer buf(res, len);
1037 wxStringCharType *dest = buf;
1038
1039 for ( ; len > 0; --len )
1040 {
1041 unsigned char c = (unsigned char)*ascii++;
1042 wxASSERT_MSG( c < 0x80,
1043 _T("Non-ASCII value passed to FromAscii().") );
1044
1045 *dest++ = (wchar_t)c;
1046 }
1047 }
1048
1049 return res;
1050 }
1051
1052 wxString wxString::FromAscii(const char *ascii)
1053 {
1054 return FromAscii(ascii, wxStrlen(ascii));
1055 }
1056
1057 wxString wxString::FromAscii(char ascii)
1058 {
1059 // What do we do with '\0' ?
1060
1061 unsigned char c = (unsigned char)ascii;
1062
1063 wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") );
1064
1065 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1066 return wxString(wxUniChar((wchar_t)c));
1067 }
1068
1069 const wxCharBuffer wxString::ToAscii() const
1070 {
1071 // this will allocate enough space for the terminating NUL too
1072 wxCharBuffer buffer(length());
1073 char *dest = buffer.data();
1074
1075 for ( const_iterator i = begin(); i != end(); ++i )
1076 {
1077 wxUniChar c(*i);
1078 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1079 *dest++ = c.IsAscii() ? (char)c : '_';
1080
1081 // the output string can't have embedded NULs anyhow, so we can safely
1082 // stop at first of them even if we do have any
1083 if ( !c )
1084 break;
1085 }
1086
1087 return buffer;
1088 }
1089
1090 #endif // wxUSE_UNICODE
1091
1092 // extract string of length nCount starting at nFirst
1093 wxString wxString::Mid(size_t nFirst, size_t nCount) const
1094 {
1095 size_t nLen = length();
1096
1097 // default value of nCount is npos and means "till the end"
1098 if ( nCount == npos )
1099 {
1100 nCount = nLen - nFirst;
1101 }
1102
1103 // out-of-bounds requests return sensible things
1104 if ( nFirst + nCount > nLen )
1105 {
1106 nCount = nLen - nFirst;
1107 }
1108
1109 if ( nFirst > nLen )
1110 {
1111 // AllocCopy() will return empty string
1112 return wxEmptyString;
1113 }
1114
1115 wxString dest(*this, nFirst, nCount);
1116 if ( dest.length() != nCount )
1117 {
1118 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1119 }
1120
1121 return dest;
1122 }
1123
1124 // check that the string starts with prefix and return the rest of the string
1125 // in the provided pointer if it is not NULL, otherwise return false
1126 bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
1127 {
1128 if ( compare(0, prefix.length(), prefix) != 0 )
1129 return false;
1130
1131 if ( rest )
1132 {
1133 // put the rest of the string into provided pointer
1134 rest->assign(*this, prefix.length(), npos);
1135 }
1136
1137 return true;
1138 }
1139
1140
1141 // check that the string ends with suffix and return the rest of it in the
1142 // provided pointer if it is not NULL, otherwise return false
1143 bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
1144 {
1145 int start = length() - suffix.length();
1146
1147 if ( start < 0 || compare(start, npos, suffix) != 0 )
1148 return false;
1149
1150 if ( rest )
1151 {
1152 // put the rest of the string into provided pointer
1153 rest->assign(*this, 0, start);
1154 }
1155
1156 return true;
1157 }
1158
1159
1160 // extract nCount last (rightmost) characters
1161 wxString wxString::Right(size_t nCount) const
1162 {
1163 if ( nCount > length() )
1164 nCount = length();
1165
1166 wxString dest(*this, length() - nCount, nCount);
1167 if ( dest.length() != nCount ) {
1168 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1169 }
1170 return dest;
1171 }
1172
1173 // get all characters after the last occurence of ch
1174 // (returns the whole string if ch not found)
1175 wxString wxString::AfterLast(wxUniChar ch) const
1176 {
1177 wxString str;
1178 int iPos = Find(ch, true);
1179 if ( iPos == wxNOT_FOUND )
1180 str = *this;
1181 else
1182 str = wx_str() + iPos + 1;
1183
1184 return str;
1185 }
1186
1187 // extract nCount first (leftmost) characters
1188 wxString wxString::Left(size_t nCount) const
1189 {
1190 if ( nCount > length() )
1191 nCount = length();
1192
1193 wxString dest(*this, 0, nCount);
1194 if ( dest.length() != nCount ) {
1195 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1196 }
1197 return dest;
1198 }
1199
1200 // get all characters before the first occurence of ch
1201 // (returns the whole string if ch not found)
1202 wxString wxString::BeforeFirst(wxUniChar ch) const
1203 {
1204 int iPos = Find(ch);
1205 if ( iPos == wxNOT_FOUND ) iPos = length();
1206 return wxString(*this, 0, iPos);
1207 }
1208
1209 /// get all characters before the last occurence of ch
1210 /// (returns empty string if ch not found)
1211 wxString wxString::BeforeLast(wxUniChar ch) const
1212 {
1213 wxString str;
1214 int iPos = Find(ch, true);
1215 if ( iPos != wxNOT_FOUND && iPos != 0 )
1216 str = wxString(c_str(), iPos);
1217
1218 return str;
1219 }
1220
1221 /// get all characters after the first occurence of ch
1222 /// (returns empty string if ch not found)
1223 wxString wxString::AfterFirst(wxUniChar ch) const
1224 {
1225 wxString str;
1226 int iPos = Find(ch);
1227 if ( iPos != wxNOT_FOUND )
1228 str = wx_str() + iPos + 1;
1229
1230 return str;
1231 }
1232
1233 // replace first (or all) occurences of some substring with another one
1234 size_t wxString::Replace(const wxString& strOld,
1235 const wxString& strNew, bool bReplaceAll)
1236 {
1237 // if we tried to replace an empty string we'd enter an infinite loop below
1238 wxCHECK_MSG( !strOld.empty(), 0,
1239 _T("wxString::Replace(): invalid parameter") );
1240
1241 size_t uiCount = 0; // count of replacements made
1242
1243 size_t uiOldLen = strOld.length();
1244 size_t uiNewLen = strNew.length();
1245
1246 size_t dwPos = 0;
1247
1248 while ( (*this)[dwPos] != wxT('\0') )
1249 {
1250 //DO NOT USE STRSTR HERE
1251 //this string can contain embedded null characters,
1252 //so strstr will function incorrectly
1253 dwPos = find(strOld, dwPos);
1254 if ( dwPos == npos )
1255 break; // exit the loop
1256 else
1257 {
1258 //replace this occurance of the old string with the new one
1259 replace(dwPos, uiOldLen, strNew, uiNewLen);
1260
1261 //move up pos past the string that was replaced
1262 dwPos += uiNewLen;
1263
1264 //increase replace count
1265 ++uiCount;
1266
1267 // stop now?
1268 if ( !bReplaceAll )
1269 break; // exit the loop
1270 }
1271 }
1272
1273 return uiCount;
1274 }
1275
1276 bool wxString::IsAscii() const
1277 {
1278 for ( const_iterator i = begin(); i != end(); ++i )
1279 {
1280 if ( !(*i).IsAscii() )
1281 return false;
1282 }
1283
1284 return true;
1285 }
1286
1287 bool wxString::IsWord() const
1288 {
1289 for ( const_iterator i = begin(); i != end(); ++i )
1290 {
1291 if ( !wxIsalpha(*i) )
1292 return false;
1293 }
1294
1295 return true;
1296 }
1297
1298 bool wxString::IsNumber() const
1299 {
1300 if ( empty() )
1301 return true;
1302
1303 const_iterator i = begin();
1304
1305 if ( *i == _T('-') || *i == _T('+') )
1306 ++i;
1307
1308 for ( ; i != end(); ++i )
1309 {
1310 if ( !wxIsdigit(*i) )
1311 return false;
1312 }
1313
1314 return true;
1315 }
1316
1317 wxString wxString::Strip(stripType w) const
1318 {
1319 wxString s = *this;
1320 if ( w & leading ) s.Trim(false);
1321 if ( w & trailing ) s.Trim(true);
1322 return s;
1323 }
1324
1325 // ---------------------------------------------------------------------------
1326 // case conversion
1327 // ---------------------------------------------------------------------------
1328
1329 wxString& wxString::MakeUpper()
1330 {
1331 for ( iterator it = begin(), en = end(); it != en; ++it )
1332 *it = (wxChar)wxToupper(*it);
1333
1334 return *this;
1335 }
1336
1337 wxString& wxString::MakeLower()
1338 {
1339 for ( iterator it = begin(), en = end(); it != en; ++it )
1340 *it = (wxChar)wxTolower(*it);
1341
1342 return *this;
1343 }
1344
1345 // ---------------------------------------------------------------------------
1346 // trimming and padding
1347 // ---------------------------------------------------------------------------
1348
1349 // some compilers (VC++ 6.0 not to name them) return true for a call to
1350 // isspace('ê') in the C locale which seems to be broken to me, but we have to
1351 // live with this by checking that the character is a 7 bit one - even if this
1352 // may fail to detect some spaces (I don't know if Unicode doesn't have
1353 // space-like symbols somewhere except in the first 128 chars), it is arguably
1354 // still better than trimming away accented letters
1355 inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1356
1357 // trims spaces (in the sense of isspace) from left or right side
1358 wxString& wxString::Trim(bool bFromRight)
1359 {
1360 // first check if we're going to modify the string at all
1361 if ( !empty() &&
1362 (
1363 (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1364 (!bFromRight && wxSafeIsspace(GetChar(0u)))
1365 )
1366 )
1367 {
1368 if ( bFromRight )
1369 {
1370 // find last non-space character
1371 reverse_iterator psz = rbegin();
1372 while ( (psz != rend()) && wxSafeIsspace(*psz) )
1373 ++psz;
1374
1375 // truncate at trailing space start
1376 erase(psz.base(), end());
1377 }
1378 else
1379 {
1380 // find first non-space character
1381 iterator psz = begin();
1382 while ( (psz != end()) && wxSafeIsspace(*psz) )
1383 ++psz;
1384
1385 // fix up data and length
1386 erase(begin(), psz);
1387 }
1388 }
1389
1390 return *this;
1391 }
1392
1393 // adds nCount characters chPad to the string from either side
1394 wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1395 {
1396 wxString s(chPad, nCount);
1397
1398 if ( bFromRight )
1399 *this += s;
1400 else
1401 {
1402 s += *this;
1403 swap(s);
1404 }
1405
1406 return *this;
1407 }
1408
1409 // truncate the string
1410 wxString& wxString::Truncate(size_t uiLen)
1411 {
1412 if ( uiLen < length() )
1413 {
1414 erase(begin() + uiLen, end());
1415 }
1416 //else: nothing to do, string is already short enough
1417
1418 return *this;
1419 }
1420
1421 // ---------------------------------------------------------------------------
1422 // finding (return wxNOT_FOUND if not found and index otherwise)
1423 // ---------------------------------------------------------------------------
1424
1425 // find a character
1426 int wxString::Find(wxUniChar ch, bool bFromEnd) const
1427 {
1428 size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1429
1430 return (idx == npos) ? wxNOT_FOUND : (int)idx;
1431 }
1432
1433 // ----------------------------------------------------------------------------
1434 // conversion to numbers
1435 // ----------------------------------------------------------------------------
1436
1437 // The implementation of all the functions below is exactly the same so factor
1438 // it out. Note that number extraction works correctly on UTF-8 strings, so
1439 // we can use wxStringCharType and wx_str() for maximum efficiency.
1440
// DO_IF_NOT_WINCE(x) expands to x, except when __WXWINCE__ is defined, in
// which case it expands to nothing
#ifdef __WXWINCE__
    #define DO_IF_NOT_WINCE(x)
#else
    #define DO_IF_NOT_WINCE(x) x
#endif
1446
// Common body of the string to integer conversion methods: val is the output
// pointer, base the numeric base and func the strtol()-like function applied
// to wx_str(). The macro expands to the complete function body, including
// the return statement (but without its terminating semicolon).
#define WX_STRING_TO_INT_TYPE(val, base, func)                              \
    wxCHECK_MSG( val, false, _T("NULL output pointer") );                   \
    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );  \
                                                                            \
    DO_IF_NOT_WINCE( errno = 0; )                                           \
                                                                            \
    const wxStringCharType *pszBegin = wx_str();                            \
    wxStringCharType *pszStop;                                              \
    *val = func(pszBegin, &pszStop, base);                                  \
                                                                            \
    /* succeed only if the string was not empty to start with, the scan */ \
    /* was stopped by the terminating NUL and no under/overflow occurred */ \
    return (pszStop != pszBegin) && !*pszStop                               \
           DO_IF_NOT_WINCE( && (errno != ERANGE) )
1462
// Converts the string to a long using wxStrtol() in the given base and stores
// the result in *val. The whole body, including the return statement, comes
// from WX_STRING_TO_INT_TYPE: true is returned only if the entire non-empty
// string was consumed and (except under WinCE) errno is not ERANGE.
1463 bool wxString::ToLong(long *val, int base) const
1464 {
1465 WX_STRING_TO_INT_TYPE(val, base, wxStrtol);
1466 }
1467
// Converts the string to an unsigned long using wxStrtoul() in the given base
// and stores the result in *val. The whole body, including the return
// statement, comes from WX_STRING_TO_INT_TYPE: true is returned only if the
// entire non-empty string was consumed and (except under WinCE) errno is not
// ERANGE.
1468 bool wxString::ToULong(unsigned long *val, int base) const
1469 {
1470 WX_STRING_TO_INT_TYPE(val, base, wxStrtoul);
1471 }
1472
// Converts the string to a wxLongLong_t using wxStrtoll() in the given base
// and stores the result in *val. The whole body, including the return
// statement, comes from WX_STRING_TO_INT_TYPE: true is returned only if the
// entire non-empty string was consumed and (except under WinCE) errno is not
// ERANGE.
1473 bool wxString::ToLongLong(wxLongLong_t *val, int base) const
1474 {
1475 WX_STRING_TO_INT_TYPE(val, base, wxStrtoll);
1476 }
1477
// Converts the string to a wxULongLong_t using wxStrtoull() in the given base
// and stores the result in *val. The whole body, including the return
// statement, comes from WX_STRING_TO_INT_TYPE: true is returned only if the
// entire non-empty string was consumed and (except under WinCE) errno is not
// ERANGE.
1478 bool wxString::ToULongLong(wxULongLong_t *val, int base) const
1479 {
1480 WX_STRING_TO_INT_TYPE(val, base, wxStrtoull);
1481 }
1482
1483 bool wxString::ToDouble(double *val) const
1484 {
1485 wxCHECK_MSG( val, false, _T("NULL pointer in wxString::ToDouble") );
1486
1487 #ifndef __WXWINCE__
1488 errno = 0;
1489 #endif
1490
1491 const wxChar *start = c_str();
1492 wxChar *end;
1493 *val = wxStrtod(start, &end);
1494
1495 // return true only if scan was stopped by the terminating NUL and if the
1496 // string was not empty to start with and no under/overflow occurred
1497 return !*end && (end != start)
1498 #ifndef __WXWINCE__
1499 && (errno != ERANGE)
1500 #endif
1501 ;
1502 }
1503
1504 // ---------------------------------------------------------------------------
1505 // formatted output
1506 // ---------------------------------------------------------------------------
1507
1508 #if !wxUSE_UTF8_LOCALE_ONLY
1509 /* static */
1510 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1511 wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
1512 #else
1513 wxString wxString::DoFormatWchar(const wxChar *format, ...)
1514 #endif
1515 {
1516 va_list argptr;
1517 va_start(argptr, format);
1518
1519 wxString s;
1520 s.PrintfV(format, argptr);
1521
1522 va_end(argptr);
1523
1524 return s;
1525 }
1526 #endif // !wxUSE_UTF8_LOCALE_ONLY
1527
#if wxUSE_UNICODE_UTF8
// Returns a new string holding the result of formatting the variable
// arguments according to the char format string (done by PrintfV()).
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
1543
1544 /* static */
1545 wxString wxString::FormatV(const wxString& format, va_list argptr)
1546 {
1547 wxString s;
1548 s.PrintfV(format, argptr);
1549 return s;
1550 }
1551
1552 #if !wxUSE_UTF8_LOCALE_ONLY
1553 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1554 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
1555 #else
1556 int wxString::DoPrintfWchar(const wxChar *format, ...)
1557 #endif
1558 {
1559 va_list argptr;
1560 va_start(argptr, format);
1561
1562 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1563 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1564 // because it's the only cast that works safely for downcasting when
1565 // multiple inheritance is used:
1566 wxString *str = static_cast<wxString*>(this);
1567 #else
1568 wxString *str = this;
1569 #endif
1570
1571 int iLen = str->PrintfV(format, argptr);
1572
1573 va_end(argptr);
1574
1575 return iLen;
1576 }
1577 #endif // !wxUSE_UTF8_LOCALE_ONLY
1578
#if wxUSE_UNICODE_UTF8
// Formats the variable arguments according to the char format string into
// this string and returns whatever PrintfV() returned.
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);

    const int nWritten = PrintfV(format, args);

    va_end(args);

    return nWritten;
}
#endif // wxUSE_UNICODE_UTF8
1592
1593 /*
1594 Uses wxVsnprintf and places the result into this string.
1595
1596 In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1597 it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in
1598 the ISO C99 (and thus SUSv3) standard the return value for the case of
1599 an undersized buffer is inconsistent. For conforming vsnprintf
1600 implementations the function must return the number of characters that
1601 would have been printed had the buffer been large enough. For conforming
1602 vswprintf implementations the function must return a negative number
1603 and set errno.
1604
1605 What vswprintf sets errno to is undefined but Darwin seems to set it to
1606 EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of
1607 those are defined in the standard and backed up by several conformance
1608 statements. Note that ENOMEM mentioned in the manual page does not
1609 apply to swprintf, only wprintf and fwprintf.
1610
1611 Official manual page:
1612 http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1613
1614 Some conformance statements (AIX, Solaris):
1615 http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1616 http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1617
1618 Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1619 EILSEQ and EINVAL are specifically defined to mean the error is other than
1620 an undersized buffer and no other errno are defined we treat those two
1621 as meaning hard errors and everything else gets the old behavior which
1622 is to keep looping and increasing buffer size until the function succeeds.
1623
1624 In practice it's impossible to determine before compilation which behavior
1625 may be used. The vswprintf function may have vsnprintf-like behavior or
1626 vice-versa. Behavior detected on one release can theoretically change
1627 with an updated release. Not to mention that configure testing for it
1628 would require the test to be run on the host system, not the build system
1629 which makes cross compilation difficult. Therefore, we make no assumptions
1630 about behavior and try our best to handle every known case, including the
1631 case where wxVsnprintf returns a negative number and fails to set errno.
1632
1633 There is yet one more non-standard implementation and that is our own.
1634 Fortunately, that can be detected at compile-time.
1635
1636 On top of all that, ISO C99 explicitly defines snprintf to write a null
1637 character to the last position of the specified buffer. That would be
1638 at the given buffer size minus 1. It is supposed to do this even if it
1639 turns out that the buffer is sized too small.
1640
1641 Darwin (tested on 10.5) follows the C99 behavior exactly.
1642
1643 Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1644 errno even when it fails. However, it only seems to ever fail due
1645 to an undersized buffer.
1646 */
1647 #if wxUSE_UNICODE_UTF8
1648 template<typename BufferType>
1649 #else
1650 // we only need one version in non-UTF8 builds and at least two Windows
1651 // compilers have problems with this function template, so use just one
1652 // normal function here
1653 #endif
1654 static int DoStringPrintfV(wxString& str,
1655 const wxString& format, va_list argptr)
1656 {
1657 int size = 1024;
1658
1659 for ( ;; )
1660 {
1661 #if wxUSE_UNICODE_UTF8
1662 BufferType tmp(str, size + 1);
1663 typename BufferType::CharType *buf = tmp;
1664 #else
1665 wxStringBuffer tmp(str, size + 1);
1666 wxChar *buf = tmp;
1667 #endif
1668
1669 if ( !buf )
1670 {
1671 // out of memory
1672
1673 // in UTF-8 build, leaving uninitialized junk in the buffer
1674 // could result in invalid non-empty UTF-8 string, so just
1675 // reset the string to empty on failure:
1676 buf[0] = '\0';
1677 return -1;
1678 }
1679
1680 // wxVsnprintf() may modify the original arg pointer, so pass it
1681 // only a copy
1682 va_list argptrcopy;
1683 wxVaCopy(argptrcopy, argptr);
1684
1685 #ifndef __WXWINCE__
1686 // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1687 errno = 0;
1688 #endif
1689 int len = wxVsnprintf(buf, size, format, argptrcopy);
1690 va_end(argptrcopy);
1691
1692 // some implementations of vsnprintf() don't NUL terminate
1693 // the string if there is not enough space for it so
1694 // always do it manually
1695 // FIXME: This really seems to be the wrong and would be an off-by-one
1696 // bug except the code above allocates an extra character.
1697 buf[size] = _T('\0');
1698
1699 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1700 // total number of characters which would have been written if the
1701 // buffer were large enough (newer standards such as Unix98)
1702 if ( len < 0 )
1703 {
1704 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1705 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1706 // is true if *both* of them use our own implementation,
1707 // otherwise we can't be sure
1708 #if wxUSE_WXVSNPRINTF
1709 // we know that our own implementation of wxVsnprintf() returns -1
1710 // only for a format error - thus there's something wrong with
1711 // the user's format string
1712 buf[0] = '\0';
1713 return -1;
1714 #else // possibly using system version
1715 // assume it only returns error if there is not enough space, but
1716 // as we don't know how much we need, double the current size of
1717 // the buffer
1718 #ifndef __WXWINCE__
1719 if( (errno == EILSEQ) || (errno == EINVAL) )
1720 // If errno was set to one of the two well-known hard errors
1721 // then fail immediately to avoid an infinite loop.
1722 return -1;
1723 else
1724 #endif // __WXWINCE__
1725 // still not enough, as we don't know how much we need, double the
1726 // current size of the buffer
1727 size *= 2;
1728 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1729 }
1730 else if ( len >= size )
1731 {
1732 #if wxUSE_WXVSNPRINTF
1733 // we know that our own implementation of wxVsnprintf() returns
1734 // size+1 when there's not enough space but that's not the size
1735 // of the required buffer!
1736 size *= 2; // so we just double the current size of the buffer
1737 #else
1738 // some vsnprintf() implementations NUL-terminate the buffer and
1739 // some don't in len == size case, to be safe always add 1
1740 // FIXME: I don't quite understand this comment. The vsnprintf
1741 // function is specifically defined to return the number of
1742 // characters printed not including the null terminator.
1743 // So OF COURSE you need to add 1 to get the right buffer size.
1744 // The following line is definitely correct, no question.
1745 size = len + 1;
1746 #endif
1747 }
1748 else // ok, there was enough space
1749 {
1750 break;
1751 }
1752 }
1753
1754 // we could have overshot
1755 str.Shrink();
1756
1757 return str.length();
1758 }
1759
1760 int wxString::PrintfV(const wxString& format, va_list argptr)
1761 {
1762 #if wxUSE_UNICODE_UTF8
1763 #if wxUSE_STL_BASED_WXSTRING
1764 typedef wxStringTypeBuffer<char> Utf8Buffer;
1765 #else
1766 typedef wxStringInternalBuffer Utf8Buffer;
1767 #endif
1768 #endif
1769
1770 #if wxUSE_UTF8_LOCALE_ONLY
1771 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
1772 #else
1773 #if wxUSE_UNICODE_UTF8
1774 if ( wxLocaleIsUtf8 )
1775 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
1776 else
1777 // wxChar* version
1778 return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
1779 #else
1780 return DoStringPrintfV(*this, format, argptr);
1781 #endif // UTF8/WCHAR
1782 #endif
1783 }
1784
1785 // ----------------------------------------------------------------------------
1786 // misc other operations
1787 // ----------------------------------------------------------------------------
1788
// Shell-like wildcard matching of this string against mask.
//
// Only the #else branch below is compiled (the regex-based version is
// disabled with "#if 0"). It walks the mask and the text in parallel: '?'
// consumes one text character, '*' looks up (with wxStrstr()) the literal run
// following it in the remaining text, and any other mask character must be
// equal to the current text character.
1789 // returns true if the string matches the pattern which may contain '*' and
1790 // '?' metacharacters (as usual, '?' matches any character and '*' any number
1791 // of them)
1792 bool wxString::Matches(const wxString& mask) const
1793 {
1794 // I disable this code as it doesn't seem to be faster (in fact, it seems
1795 // to be much slower) than the old, hand-written code below and using it
1796 // here requires always linking with libregex even if the user code doesn't
1797 // use it
1798 #if 0 // wxUSE_REGEX
1799 // first translate the shell-like mask into a regex
1800 wxString pattern;
1801 pattern.reserve(wxStrlen(pszMask));
1802
1803 pattern += _T('^');
1804 while ( *pszMask )
1805 {
1806 switch ( *pszMask )
1807 {
1808 case _T('?'):
1809 pattern += _T('.');
1810 break;
1811
1812 case _T('*'):
1813 pattern += _T(".*");
1814 break;
1815
1816 case _T('^'):
1817 case _T('.'):
1818 case _T('$'):
1819 case _T('('):
1820 case _T(')'):
1821 case _T('|'):
1822 case _T('+'):
1823 case _T('\\'):
1824 // these characters are special in a RE, quote them
1825 // (however note that we don't quote '[' and ']' to allow
1826 // using them for Unix shell like matching)
1827 pattern += _T('\\');
1828 // fall through
1829
1830 default:
1831 pattern += *pszMask;
1832 }
1833
1834 pszMask++;
1835 }
1836 pattern += _T('$');
1837
1838 // and now use it
1839 return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
1840 #else // !wxUSE_REGEX
1841 // TODO: this is, of course, awfully inefficient...
1842
1843 // FIXME-UTF8: implement using iterators, remove #if
1844 #if wxUSE_UNICODE_UTF8
// in UTF-8 build both strings are first converted with wc_str(); the buffer
// objects are kept in local variables while their data pointers are in use
1845 wxWCharBuffer maskBuf = mask.wc_str();
1846 wxWCharBuffer txtBuf = wc_str();
1847 const wxChar *pszMask = maskBuf.data();
1848 const wxChar *pszTxt = txtBuf.data();
1849 #else
1850 const wxChar *pszMask = mask.wx_str();
1851 // the char currently being checked
1852 const wxChar *pszTxt = wx_str();
1853 #endif
1854
1855 // the last location where '*' matched
1856 const wxChar *pszLastStarInText = NULL;
1857 const wxChar *pszLastStarInMask = NULL;
1858
// the backtracking code at the end of the function jumps back to this label
1859 match:
1860 for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
1861 switch ( *pszMask ) {
1862 case wxT('?'):
1863 if ( *pszTxt == wxT('\0') )
1864 return false;
1865
1866 // pszTxt and pszMask will be incremented in the loop statement
1867
1868 break;
1869
1870 case wxT('*'):
1871 {
1872 // remember where we started to be able to backtrack later
1873 pszLastStarInText = pszTxt;
1874 pszLastStarInMask = pszMask;
1875
1876 // ignore special chars immediately following this one
1877 // (should this be an error?)
// NB: this means that a '?' directly following a '*' does not consume
//     any text character
1878 while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
1879 pszMask++;
1880
1881 // if there is nothing more, match
1882 if ( *pszMask == wxT('\0') )
1883 return true;
1884
1885 // are there any other metacharacters in the mask?
1886 size_t uiLenMask;
1887 const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));
1888
1889 if ( pEndMask != NULL ) {
1890 // we have to match the string between two metachars
1891 uiLenMask = pEndMask - pszMask;
1892 }
1893 else {
1894 // we have to match the remainder of the string
1895 uiLenMask = wxStrlen(pszMask);
1896 }
1897
// look for the first occurrence of this literal run in the remaining text
1898 wxString strToMatch(pszMask, uiLenMask);
1899 const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
1900 if ( pMatch == NULL )
1901 return false;
1902
1903 // -1 to compensate "++" in the loop
1904 pszTxt = pMatch + uiLenMask - 1;
1905 pszMask += uiLenMask - 1;
1906 }
1907 break;
1908
1909 default:
// NOTE(review): a literal mismatch returns false right here, without going
// through the backtracking step at the end of the function, so a later
// occurrence of the text following a '*' is not tried in this case --
// confirm whether this is intended
1910 if ( *pszMask != *pszTxt )
1911 return false;
1912 break;
1913 }
1914 }
1915
1916 // match only if nothing left
1917 if ( *pszTxt == wxT('\0') )
1918 return true;
1919
1920 // if we failed to match, backtrack if we can
// (this point is reached when the mask is exhausted but some text remains:
// retry from the last '*' seen, starting one text character further)
1921 if ( pszLastStarInText ) {
1922 pszTxt = pszLastStarInText + 1;
1923 pszMask = pszLastStarInMask;
1924
1925 pszLastStarInText = NULL;
1926
1927 // don't bother resetting pszLastStarInMask, it's unnecessary
1928
1929 goto match;
1930 }
1931
1932 return false;
1933 #endif // wxUSE_REGEX/!wxUSE_REGEX
1934 }
1935
1936 // Count the number of chars
1937 int wxString::Freq(wxUniChar ch) const
1938 {
1939 int count = 0;
1940 for ( const_iterator i = begin(); i != end(); ++i )
1941 {
1942 if ( *i == ch )
1943 count ++;
1944 }
1945 return count;
1946 }
1947
1948 // convert to upper case, return the copy of the string
1949 wxString wxString::Upper() const
1950 { wxString s(*this); return s.MakeUpper(); }
1951
1952 // convert to lower case, return the copy of the string
1953 wxString wxString::Lower() const { wxString s(*this); return s.MakeLower(); }
1954
1955 // ----------------------------------------------------------------------------
1956 // wxUTF8StringBuffer
1957 // ----------------------------------------------------------------------------
1958
1959 #if wxUSE_UNICODE_WCHAR
1960 wxUTF8StringBuffer::~wxUTF8StringBuffer()
1961 {
1962 wxMBConvStrictUTF8 conv;
1963 size_t wlen = conv.ToWChar(NULL, 0, m_buf);
1964 wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
1965
1966 wxStringInternalBuffer wbuf(m_str, wlen);
1967 conv.ToWChar(wbuf, wlen, m_buf);
1968 }
1969
1970 wxUTF8StringBufferLength::~wxUTF8StringBufferLength()
1971 {
1972 wxCHECK_RET(m_lenSet, "length not set");
1973
1974 wxMBConvStrictUTF8 conv;
1975 size_t wlen = conv.ToWChar(NULL, 0, m_buf, m_len);
1976 wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
1977
1978 wxStringInternalBufferLength wbuf(m_str, wlen);
1979 conv.ToWChar(wbuf, wlen, m_buf, m_len);
1980 wbuf.SetLength(wlen);
1981 }
1982 #endif // wxUSE_UNICODE_WCHAR