]> git.saurik.com Git - wxWidgets.git/blob - src/common/string.cpp
99f0f816a0aec0de222384815eecdfbdde98ae61
[wxWidgets.git] / src / common / string.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
5 // Modified by:
6 // Created: 29/01/98
7 // RCS-ID: $Id$
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
12
13 // ===========================================================================
14 // headers, declarations, constants
15 // ===========================================================================
16
17 // For compilers that support precompilation, includes "wx.h".
18 #include "wx/wxprec.h"
19
20 #ifdef __BORLANDC__
21 #pragma hdrstop
22 #endif
23
24 #ifndef WX_PRECOMP
25 #include "wx/string.h"
26 #include "wx/wxcrtvararg.h"
27 #endif
28
29 #include <ctype.h>
30
31 #ifndef __WXWINCE__
32 #include <errno.h>
33 #endif
34
35 #include <string.h>
36 #include <stdlib.h>
37
38 #include "wx/hashmap.h"
39
40 // string handling functions used by wxString:
41 #if wxUSE_UNICODE_UTF8
42 #define wxStringMemcpy memcpy
43 #define wxStringMemcmp memcmp
44 #define wxStringMemchr memchr
45 #define wxStringStrlen strlen
46 #else
47 #define wxStringMemcpy wxTmemcpy
48 #define wxStringMemcmp wxTmemcmp
49 #define wxStringMemchr wxTmemchr
50 #define wxStringStrlen wxStrlen
51 #endif
52
53
54 // ---------------------------------------------------------------------------
55 // static class variables definition
56 // ---------------------------------------------------------------------------
57
58 //According to STL _must_ be a -1 size_t
59 const size_t wxString::npos = (size_t) -1;
60
61 #if wxUSE_UNICODE_UTF8
62 wxString::PosToImplCache wxString::ms_cache;
63 #endif // wxUSE_UNICODE_UTF8
64
65 // ----------------------------------------------------------------------------
66 // global functions
67 // ----------------------------------------------------------------------------
68
69 #if wxUSE_STD_IOSTREAM
70
71 #include <iostream>
72
73 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
74 {
75 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
76 return os << (const char *)str.AsCharBuf();
77 #else
78 return os << str.AsInternal();
79 #endif
80 }
81
82 wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
83 {
84 return os << str.c_str();
85 }
86
87 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str)
88 {
89 return os << str.data();
90 }
91
92 #ifndef __BORLANDC__
93 wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)
94 {
95 return os << str.data();
96 }
97 #endif
98
99 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
100
101 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxString& str)
102 {
103 return wos << str.wc_str();
104 }
105
106 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxCStrData& str)
107 {
108 return wos << str.AsWChar();
109 }
110
111 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxWCharBuffer& str)
112 {
113 return wos << str.data();
114 }
115
116 #endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
117
118 #endif // wxUSE_STD_IOSTREAM
119
120 // ===========================================================================
121 // wxString class core
122 // ===========================================================================
123
124 #if wxUSE_UNICODE_UTF8
125
126 void wxString::PosLenToImpl(size_t pos, size_t len,
127 size_t *implPos, size_t *implLen) const
128 {
129 if ( pos == npos )
130 *implPos = npos;
131 else
132 {
133 const_iterator i = begin() + pos;
134 *implPos = wxStringImpl::const_iterator(i.impl()) - m_impl.begin();
135 if ( len == npos )
136 *implLen = npos;
137 else
138 {
139 // too large length is interpreted as "to the end of the string"
140 // FIXME-UTF8: verify this is the case in std::string, assert
141 // otherwise
142 if ( pos + len > length() )
143 len = length() - pos;
144
145 *implLen = (i + len).impl() - i.impl();
146 }
147 }
148 }
149
150 #endif // wxUSE_UNICODE_UTF8
151
152 // ----------------------------------------------------------------------------
153 // wxCStrData converted strings caching
154 // ----------------------------------------------------------------------------
155
156 // FIXME-UTF8: temporarily disabled because it doesn't work with global
157 // string objects; re-enable after fixing this bug and benchmarking
158 // performance to see if using a hash is a good idea at all
159 #if 0
160
161 // For backward compatibility reasons, it must be possible to assign the value
162 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
163 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
164 // because the memory would be freed immediately, but it has to be valid as long
165 // as the string is not modified, so that code like this still works:
166 //
167 // const wxChar *s = str.c_str();
168 // while ( s ) { ... }
169
170 // FIXME-UTF8: not thread safe!
171 // FIXME-UTF8: we currently clear the cached conversion only when the string is
172 // destroyed, but we should do it when the string is modified, to
173 // keep memory usage down
174 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
175 // invalidated the cache on every change, we could keep the previous
176 // conversion
177 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
178 // to use mb_str() or wc_str() instead of (const [w]char*)c_str()
179
180 template<typename T>
181 static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
182 {
183 typename T::iterator i = hash.find(wxConstCast(s, wxString));
184 if ( i != hash.end() )
185 {
186 free(i->second);
187 hash.erase(i);
188 }
189 }
190
191 #if wxUSE_UNICODE
192 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
193 // so we have to use wxString* here and const-cast when used
194 WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
195 wxStringCharConversionCache);
196 static wxStringCharConversionCache gs_stringsCharCache;
197
198 const char* wxCStrData::AsChar() const
199 {
200 // remove previously cache value, if any (see FIXMEs above):
201 DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
202
203 // convert the string and keep it:
204 const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
205 m_str->mb_str().release();
206
207 return s + m_offset;
208 }
209 #endif // wxUSE_UNICODE
210
211 #if !wxUSE_UNICODE_WCHAR
212 WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
213 wxStringWCharConversionCache);
214 static wxStringWCharConversionCache gs_stringsWCharCache;
215
216 const wchar_t* wxCStrData::AsWChar() const
217 {
218 // remove previously cache value, if any (see FIXMEs above):
219 DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
220
221 // convert the string and keep it:
222 const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
223 m_str->wc_str().release();
224
225 return s + m_offset;
226 }
227 #endif // !wxUSE_UNICODE_WCHAR
228
229 wxString::~wxString()
230 {
231 #if wxUSE_UNICODE
232 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
233 DeleteStringFromConversionCache(gs_stringsCharCache, this);
234 #endif
235 #if !wxUSE_UNICODE_WCHAR
236 DeleteStringFromConversionCache(gs_stringsWCharCache, this);
237 #endif
238 }
239 #endif
240
241 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
242 const char* wxCStrData::AsChar() const
243 {
244 #if wxUSE_UNICODE_UTF8
245 if ( wxLocaleIsUtf8 )
246 return AsInternal();
247 #endif
248 // under non-UTF8 locales, we have to convert the internal UTF-8
249 // representation using wxConvLibc and cache the result
250
251 wxString *str = wxConstCast(m_str, wxString);
252
253 // convert the string:
254 //
255 // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
256 // have it) but it's unfortunately not obvious to implement
257 // because we don't know how big buffer do we need for the
258 // given string length (in case of multibyte encodings, e.g.
259 // ISO-2022-JP or UTF-8 when internal representation is wchar_t)
260 //
261 // One idea would be to store more than just m_convertedToChar
262 // in wxString: then we could record the length of the string
263 // which was converted the last time and try to reuse the same
264 // buffer if the current length is not greater than it (this
265 // could still fail because string could have been modified in
266 // place but it would work most of the time, so we'd do it and
267 // only allocate the new buffer if in-place conversion returned
268 // an error). We could also store a bit saying if the string
269 // was modified since the last conversion (and update it in all
270 // operation modifying the string, of course) to avoid unneeded
271 // consequential conversions. But both of these ideas require
272 // adding more fields to wxString and require profiling results
273 // to be sure that we really gain enough from them to justify
274 // doing it.
275 wxCharBuffer buf(str->mb_str());
276
277 // if it failed, return empty string and not NULL to avoid crashes in code
278 // written with either wxWidgets 2 wxString or std::string behaviour in
279 // mind: neither of them ever returns NULL and so we shouldn't neither
280 if ( !buf )
281 return "";
282
283 if ( str->m_convertedToChar &&
284 strlen(buf) == strlen(str->m_convertedToChar) )
285 {
286 // keep the same buffer for as long as possible, so that several calls
287 // to c_str() in a row still work:
288 strcpy(str->m_convertedToChar, buf);
289 }
290 else
291 {
292 str->m_convertedToChar = buf.release();
293 }
294
295 // and keep it:
296 return str->m_convertedToChar + m_offset;
297 }
298 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
299
300 #if !wxUSE_UNICODE_WCHAR
301 const wchar_t* wxCStrData::AsWChar() const
302 {
303 wxString *str = wxConstCast(m_str, wxString);
304
305 // convert the string:
306 wxWCharBuffer buf(str->wc_str());
307
308 // notice that here, unlike above in AsChar(), conversion can't fail as our
309 // internal UTF-8 is always well-formed -- or the string was corrupted and
310 // all bets are off anyhow
311
312 // FIXME-UTF8: do the conversion in-place in the existing buffer
313 if ( str->m_convertedToWChar &&
314 wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
315 {
316 // keep the same buffer for as long as possible, so that several calls
317 // to c_str() in a row still work:
318 memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
319 }
320 else
321 {
322 str->m_convertedToWChar = buf.release();
323 }
324
325 // and keep it:
326 return str->m_convertedToWChar + m_offset;
327 }
328 #endif // !wxUSE_UNICODE_WCHAR
329
330 // ===========================================================================
331 // wxString class core
332 // ===========================================================================
333
334 // ---------------------------------------------------------------------------
335 // construction and conversion
336 // ---------------------------------------------------------------------------
337
338 #if wxUSE_UNICODE_WCHAR
339 /* static */
340 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
341 const wxMBConv& conv)
342 {
343 // anything to do?
344 if ( !psz || nLength == 0 )
345 return SubstrBufFromMB(L"", 0);
346
347 if ( nLength == npos )
348 nLength = wxNO_LEN;
349
350 size_t wcLen;
351 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
352 if ( !wcLen )
353 return SubstrBufFromMB(_T(""), 0);
354 else
355 return SubstrBufFromMB(wcBuf, wcLen);
356 }
357 #endif // wxUSE_UNICODE_WCHAR
358
359 #if wxUSE_UNICODE_UTF8
360 /* static */
361 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
362 const wxMBConv& conv)
363 {
364 // anything to do?
365 if ( !psz || nLength == 0 )
366 return SubstrBufFromMB("", 0);
367
368 // if psz is already in UTF-8, we don't have to do the roundtrip to
369 // wchar_t* and back:
370 if ( conv.IsUTF8() )
371 {
372 // we need to validate the input because UTF8 iterators assume valid
373 // UTF-8 sequence and psz may be invalid:
374 if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
375 {
376 // we must pass the real string length to SubstrBufFromMB ctor
377 if ( nLength == npos )
378 nLength = psz ? strlen(psz) : 0;
379 return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz), nLength);
380 }
381 // else: do the roundtrip through wchar_t*
382 }
383
384 if ( nLength == npos )
385 nLength = wxNO_LEN;
386
387 // first convert to wide string:
388 size_t wcLen;
389 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
390 if ( !wcLen )
391 return SubstrBufFromMB("", 0);
392
393 // and then to UTF-8:
394 SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
395 // widechar -> UTF-8 conversion isn't supposed to ever fail:
396 wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
397
398 return buf;
399 }
400 #endif // wxUSE_UNICODE_UTF8
401
402 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
403 /* static */
404 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
405 const wxMBConv& conv)
406 {
407 // anything to do?
408 if ( !pwz || nLength == 0 )
409 return SubstrBufFromWC("", 0);
410
411 if ( nLength == npos )
412 nLength = wxNO_LEN;
413
414 size_t mbLen;
415 wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
416 if ( !mbLen )
417 return SubstrBufFromWC("", 0);
418 else
419 return SubstrBufFromWC(mbBuf, mbLen);
420 }
421 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
422
423
424 #if wxUSE_UNICODE_WCHAR
425
426 //Convert wxString in Unicode mode to a multi-byte string
427 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
428 {
429 return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL);
430 }
431
432 #elif wxUSE_UNICODE_UTF8
433
434 const wxWCharBuffer wxString::wc_str() const
435 {
436 return wxMBConvStrictUTF8().cMB2WC
437 (
438 m_impl.c_str(),
439 m_impl.length() + 1, // size, not length
440 NULL
441 );
442 }
443
444 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
445 {
446 if ( conv.IsUTF8() )
447 return wxCharBuffer::CreateNonOwned(m_impl.c_str());
448
449 // FIXME-UTF8: use wc_str() here once we have buffers with length
450
451 size_t wcLen;
452 wxWCharBuffer wcBuf(wxMBConvStrictUTF8().cMB2WC
453 (
454 m_impl.c_str(),
455 m_impl.length() + 1, // size
456 &wcLen
457 ));
458 if ( !wcLen )
459 return wxCharBuffer("");
460
461 return conv.cWC2MB(wcBuf, wcLen+1, NULL);
462 }
463
464 #else // ANSI
465
466 //Converts this string to a wide character string if unicode
467 //mode is not enabled and wxUSE_WCHAR_T is enabled
468 const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
469 {
470 return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL);
471 }
472
473 #endif // Unicode/ANSI
474
475 // shrink to minimal size (releasing extra memory)
476 bool wxString::Shrink()
477 {
478 wxString tmp(begin(), end());
479 swap(tmp);
480 return tmp.length() == length();
481 }
482
483 // deprecated compatibility code:
484 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
485 wxStringCharType *wxString::GetWriteBuf(size_t nLen)
486 {
487 return DoGetWriteBuf(nLen);
488 }
489
490 void wxString::UngetWriteBuf()
491 {
492 DoUngetWriteBuf();
493 }
494
495 void wxString::UngetWriteBuf(size_t nLen)
496 {
497 DoUngetWriteBuf(nLen);
498 }
499 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
500
501
502 // ---------------------------------------------------------------------------
503 // data access
504 // ---------------------------------------------------------------------------
505
506 // all functions are inline in string.h
507
508 // ---------------------------------------------------------------------------
509 // concatenation operators
510 // ---------------------------------------------------------------------------
511
512 /*
513 * concatenation functions come in 5 flavours:
514 * string + string
515 * char + string and string + char
516 * C str + string and string + C str
517 */
518
519 wxString operator+(const wxString& str1, const wxString& str2)
520 {
521 #if !wxUSE_STL_BASED_WXSTRING
522 wxASSERT( str1.IsValid() );
523 wxASSERT( str2.IsValid() );
524 #endif
525
526 wxString s = str1;
527 s += str2;
528
529 return s;
530 }
531
532 wxString operator+(const wxString& str, wxUniChar ch)
533 {
534 #if !wxUSE_STL_BASED_WXSTRING
535 wxASSERT( str.IsValid() );
536 #endif
537
538 wxString s = str;
539 s += ch;
540
541 return s;
542 }
543
544 wxString operator+(wxUniChar ch, const wxString& str)
545 {
546 #if !wxUSE_STL_BASED_WXSTRING
547 wxASSERT( str.IsValid() );
548 #endif
549
550 wxString s = ch;
551 s += str;
552
553 return s;
554 }
555
556 wxString operator+(const wxString& str, const char *psz)
557 {
558 #if !wxUSE_STL_BASED_WXSTRING
559 wxASSERT( str.IsValid() );
560 #endif
561
562 wxString s;
563 if ( !s.Alloc(strlen(psz) + str.length()) ) {
564 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
565 }
566 s += str;
567 s += psz;
568
569 return s;
570 }
571
572 wxString operator+(const wxString& str, const wchar_t *pwz)
573 {
574 #if !wxUSE_STL_BASED_WXSTRING
575 wxASSERT( str.IsValid() );
576 #endif
577
578 wxString s;
579 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
580 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
581 }
582 s += str;
583 s += pwz;
584
585 return s;
586 }
587
588 wxString operator+(const char *psz, const wxString& str)
589 {
590 #if !wxUSE_STL_BASED_WXSTRING
591 wxASSERT( str.IsValid() );
592 #endif
593
594 wxString s;
595 if ( !s.Alloc(strlen(psz) + str.length()) ) {
596 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
597 }
598 s = psz;
599 s += str;
600
601 return s;
602 }
603
604 wxString operator+(const wchar_t *pwz, const wxString& str)
605 {
606 #if !wxUSE_STL_BASED_WXSTRING
607 wxASSERT( str.IsValid() );
608 #endif
609
610 wxString s;
611 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
612 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
613 }
614 s = pwz;
615 s += str;
616
617 return s;
618 }
619
620 // ---------------------------------------------------------------------------
621 // string comparison
622 // ---------------------------------------------------------------------------
623
624 bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
625 {
626 return (length() == 1) && (compareWithCase ? GetChar(0u) == c
627 : wxToupper(GetChar(0u)) == wxToupper(c));
628 }
629
630 #ifdef HAVE_STD_STRING_COMPARE
631
632 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
633 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
634 // sort strings in characters code point order by sorting the byte sequence
635 // in byte values order (i.e. what strcmp() and memcmp() do).
636
637 int wxString::compare(const wxString& str) const
638 {
639 return m_impl.compare(str.m_impl);
640 }
641
642 int wxString::compare(size_t nStart, size_t nLen,
643 const wxString& str) const
644 {
645 size_t pos, len;
646 PosLenToImpl(nStart, nLen, &pos, &len);
647 return m_impl.compare(pos, len, str.m_impl);
648 }
649
650 int wxString::compare(size_t nStart, size_t nLen,
651 const wxString& str,
652 size_t nStart2, size_t nLen2) const
653 {
654 size_t pos, len;
655 PosLenToImpl(nStart, nLen, &pos, &len);
656
657 size_t pos2, len2;
658 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
659
660 return m_impl.compare(pos, len, str.m_impl, pos2, len2);
661 }
662
663 int wxString::compare(const char* sz) const
664 {
665 return m_impl.compare(ImplStr(sz));
666 }
667
668 int wxString::compare(const wchar_t* sz) const
669 {
670 return m_impl.compare(ImplStr(sz));
671 }
672
673 int wxString::compare(size_t nStart, size_t nLen,
674 const char* sz, size_t nCount) const
675 {
676 size_t pos, len;
677 PosLenToImpl(nStart, nLen, &pos, &len);
678
679 SubstrBufFromMB str(ImplStr(sz, nCount));
680
681 return m_impl.compare(pos, len, str.data, str.len);
682 }
683
684 int wxString::compare(size_t nStart, size_t nLen,
685 const wchar_t* sz, size_t nCount) const
686 {
687 size_t pos, len;
688 PosLenToImpl(nStart, nLen, &pos, &len);
689
690 SubstrBufFromWC str(ImplStr(sz, nCount));
691
692 return m_impl.compare(pos, len, str.data, str.len);
693 }
694
695 #else // !HAVE_STD_STRING_COMPARE
696
697 static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
698 const wxStringCharType* s2, size_t l2)
699 {
700 if( l1 == l2 )
701 return wxStringMemcmp(s1, s2, l1);
702 else if( l1 < l2 )
703 {
704 int ret = wxStringMemcmp(s1, s2, l1);
705 return ret == 0 ? -1 : ret;
706 }
707 else
708 {
709 int ret = wxStringMemcmp(s1, s2, l2);
710 return ret == 0 ? +1 : ret;
711 }
712 }
713
714 int wxString::compare(const wxString& str) const
715 {
716 return ::wxDoCmp(m_impl.data(), m_impl.length(),
717 str.m_impl.data(), str.m_impl.length());
718 }
719
720 int wxString::compare(size_t nStart, size_t nLen,
721 const wxString& str) const
722 {
723 wxASSERT(nStart <= length());
724 size_type strLen = length() - nStart;
725 nLen = strLen < nLen ? strLen : nLen;
726
727 size_t pos, len;
728 PosLenToImpl(nStart, nLen, &pos, &len);
729
730 return ::wxDoCmp(m_impl.data() + pos, len,
731 str.m_impl.data(), str.m_impl.length());
732 }
733
734 int wxString::compare(size_t nStart, size_t nLen,
735 const wxString& str,
736 size_t nStart2, size_t nLen2) const
737 {
738 wxASSERT(nStart <= length());
739 wxASSERT(nStart2 <= str.length());
740 size_type strLen = length() - nStart,
741 strLen2 = str.length() - nStart2;
742 nLen = strLen < nLen ? strLen : nLen;
743 nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
744
745 size_t pos, len;
746 PosLenToImpl(nStart, nLen, &pos, &len);
747 size_t pos2, len2;
748 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
749
750 return ::wxDoCmp(m_impl.data() + pos, len,
751 str.m_impl.data() + pos2, len2);
752 }
753
754 int wxString::compare(const char* sz) const
755 {
756 SubstrBufFromMB str(ImplStr(sz, npos));
757 if ( str.len == npos )
758 str.len = wxStringStrlen(str.data);
759 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
760 }
761
762 int wxString::compare(const wchar_t* sz) const
763 {
764 SubstrBufFromWC str(ImplStr(sz, npos));
765 if ( str.len == npos )
766 str.len = wxStringStrlen(str.data);
767 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
768 }
769
770 int wxString::compare(size_t nStart, size_t nLen,
771 const char* sz, size_t nCount) const
772 {
773 wxASSERT(nStart <= length());
774 size_type strLen = length() - nStart;
775 nLen = strLen < nLen ? strLen : nLen;
776
777 size_t pos, len;
778 PosLenToImpl(nStart, nLen, &pos, &len);
779
780 SubstrBufFromMB str(ImplStr(sz, nCount));
781 if ( str.len == npos )
782 str.len = wxStringStrlen(str.data);
783
784 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
785 }
786
787 int wxString::compare(size_t nStart, size_t nLen,
788 const wchar_t* sz, size_t nCount) const
789 {
790 wxASSERT(nStart <= length());
791 size_type strLen = length() - nStart;
792 nLen = strLen < nLen ? strLen : nLen;
793
794 size_t pos, len;
795 PosLenToImpl(nStart, nLen, &pos, &len);
796
797 SubstrBufFromWC str(ImplStr(sz, nCount));
798 if ( str.len == npos )
799 str.len = wxStringStrlen(str.data);
800
801 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
802 }
803
804 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
805
806
807 // ---------------------------------------------------------------------------
808 // find_{first,last}_[not]_of functions
809 // ---------------------------------------------------------------------------
810
811 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
812
813 // NB: All these functions are implemented with the argument being wxChar*,
814 // i.e. widechar string in any Unicode build, even though native string
815 // representation is char* in the UTF-8 build. This is because we couldn't
816 // use memchr() to determine if a character is in a set encoded as UTF-8.
817
818 size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
819 {
820 return find_first_of(sz, nStart, wxStrlen(sz));
821 }
822
823 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
824 {
825 return find_first_not_of(sz, nStart, wxStrlen(sz));
826 }
827
828 size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
829 {
830 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
831
832 size_t idx = nStart;
833 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
834 {
835 if ( wxTmemchr(sz, *i, n) )
836 return idx;
837 }
838
839 return npos;
840 }
841
842 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
843 {
844 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
845
846 size_t idx = nStart;
847 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
848 {
849 if ( !wxTmemchr(sz, *i, n) )
850 return idx;
851 }
852
853 return npos;
854 }
855
856
857 size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
858 {
859 return find_last_of(sz, nStart, wxStrlen(sz));
860 }
861
862 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
863 {
864 return find_last_not_of(sz, nStart, wxStrlen(sz));
865 }
866
867 size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
868 {
869 size_t len = length();
870
871 if ( nStart == npos )
872 {
873 nStart = len - 1;
874 }
875 else
876 {
877 wxASSERT_MSG( nStart <= len, _T("invalid index") );
878 }
879
880 size_t idx = nStart;
881 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
882 i != rend(); --idx, ++i )
883 {
884 if ( wxTmemchr(sz, *i, n) )
885 return idx;
886 }
887
888 return npos;
889 }
890
891 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
892 {
893 size_t len = length();
894
895 if ( nStart == npos )
896 {
897 nStart = len - 1;
898 }
899 else
900 {
901 wxASSERT_MSG( nStart <= len, _T("invalid index") );
902 }
903
904 size_t idx = nStart;
905 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
906 i != rend(); --idx, ++i )
907 {
908 if ( !wxTmemchr(sz, *i, n) )
909 return idx;
910 }
911
912 return npos;
913 }
914
915 size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
916 {
917 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
918
919 size_t idx = nStart;
920 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
921 {
922 if ( *i != ch )
923 return idx;
924 }
925
926 return npos;
927 }
928
929 size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
930 {
931 size_t len = length();
932
933 if ( nStart == npos )
934 {
935 nStart = len - 1;
936 }
937 else
938 {
939 wxASSERT_MSG( nStart <= len, _T("invalid index") );
940 }
941
942 size_t idx = nStart;
943 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
944 i != rend(); --idx, ++i )
945 {
946 if ( *i != ch )
947 return idx;
948 }
949
950 return npos;
951 }
952
953 // the functions above were implemented for wchar_t* arguments in Unicode
954 // build and char* in ANSI build; below are implementations for the other
955 // version:
956 #if wxUSE_UNICODE
957 #define wxOtherCharType char
958 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
959 #else
960 #define wxOtherCharType wchar_t
961 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
962 #endif
963
964 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
965 { return find_first_of(STRCONV(sz), nStart); }
966
967 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
968 size_t n) const
969 { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
970 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
971 { return find_last_of(STRCONV(sz), nStart); }
972 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
973 size_t n) const
974 { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
975 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
976 { return find_first_not_of(STRCONV(sz), nStart); }
977 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
978 size_t n) const
979 { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
980 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
981 { return find_last_not_of(STRCONV(sz), nStart); }
982 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
983 size_t n) const
984 { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
985
986 #undef wxOtherCharType
987 #undef STRCONV
988
989 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
990
991 // ===========================================================================
992 // other common string functions
993 // ===========================================================================
994
995 int wxString::CmpNoCase(const wxString& s) const
996 {
997 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
998
999 const_iterator i1 = begin();
1000 const_iterator end1 = end();
1001 const_iterator i2 = s.begin();
1002 const_iterator end2 = s.end();
1003
1004 for ( ; i1 != end1 && i2 != end2; ++i1, ++i2 )
1005 {
1006 wxUniChar lower1 = (wxChar)wxTolower(*i1);
1007 wxUniChar lower2 = (wxChar)wxTolower(*i2);
1008 if ( lower1 != lower2 )
1009 return lower1 < lower2 ? -1 : 1;
1010 }
1011
1012 size_t len1 = length();
1013 size_t len2 = s.length();
1014
1015 if ( len1 < len2 )
1016 return -1;
1017 else if ( len1 > len2 )
1018 return 1;
1019 return 0;
1020 }
1021
1022
1023 #if wxUSE_UNICODE
1024
1025 #ifdef __MWERKS__
1026 #ifndef __SCHAR_MAX__
1027 #define __SCHAR_MAX__ 127
1028 #endif
1029 #endif
1030
1031 wxString wxString::FromAscii(const char *ascii, size_t len)
1032 {
1033 if (!ascii || len == 0)
1034 return wxEmptyString;
1035
1036 wxString res;
1037
1038 {
1039 wxStringInternalBuffer buf(res, len);
1040 wxStringCharType *dest = buf;
1041
1042 for ( ; len > 0; --len )
1043 {
1044 unsigned char c = (unsigned char)*ascii++;
1045 wxASSERT_MSG( c < 0x80,
1046 _T("Non-ASCII value passed to FromAscii().") );
1047
1048 *dest++ = (wchar_t)c;
1049 }
1050 }
1051
1052 return res;
1053 }
1054
1055 wxString wxString::FromAscii(const char *ascii)
1056 {
1057 return FromAscii(ascii, wxStrlen(ascii));
1058 }
1059
1060 wxString wxString::FromAscii(char ascii)
1061 {
1062 // What do we do with '\0' ?
1063
1064 unsigned char c = (unsigned char)ascii;
1065
1066 wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") );
1067
1068 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1069 return wxString(wxUniChar((wchar_t)c));
1070 }
1071
1072 const wxCharBuffer wxString::ToAscii() const
1073 {
1074 // this will allocate enough space for the terminating NUL too
1075 wxCharBuffer buffer(length());
1076 char *dest = buffer.data();
1077
1078 for ( const_iterator i = begin(); i != end(); ++i )
1079 {
1080 wxUniChar c(*i);
1081 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1082 *dest++ = c.IsAscii() ? (char)c : '_';
1083
1084 // the output string can't have embedded NULs anyhow, so we can safely
1085 // stop at first of them even if we do have any
1086 if ( !c )
1087 break;
1088 }
1089
1090 return buffer;
1091 }
1092
1093 #endif // wxUSE_UNICODE
1094
1095 // extract string of length nCount starting at nFirst
1096 wxString wxString::Mid(size_t nFirst, size_t nCount) const
1097 {
1098 size_t nLen = length();
1099
1100 // default value of nCount is npos and means "till the end"
1101 if ( nCount == npos )
1102 {
1103 nCount = nLen - nFirst;
1104 }
1105
1106 // out-of-bounds requests return sensible things
1107 if ( nFirst + nCount > nLen )
1108 {
1109 nCount = nLen - nFirst;
1110 }
1111
1112 if ( nFirst > nLen )
1113 {
1114 // AllocCopy() will return empty string
1115 return wxEmptyString;
1116 }
1117
1118 wxString dest(*this, nFirst, nCount);
1119 if ( dest.length() != nCount )
1120 {
1121 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1122 }
1123
1124 return dest;
1125 }
1126
1127 // check that the string starts with prefix and return the rest of the string
1128 // in the provided pointer if it is not NULL, otherwise return false
1129 bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
1130 {
1131 if ( compare(0, prefix.length(), prefix) != 0 )
1132 return false;
1133
1134 if ( rest )
1135 {
1136 // put the rest of the string into provided pointer
1137 rest->assign(*this, prefix.length(), npos);
1138 }
1139
1140 return true;
1141 }
1142
1143
1144 // check that the string ends with suffix and return the rest of it in the
1145 // provided pointer if it is not NULL, otherwise return false
1146 bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
1147 {
1148 int start = length() - suffix.length();
1149
1150 if ( start < 0 || compare(start, npos, suffix) != 0 )
1151 return false;
1152
1153 if ( rest )
1154 {
1155 // put the rest of the string into provided pointer
1156 rest->assign(*this, 0, start);
1157 }
1158
1159 return true;
1160 }
1161
1162
1163 // extract nCount last (rightmost) characters
1164 wxString wxString::Right(size_t nCount) const
1165 {
1166 if ( nCount > length() )
1167 nCount = length();
1168
1169 wxString dest(*this, length() - nCount, nCount);
1170 if ( dest.length() != nCount ) {
1171 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1172 }
1173 return dest;
1174 }
1175
1176 // get all characters after the last occurence of ch
1177 // (returns the whole string if ch not found)
1178 wxString wxString::AfterLast(wxUniChar ch) const
1179 {
1180 wxString str;
1181 int iPos = Find(ch, true);
1182 if ( iPos == wxNOT_FOUND )
1183 str = *this;
1184 else
1185 str = wx_str() + iPos + 1;
1186
1187 return str;
1188 }
1189
1190 // extract nCount first (leftmost) characters
1191 wxString wxString::Left(size_t nCount) const
1192 {
1193 if ( nCount > length() )
1194 nCount = length();
1195
1196 wxString dest(*this, 0, nCount);
1197 if ( dest.length() != nCount ) {
1198 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1199 }
1200 return dest;
1201 }
1202
1203 // get all characters before the first occurence of ch
1204 // (returns the whole string if ch not found)
1205 wxString wxString::BeforeFirst(wxUniChar ch) const
1206 {
1207 int iPos = Find(ch);
1208 if ( iPos == wxNOT_FOUND ) iPos = length();
1209 return wxString(*this, 0, iPos);
1210 }
1211
1212 /// get all characters before the last occurence of ch
1213 /// (returns empty string if ch not found)
1214 wxString wxString::BeforeLast(wxUniChar ch) const
1215 {
1216 wxString str;
1217 int iPos = Find(ch, true);
1218 if ( iPos != wxNOT_FOUND && iPos != 0 )
1219 str = wxString(c_str(), iPos);
1220
1221 return str;
1222 }
1223
1224 /// get all characters after the first occurence of ch
1225 /// (returns empty string if ch not found)
1226 wxString wxString::AfterFirst(wxUniChar ch) const
1227 {
1228 wxString str;
1229 int iPos = Find(ch);
1230 if ( iPos != wxNOT_FOUND )
1231 str = wx_str() + iPos + 1;
1232
1233 return str;
1234 }
1235
1236 // replace first (or all) occurences of some substring with another one
1237 size_t wxString::Replace(const wxString& strOld,
1238 const wxString& strNew, bool bReplaceAll)
1239 {
1240 // if we tried to replace an empty string we'd enter an infinite loop below
1241 wxCHECK_MSG( !strOld.empty(), 0,
1242 _T("wxString::Replace(): invalid parameter") );
1243
1244 wxSTRING_INVALIDATE_INDEX_CACHE();
1245
1246 size_t uiCount = 0; // count of replacements made
1247
1248 // optimize the special common case: replacement of one character by
1249 // another one (in UTF-8 case we can only do this for ASCII characters)
1250 //
1251 // benchmarks show that this special version is around 3 times faster
1252 // (depending on the proportion of matching characters and UTF-8/wchar_t
1253 // build)
1254 if ( strOld.m_impl.length() == 1 && strNew.m_impl.length() == 1 )
1255 {
1256 const wxStringCharType chOld = strOld.m_impl[0],
1257 chNew = strNew.m_impl[0];
1258
1259 // this loop is the simplified version of the one below
1260 for ( size_t pos = 0; ; )
1261 {
1262 pos = m_impl.find(chOld, pos);
1263 if ( pos == npos )
1264 break;
1265
1266 m_impl[pos++] = chNew;
1267
1268 uiCount++;
1269
1270 if ( !bReplaceAll )
1271 break;
1272 }
1273 }
1274 else // general case
1275 {
1276 const size_t uiOldLen = strOld.m_impl.length();
1277 const size_t uiNewLen = strNew.m_impl.length();
1278
1279 for ( size_t pos = 0; ; )
1280 {
1281 pos = m_impl.find(strOld.m_impl, pos);
1282 if ( pos == npos )
1283 break;
1284
1285 // replace this occurrence of the old string with the new one
1286 m_impl.replace(pos, uiOldLen, strNew.m_impl);
1287
1288 // move up pos past the string that was replaced
1289 pos += uiNewLen;
1290
1291 // increase replace count
1292 uiCount++;
1293
1294 // stop after the first one?
1295 if ( !bReplaceAll )
1296 break;
1297 }
1298 }
1299
1300 return uiCount;
1301 }
1302
1303 bool wxString::IsAscii() const
1304 {
1305 for ( const_iterator i = begin(); i != end(); ++i )
1306 {
1307 if ( !(*i).IsAscii() )
1308 return false;
1309 }
1310
1311 return true;
1312 }
1313
1314 bool wxString::IsWord() const
1315 {
1316 for ( const_iterator i = begin(); i != end(); ++i )
1317 {
1318 if ( !wxIsalpha(*i) )
1319 return false;
1320 }
1321
1322 return true;
1323 }
1324
1325 bool wxString::IsNumber() const
1326 {
1327 if ( empty() )
1328 return true;
1329
1330 const_iterator i = begin();
1331
1332 if ( *i == _T('-') || *i == _T('+') )
1333 ++i;
1334
1335 for ( ; i != end(); ++i )
1336 {
1337 if ( !wxIsdigit(*i) )
1338 return false;
1339 }
1340
1341 return true;
1342 }
1343
1344 wxString wxString::Strip(stripType w) const
1345 {
1346 wxString s = *this;
1347 if ( w & leading ) s.Trim(false);
1348 if ( w & trailing ) s.Trim(true);
1349 return s;
1350 }
1351
1352 // ---------------------------------------------------------------------------
1353 // case conversion
1354 // ---------------------------------------------------------------------------
1355
1356 wxString& wxString::MakeUpper()
1357 {
1358 for ( iterator it = begin(), en = end(); it != en; ++it )
1359 *it = (wxChar)wxToupper(*it);
1360
1361 return *this;
1362 }
1363
1364 wxString& wxString::MakeLower()
1365 {
1366 for ( iterator it = begin(), en = end(); it != en; ++it )
1367 *it = (wxChar)wxTolower(*it);
1368
1369 return *this;
1370 }
1371
1372 wxString& wxString::MakeCapitalized()
1373 {
1374 const iterator en = end();
1375 iterator it = begin();
1376 if ( it != en )
1377 {
1378 *it = (wxChar)wxToupper(*it);
1379 for ( ++it; it != en; ++it )
1380 *it = (wxChar)wxTolower(*it);
1381 }
1382
1383 return *this;
1384 }
1385
1386 // ---------------------------------------------------------------------------
1387 // trimming and padding
1388 // ---------------------------------------------------------------------------
1389
1390 // some compilers (VC++ 6.0 not to name them) return true for a call to
1391 // isspace('\xEA') in the C locale which seems to be broken to me, but we have
1392 // to live with this by checking that the character is a 7 bit one - even if
1393 // this may fail to detect some spaces (I don't know if Unicode doesn't have
1394 // space-like symbols somewhere except in the first 128 chars), it is arguably
1395 // still better than trimming away accented letters
1396 inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1397
1398 // trims spaces (in the sense of isspace) from left or right side
1399 wxString& wxString::Trim(bool bFromRight)
1400 {
1401 // first check if we're going to modify the string at all
1402 if ( !empty() &&
1403 (
1404 (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1405 (!bFromRight && wxSafeIsspace(GetChar(0u)))
1406 )
1407 )
1408 {
1409 if ( bFromRight )
1410 {
1411 // find last non-space character
1412 reverse_iterator psz = rbegin();
1413 while ( (psz != rend()) && wxSafeIsspace(*psz) )
1414 ++psz;
1415
1416 // truncate at trailing space start
1417 erase(psz.base(), end());
1418 }
1419 else
1420 {
1421 // find first non-space character
1422 iterator psz = begin();
1423 while ( (psz != end()) && wxSafeIsspace(*psz) )
1424 ++psz;
1425
1426 // fix up data and length
1427 erase(begin(), psz);
1428 }
1429 }
1430
1431 return *this;
1432 }
1433
1434 // adds nCount characters chPad to the string from either side
1435 wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1436 {
1437 wxString s(chPad, nCount);
1438
1439 if ( bFromRight )
1440 *this += s;
1441 else
1442 {
1443 s += *this;
1444 swap(s);
1445 }
1446
1447 return *this;
1448 }
1449
1450 // truncate the string
1451 wxString& wxString::Truncate(size_t uiLen)
1452 {
1453 if ( uiLen < length() )
1454 {
1455 erase(begin() + uiLen, end());
1456 }
1457 //else: nothing to do, string is already short enough
1458
1459 return *this;
1460 }
1461
1462 // ---------------------------------------------------------------------------
1463 // finding (return wxNOT_FOUND if not found and index otherwise)
1464 // ---------------------------------------------------------------------------
1465
1466 // find a character
1467 int wxString::Find(wxUniChar ch, bool bFromEnd) const
1468 {
1469 size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1470
1471 return (idx == npos) ? wxNOT_FOUND : (int)idx;
1472 }
1473
1474 // ----------------------------------------------------------------------------
1475 // conversion to numbers
1476 // ----------------------------------------------------------------------------
1477
// All the integer conversion functions below share the same implementation,
// which is factored out into WX_STRING_TO_INT_TYPE(). Number extraction works
// correctly on UTF-8 strings, so wxStringCharType and wx_str() can be used
// for maximum efficiency.

// DO_IF_NOT_WINCE(x) expands to x everywhere except under __WXWINCE__, where
// it expands to nothing; it wraps the errno handling below.
#ifdef __WXWINCE__
    #define DO_IF_NOT_WINCE(x)
#else
    #define DO_IF_NOT_WINCE(x)      x
#endif

// Body of a "bool ToXXX(T *out, int base) const" method: parse the string
// with func() and, on success, store the result in *out and return true.
// The macro ends without a semicolon, the caller supplies it.
#define WX_STRING_TO_INT_TYPE(out, base, func, T)                           \
    wxCHECK_MSG( out, false, _T("NULL output pointer") );                   \
    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );  \
                                                                            \
    DO_IF_NOT_WINCE( errno = 0; )                                           \
                                                                            \
    const wxStringCharType * const pszBegin = wx_str();                     \
    wxStringCharType *pszStop;                                              \
    const T parsed = func(pszBegin, &pszStop, base);                        \
                                                                            \
    /* succeed only if the string was not empty to start with, the scan */  \
    /* was stopped by the terminating NUL and no under/overflow occurred */ \
    const bool parsedAll = (pszStop != pszBegin) && !*pszStop;              \
    if ( !parsedAll DO_IF_NOT_WINCE(|| errno == ERANGE) )                   \
        return false;                                                       \
                                                                            \
    *out = parsed;                                                          \
    return true
1505
// Convert the whole string to a long using wxStrtol() in the given base
// (0 or 2..36, checked by an assertion only).
//
// Returns true and stores the result in *pVal only if the string is not
// empty, was consumed up to its terminating NUL and (except under
// __WXWINCE__) errno was not set to ERANGE. Otherwise returns false without
// writing to *pVal.
bool wxString::ToLong(long *pVal, int base) const
{
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtol, long);
}
1510
// Convert the whole string to an unsigned long using wxStrtoul() in the
// given base (0 or 2..36, checked by an assertion only).
//
// Returns true and stores the result in *pVal only if the string is not
// empty, was consumed up to its terminating NUL and (except under
// __WXWINCE__) errno was not set to ERANGE. Otherwise returns false without
// writing to *pVal.
bool wxString::ToULong(unsigned long *pVal, int base) const
{
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoul, unsigned long);
}
1515
// Convert the whole string to a wxLongLong_t using wxStrtoll() in the given
// base (0 or 2..36, checked by an assertion only).
//
// Returns true and stores the result in *pVal only if the string is not
// empty, was consumed up to its terminating NUL and (except under
// __WXWINCE__) errno was not set to ERANGE. Otherwise returns false without
// writing to *pVal.
bool wxString::ToLongLong(wxLongLong_t *pVal, int base) const
{
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoll, wxLongLong_t);
}
1520
// Convert the whole string to a wxULongLong_t using wxStrtoull() in the
// given base (0 or 2..36, checked by an assertion only).
//
// Returns true and stores the result in *pVal only if the string is not
// empty, was consumed up to its terminating NUL and (except under
// __WXWINCE__) errno was not set to ERANGE. Otherwise returns false without
// writing to *pVal.
bool wxString::ToULongLong(wxULongLong_t *pVal, int base) const
{
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoull, wxULongLong_t);
}
1525
1526 bool wxString::ToDouble(double *pVal) const
1527 {
1528 wxCHECK_MSG( pVal, false, _T("NULL output pointer") );
1529
1530 DO_IF_NOT_WINCE( errno = 0; )
1531
1532 const wxChar *start = c_str();
1533 wxChar *end;
1534 double val = wxStrtod(start, &end);
1535
1536 // return true only if scan was stopped by the terminating NUL and if the
1537 // string was not empty to start with and no under/overflow occurred
1538 if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )
1539 return false;
1540
1541 *pVal = val;
1542
1543 return true;
1544 }
1545
1546 // ---------------------------------------------------------------------------
1547 // formatted output
1548 // ---------------------------------------------------------------------------
1549
1550 #if !wxUSE_UTF8_LOCALE_ONLY
1551 /* static */
1552 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1553 wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
1554 #else
1555 wxString wxString::DoFormatWchar(const wxChar *format, ...)
1556 #endif
1557 {
1558 va_list argptr;
1559 va_start(argptr, format);
1560
1561 wxString s;
1562 s.PrintfV(format, argptr);
1563
1564 va_end(argptr);
1565
1566 return s;
1567 }
1568 #endif // !wxUSE_UTF8_LOCALE_ONLY
1569
#if wxUSE_UNICODE_UTF8
// Return a new string built by formatting the variable arguments according
// to the char format string, using PrintfV().
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString formatted;

    va_list args;
    va_start(args, format);
    formatted.PrintfV(format, args);
    va_end(args);

    return formatted;
}
#endif // wxUSE_UNICODE_UTF8
1585
1586 /* static */
1587 wxString wxString::FormatV(const wxString& format, va_list argptr)
1588 {
1589 wxString s;
1590 s.PrintfV(format, argptr);
1591 return s;
1592 }
1593
1594 #if !wxUSE_UTF8_LOCALE_ONLY
1595 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1596 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
1597 #else
1598 int wxString::DoPrintfWchar(const wxChar *format, ...)
1599 #endif
1600 {
1601 va_list argptr;
1602 va_start(argptr, format);
1603
1604 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1605 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1606 // because it's the only cast that works safely for downcasting when
1607 // multiple inheritance is used:
1608 wxString *str = static_cast<wxString*>(this);
1609 #else
1610 wxString *str = this;
1611 #endif
1612
1613 int iLen = str->PrintfV(format, argptr);
1614
1615 va_end(argptr);
1616
1617 return iLen;
1618 }
1619 #endif // !wxUSE_UTF8_LOCALE_ONLY
1620
#if wxUSE_UNICODE_UTF8
// Format the variable arguments according to the char format string into
// this string and return whatever PrintfV() returns.
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);
    const int nWritten = PrintfV(format, args);
    va_end(args);

    return nWritten;
}
#endif // wxUSE_UNICODE_UTF8
1634
1635 /*
    Uses wxVsnprintf and places the result into this string.
1637
1638 In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1639 it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in
1640 the ISO C99 (and thus SUSv3) standard the return value for the case of
1641 an undersized buffer is inconsistent. For conforming vsnprintf
1642 implementations the function must return the number of characters that
1643 would have been printed had the buffer been large enough. For conforming
1644 vswprintf implementations the function must return a negative number
1645 and set errno.
1646
1647 What vswprintf sets errno to is undefined but Darwin seems to set it to
1648 EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of
1649 those are defined in the standard and backed up by several conformance
1650 statements. Note that ENOMEM mentioned in the manual page does not
1651 apply to swprintf, only wprintf and fwprintf.
1652
1653 Official manual page:
1654 http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1655
1656 Some conformance statements (AIX, Solaris):
1657 http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1658 http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1659
1660 Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1661 EILSEQ and EINVAL are specifically defined to mean the error is other than
1662 an undersized buffer and no other errno are defined we treat those two
1663 as meaning hard errors and everything else gets the old behavior which
1664 is to keep looping and increasing buffer size until the function succeeds.
1665
1666 In practice it's impossible to determine before compilation which behavior
1667 may be used. The vswprintf function may have vsnprintf-like behavior or
1668 vice-versa. Behavior detected on one release can theoretically change
1669 with an updated release. Not to mention that configure testing for it
1670 would require the test to be run on the host system, not the build system
1671 which makes cross compilation difficult. Therefore, we make no assumptions
1672 about behavior and try our best to handle every known case, including the
1673 case where wxVsnprintf returns a negative number and fails to set errno.
1674
1675 There is yet one more non-standard implementation and that is our own.
1676 Fortunately, that can be detected at compile-time.
1677
1678 On top of all that, ISO C99 explicitly defines snprintf to write a null
    character to the last position of the specified buffer.  That would be at
    the given buffer size minus 1.  It is supposed to do this even if it
1681 turns out that the buffer is sized too small.
1682
1683 Darwin (tested on 10.5) follows the C99 behavior exactly.
1684
1685 Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1686 errno even when it fails. However, it only seems to ever fail due
1687 to an undersized buffer.
1688 */
1689 #if wxUSE_UNICODE_UTF8
1690 template<typename BufferType>
1691 #else
1692 // we only need one version in non-UTF8 builds and at least two Windows
1693 // compilers have problems with this function template, so use just one
1694 // normal function here
1695 #endif
1696 static int DoStringPrintfV(wxString& str,
1697 const wxString& format, va_list argptr)
1698 {
1699 int size = 1024;
1700
1701 for ( ;; )
1702 {
1703 #if wxUSE_UNICODE_UTF8
1704 BufferType tmp(str, size + 1);
1705 typename BufferType::CharType *buf = tmp;
1706 #else
1707 wxStringBuffer tmp(str, size + 1);
1708 wxChar *buf = tmp;
1709 #endif
1710
1711 if ( !buf )
1712 {
1713 // out of memory
1714
1715 // in UTF-8 build, leaving uninitialized junk in the buffer
1716 // could result in invalid non-empty UTF-8 string, so just
1717 // reset the string to empty on failure:
1718 buf[0] = '\0';
1719 return -1;
1720 }
1721
1722 // wxVsnprintf() may modify the original arg pointer, so pass it
1723 // only a copy
1724 va_list argptrcopy;
1725 wxVaCopy(argptrcopy, argptr);
1726
1727 #ifndef __WXWINCE__
1728 // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1729 errno = 0;
1730 #endif
1731 int len = wxVsnprintf(buf, size, format, argptrcopy);
1732 va_end(argptrcopy);
1733
1734 // some implementations of vsnprintf() don't NUL terminate
1735 // the string if there is not enough space for it so
1736 // always do it manually
1737 // FIXME: This really seems to be the wrong and would be an off-by-one
1738 // bug except the code above allocates an extra character.
1739 buf[size] = _T('\0');
1740
1741 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1742 // total number of characters which would have been written if the
1743 // buffer were large enough (newer standards such as Unix98)
1744 if ( len < 0 )
1745 {
1746 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1747 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1748 // is true if *both* of them use our own implementation,
1749 // otherwise we can't be sure
1750 #if wxUSE_WXVSNPRINTF
1751 // we know that our own implementation of wxVsnprintf() returns -1
1752 // only for a format error - thus there's something wrong with
1753 // the user's format string
1754 buf[0] = '\0';
1755 return -1;
1756 #else // possibly using system version
1757 // assume it only returns error if there is not enough space, but
1758 // as we don't know how much we need, double the current size of
1759 // the buffer
1760 #ifndef __WXWINCE__
1761 if( (errno == EILSEQ) || (errno == EINVAL) )
1762 // If errno was set to one of the two well-known hard errors
1763 // then fail immediately to avoid an infinite loop.
1764 return -1;
1765 else
1766 #endif // __WXWINCE__
1767 // still not enough, as we don't know how much we need, double the
1768 // current size of the buffer
1769 size *= 2;
1770 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1771 }
1772 else if ( len >= size )
1773 {
1774 #if wxUSE_WXVSNPRINTF
1775 // we know that our own implementation of wxVsnprintf() returns
1776 // size+1 when there's not enough space but that's not the size
1777 // of the required buffer!
1778 size *= 2; // so we just double the current size of the buffer
1779 #else
1780 // some vsnprintf() implementations NUL-terminate the buffer and
1781 // some don't in len == size case, to be safe always add 1
1782 // FIXME: I don't quite understand this comment. The vsnprintf
1783 // function is specifically defined to return the number of
1784 // characters printed not including the null terminator.
1785 // So OF COURSE you need to add 1 to get the right buffer size.
1786 // The following line is definitely correct, no question.
1787 size = len + 1;
1788 #endif
1789 }
1790 else // ok, there was enough space
1791 {
1792 break;
1793 }
1794 }
1795
1796 // we could have overshot
1797 str.Shrink();
1798
1799 return str.length();
1800 }
1801
1802 int wxString::PrintfV(const wxString& format, va_list argptr)
1803 {
1804 #if wxUSE_UNICODE_UTF8
1805 #if wxUSE_STL_BASED_WXSTRING
1806 typedef wxStringTypeBuffer<char> Utf8Buffer;
1807 #else
1808 typedef wxStringInternalBuffer Utf8Buffer;
1809 #endif
1810 #endif
1811
1812 #if wxUSE_UTF8_LOCALE_ONLY
1813 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
1814 #else
1815 #if wxUSE_UNICODE_UTF8
1816 if ( wxLocaleIsUtf8 )
1817 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
1818 else
1819 // wxChar* version
1820 return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
1821 #else
1822 return DoStringPrintfV(*this, format, argptr);
1823 #endif // UTF8/WCHAR
1824 #endif
1825 }
1826
1827 // ----------------------------------------------------------------------------
1828 // misc other operations
1829 // ----------------------------------------------------------------------------
1830
// returns true if the string matches the pattern which may contain '*' and
// '?' metacharacters (as usual, '?' matches any character and '*' any number
// of them)
//
// All other mask characters must be equal to the corresponding character of
// the string. The whole string must be matched for the function to succeed.
bool wxString::Matches(const wxString& mask) const
{
    // I disable this code as it doesn't seem to be faster (in fact, it seems
    // to be much slower) than the old, hand-written code below and using it
    // here requires always linking with libregex even if the user code doesn't
    // use it
#if 0 // wxUSE_REGEX
    // NOTE(review): this disabled branch refers to pszMask, which is not
    //               declared in it; it would need updating before being
    //               re-enabled

    // first translate the shell-like mask into a regex
    wxString pattern;
    pattern.reserve(wxStrlen(pszMask));

    pattern += _T('^');
    while ( *pszMask )
    {
        switch ( *pszMask )
        {
            case _T('?'):
                pattern += _T('.');
                break;

            case _T('*'):
                pattern += _T(".*");
                break;

            case _T('^'):
            case _T('.'):
            case _T('$'):
            case _T('('):
            case _T(')'):
            case _T('|'):
            case _T('+'):
            case _T('\\'):
                // these characters are special in a RE, quote them
                // (however note that we don't quote '[' and ']' to allow
                // using them for Unix shell like matching)
                pattern += _T('\\');
                // fall through

            default:
                pattern += *pszMask;
        }

        pszMask++;
    }
    pattern += _T('$');

    // and now use it
    return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
#else // !wxUSE_REGEX
    // TODO: this is, of course, awfully inefficient...

    // FIXME-UTF8: implement using iterators, remove #if
#if wxUSE_UNICODE_UTF8
    // in UTF-8 build work on wide character copies of both strings; the
    // buffer objects keep them alive until the end of the function
    wxWCharBuffer maskBuf = mask.wc_str();
    wxWCharBuffer txtBuf = wc_str();
    const wxChar *pszMask = maskBuf.data();
    const wxChar *pszTxt = txtBuf.data();
#else
    const wxChar *pszMask = mask.wx_str();
    // the char currently being checked
    const wxChar *pszTxt = wx_str();
#endif

    // the last location where '*' matched
    const wxChar *pszLastStarInText = NULL;
    const wxChar *pszLastStarInMask = NULL;

    // (re)start of the matching loop: we jump back here when backtracking
match:
    for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
        switch ( *pszMask ) {
            case wxT('?'):
                // '?' needs a character to match, so fail at the end of text
                if ( *pszTxt == wxT('\0') )
                    return false;

                // pszTxt and pszMask will be incremented in the loop statement

                break;

            case wxT('*'):
                {
                    // remember where we started to be able to backtrack later
                    pszLastStarInText = pszTxt;
                    pszLastStarInMask = pszMask;

                    // ignore special chars immediately following this one
                    // (should this be an error?)
                    //
                    // NB: this also skips the '*' itself and any '?' directly
                    //     after it, i.e. such '?' don't consume a character
                    while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
                        pszMask++;

                    // if there is nothing more, match
                    if ( *pszMask == wxT('\0') )
                        return true;

                    // are there any other metacharacters in the mask?
                    size_t uiLenMask;
                    const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));

                    if ( pEndMask != NULL ) {
                        // we have to match the string between two metachars
                        uiLenMask = pEndMask - pszMask;
                    }
                    else {
                        // we have to match the remainder of the string
                        uiLenMask = wxStrlen(pszMask);
                    }

                    // look for the literal part of the mask following the
                    // '*' in the remaining text
                    wxString strToMatch(pszMask, uiLenMask);
                    const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
                    if ( pMatch == NULL )
                        return false;

                    // -1 to compensate "++" in the loop
                    pszTxt = pMatch + uiLenMask - 1;
                    pszMask += uiLenMask - 1;
                }
                break;

            default:
                // an ordinary character must be the same in the mask and in
                // the text; a mismatch fails at once, without backtracking
                if ( *pszMask != *pszTxt )
                    return false;
                break;
        }
    }

    // match only if nothing left
    if ( *pszTxt == wxT('\0') )
        return true;

    // if we failed to match, backtrack if we can
    if ( pszLastStarInText ) {
        // retry from the last '*' in the mask, one character further in the
        // text than the position at which it was tried the last time
        pszTxt = pszLastStarInText + 1;
        pszMask = pszLastStarInMask;

        pszLastStarInText = NULL;

        // don't bother resetting pszLastStarInMask, it's unnecessary

        goto match;
    }

    return false;
#endif // wxUSE_REGEX/!wxUSE_REGEX
}
1977
1978 // Count the number of chars
1979 int wxString::Freq(wxUniChar ch) const
1980 {
1981 int count = 0;
1982 for ( const_iterator i = begin(); i != end(); ++i )
1983 {
1984 if ( *i == ch )
1985 count ++;
1986 }
1987 return count;
1988 }
1989
1990 // ----------------------------------------------------------------------------
1991 // wxUTF8StringBuffer
1992 // ----------------------------------------------------------------------------
1993
1994 #if wxUSE_UNICODE_WCHAR
1995 wxUTF8StringBuffer::~wxUTF8StringBuffer()
1996 {
1997 wxMBConvStrictUTF8 conv;
1998 size_t wlen = conv.ToWChar(NULL, 0, m_buf);
1999 wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
2000
2001 wxStringInternalBuffer wbuf(m_str, wlen);
2002 conv.ToWChar(wbuf, wlen, m_buf);
2003 }
2004
2005 wxUTF8StringBufferLength::~wxUTF8StringBufferLength()
2006 {
2007 wxCHECK_RET(m_lenSet, "length not set");
2008
2009 wxMBConvStrictUTF8 conv;
2010 size_t wlen = conv.ToWChar(NULL, 0, m_buf, m_len);
2011 wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
2012
2013 wxStringInternalBufferLength wbuf(m_str, wlen);
2014 conv.ToWChar(wbuf, wlen, m_buf, m_len);
2015 wbuf.SetLength(wlen);
2016 }
2017 #endif // wxUSE_UNICODE_WCHAR