]> git.saurik.com Git - wxWidgets.git/blob - src/common/string.cpp
remove wxSETranslator() entirely from wxUSE_ON_FATAL_EXCEPTION==0 build (part of...
[wxWidgets.git] / src / common / string.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
5 // Modified by:
6 // Created: 29/01/98
7 // RCS-ID: $Id$
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
12
13 // ===========================================================================
14 // headers, declarations, constants
15 // ===========================================================================
16
17 // For compilers that support precompilation, includes "wx.h".
18 #include "wx/wxprec.h"
19
20 #ifdef __BORLANDC__
21 #pragma hdrstop
22 #endif
23
24 #ifndef WX_PRECOMP
25 #include "wx/string.h"
26 #include "wx/wxcrtvararg.h"
27 #endif
28
29 #include <ctype.h>
30
31 #ifndef __WXWINCE__
32 #include <errno.h>
33 #endif
34
35 #include <string.h>
36 #include <stdlib.h>
37
38 #include "wx/hashmap.h"
39
// Character-buffer primitives used by the wxString implementation below.
// UTF-8 builds work on plain bytes and map straight to the C library; all
// other builds go through the wxChar-aware wx wrappers.
#if !wxUSE_UNICODE_UTF8
    #define wxStringMemcpy   wxTmemcpy
    #define wxStringMemcmp   wxTmemcmp
    #define wxStringMemchr   wxTmemchr
    #define wxStringStrlen   wxStrlen
#else // wxUSE_UNICODE_UTF8
    #define wxStringMemcpy   memcpy
    #define wxStringMemcmp   memcmp
    #define wxStringMemchr   memchr
    #define wxStringStrlen   strlen
#endif // !wxUSE_UNICODE_UTF8/wxUSE_UNICODE_UTF8
52
53
54 // ---------------------------------------------------------------------------
55 // static class variables definition
56 // ---------------------------------------------------------------------------
57
58 //According to STL _must_ be a -1 size_t
59 const size_t wxString::npos = (size_t) -1;
60
61 // ----------------------------------------------------------------------------
62 // global functions
63 // ----------------------------------------------------------------------------
64
65 #if wxUSE_STD_IOSTREAM
66
67 #include <iostream>
68
69 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
70 {
71 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
72 return os << (const char *)str.AsCharBuf();
73 #else
74 return os << str.AsInternal();
75 #endif
76 }
77
78 wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
79 {
80 return os << str.c_str();
81 }
82
83 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str)
84 {
85 return os << str.data();
86 }
87
88 #ifndef __BORLANDC__
89 wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)
90 {
91 return os << str.data();
92 }
93 #endif
94
95 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
96
97 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxString& str)
98 {
99 return wos << str.wc_str();
100 }
101
102 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxCStrData& str)
103 {
104 return wos << str.AsWChar();
105 }
106
107 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxWCharBuffer& str)
108 {
109 return wos << str.data();
110 }
111
112 #endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
113
114 #endif // wxUSE_STD_IOSTREAM
115
116 // ===========================================================================
117 // wxString class core
118 // ===========================================================================
119
120 #if wxUSE_UNICODE_UTF8
121
122 void wxString::PosLenToImpl(size_t pos, size_t len,
123 size_t *implPos, size_t *implLen) const
124 {
125 if ( pos == npos )
126 *implPos = npos;
127 else
128 {
129 const_iterator i = begin() + pos;
130 *implPos = wxStringImpl::const_iterator(i.impl()) - m_impl.begin();
131 if ( len == npos )
132 *implLen = npos;
133 else
134 {
135 // too large length is interpreted as "to the end of the string"
136 // FIXME-UTF8: verify this is the case in std::string, assert
137 // otherwise
138 if ( pos + len > length() )
139 len = length() - pos;
140
141 *implLen = (i + len).impl() - i.impl();
142 }
143 }
144 }
145
146 #endif // wxUSE_UNICODE_UTF8
147
148 // ----------------------------------------------------------------------------
149 // wxCStrData converted strings caching
150 // ----------------------------------------------------------------------------
151
152 // FIXME-UTF8: temporarily disabled because it doesn't work with global
153 // string objects; re-enable after fixing this bug and benchmarking
154 // performance to see if using a hash is a good idea at all
155 #if 0
156
157 // For backward compatibility reasons, it must be possible to assign the value
158 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
159 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
160 // because the memory would be freed immediately, but it has to be valid as long
161 // as the string is not modified, so that code like this still works:
162 //
163 // const wxChar *s = str.c_str();
164 // while ( s ) { ... }
165
166 // FIXME-UTF8: not thread safe!
167 // FIXME-UTF8: we currently clear the cached conversion only when the string is
168 // destroyed, but we should do it when the string is modified, to
169 // keep memory usage down
170 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
171 // invalidated the cache on every change, we could keep the previous
172 // conversion
173 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
174 // to use mb_str() or wc_str() instead of (const [w]char*)c_str()
175
176 template<typename T>
177 static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
178 {
179 typename T::iterator i = hash.find(wxConstCast(s, wxString));
180 if ( i != hash.end() )
181 {
182 free(i->second);
183 hash.erase(i);
184 }
185 }
186
#if wxUSE_UNICODE
// NB: non-STL implementation doesn't compile with "const wxString*" key type,
//     so we have to use wxString* here and const-cast when used
WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
                    wxStringCharConversionCache);
static wxStringCharConversionCache gs_stringsCharCache;

// Converts the string with mb_str() and stores the result in the global
// cache, keyed by the string object; returns the cached buffer adjusted by
// m_offset.
const char* wxCStrData::AsChar() const
{
    // drop the previously cached value, if any (see FIXMEs above):
    DeleteStringFromConversionCache(gs_stringsCharCache, m_str);

    // convert the string and remember the result:
    char * const converted = m_str->mb_str().release();
    gs_stringsCharCache[wxConstCast(m_str, wxString)] = converted;

    return converted + m_offset;
}
#endif // wxUSE_UNICODE
206
207 #if !wxUSE_UNICODE_WCHAR
208 WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
209 wxStringWCharConversionCache);
210 static wxStringWCharConversionCache gs_stringsWCharCache;
211
212 const wchar_t* wxCStrData::AsWChar() const
213 {
214 // remove previously cache value, if any (see FIXMEs above):
215 DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
216
217 // convert the string and keep it:
218 const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
219 m_str->wc_str().release();
220
221 return s + m_offset;
222 }
223 #endif // !wxUSE_UNICODE_WCHAR
224
225 wxString::~wxString()
226 {
227 #if wxUSE_UNICODE
228 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
229 DeleteStringFromConversionCache(gs_stringsCharCache, this);
230 #endif
231 #if !wxUSE_UNICODE_WCHAR
232 DeleteStringFromConversionCache(gs_stringsWCharCache, this);
233 #endif
234 }
235 #endif
236
237 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
238 const char* wxCStrData::AsChar() const
239 {
240 #if wxUSE_UNICODE_UTF8
241 if ( wxLocaleIsUtf8 )
242 return AsInternal();
243 #endif
244 // under non-UTF8 locales, we have to convert the internal UTF-8
245 // representation using wxConvLibc and cache the result
246
247 wxString *str = wxConstCast(m_str, wxString);
248
249 // convert the string:
250 //
251 // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
252 // have it) but it's unfortunately not obvious to implement
253 // because we don't know how big buffer do we need for the
254 // given string length (in case of multibyte encodings, e.g.
255 // ISO-2022-JP or UTF-8 when internal representation is wchar_t)
256 //
257 // One idea would be to store more than just m_convertedToChar
258 // in wxString: then we could record the length of the string
259 // which was converted the last time and try to reuse the same
260 // buffer if the current length is not greater than it (this
261 // could still fail because string could have been modified in
262 // place but it would work most of the time, so we'd do it and
263 // only allocate the new buffer if in-place conversion returned
264 // an error). We could also store a bit saying if the string
265 // was modified since the last conversion (and update it in all
266 // operation modifying the string, of course) to avoid unneeded
267 // consequential conversions. But both of these ideas require
268 // adding more fields to wxString and require profiling results
269 // to be sure that we really gain enough from them to justify
270 // doing it.
271 wxCharBuffer buf(str->mb_str());
272
273 // if it failed, return empty string and not NULL to avoid crashes in code
274 // written with either wxWidgets 2 wxString or std::string behaviour in
275 // mind: neither of them ever returns NULL and so we shouldn't neither
276 if ( !buf )
277 return "";
278
279 if ( str->m_convertedToChar &&
280 strlen(buf) == strlen(str->m_convertedToChar) )
281 {
282 // keep the same buffer for as long as possible, so that several calls
283 // to c_str() in a row still work:
284 strcpy(str->m_convertedToChar, buf);
285 }
286 else
287 {
288 str->m_convertedToChar = buf.release();
289 }
290
291 // and keep it:
292 return str->m_convertedToChar + m_offset;
293 }
294 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
295
296 #if !wxUSE_UNICODE_WCHAR
297 const wchar_t* wxCStrData::AsWChar() const
298 {
299 wxString *str = wxConstCast(m_str, wxString);
300
301 // convert the string:
302 wxWCharBuffer buf(str->wc_str());
303
304 // notice that here, unlike above in AsChar(), conversion can't fail as our
305 // internal UTF-8 is always well-formed -- or the string was corrupted and
306 // all bets are off anyhow
307
308 // FIXME-UTF8: do the conversion in-place in the existing buffer
309 if ( str->m_convertedToWChar &&
310 wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
311 {
312 // keep the same buffer for as long as possible, so that several calls
313 // to c_str() in a row still work:
314 memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
315 }
316 else
317 {
318 str->m_convertedToWChar = buf.release();
319 }
320
321 // and keep it:
322 return str->m_convertedToWChar + m_offset;
323 }
324 #endif // !wxUSE_UNICODE_WCHAR
325
326 // ===========================================================================
327 // wxString class core
328 // ===========================================================================
329
330 // ---------------------------------------------------------------------------
331 // construction and conversion
332 // ---------------------------------------------------------------------------
333
334 #if wxUSE_UNICODE_WCHAR
335 /* static */
336 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
337 const wxMBConv& conv)
338 {
339 // anything to do?
340 if ( !psz || nLength == 0 )
341 return SubstrBufFromMB(L"", 0);
342
343 if ( nLength == npos )
344 nLength = wxNO_LEN;
345
346 size_t wcLen;
347 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
348 if ( !wcLen )
349 return SubstrBufFromMB(_T(""), 0);
350 else
351 return SubstrBufFromMB(wcBuf, wcLen);
352 }
353 #endif // wxUSE_UNICODE_WCHAR
354
355 #if wxUSE_UNICODE_UTF8
356 /* static */
357 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
358 const wxMBConv& conv)
359 {
360 // anything to do?
361 if ( !psz || nLength == 0 )
362 return SubstrBufFromMB("", 0);
363
364 // if psz is already in UTF-8, we don't have to do the roundtrip to
365 // wchar_t* and back:
366 if ( conv.IsUTF8() )
367 {
368 // we need to validate the input because UTF8 iterators assume valid
369 // UTF-8 sequence and psz may be invalid:
370 if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
371 {
372 // we must pass the real string length to SubstrBufFromMB ctor
373 if ( nLength == npos )
374 nLength = psz ? strlen(psz) : 0;
375 return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz), nLength);
376 }
377 // else: do the roundtrip through wchar_t*
378 }
379
380 if ( nLength == npos )
381 nLength = wxNO_LEN;
382
383 // first convert to wide string:
384 size_t wcLen;
385 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
386 if ( !wcLen )
387 return SubstrBufFromMB("", 0);
388
389 // and then to UTF-8:
390 SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
391 // widechar -> UTF-8 conversion isn't supposed to ever fail:
392 wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
393
394 return buf;
395 }
396 #endif // wxUSE_UNICODE_UTF8
397
398 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
399 /* static */
400 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
401 const wxMBConv& conv)
402 {
403 // anything to do?
404 if ( !pwz || nLength == 0 )
405 return SubstrBufFromWC("", 0);
406
407 if ( nLength == npos )
408 nLength = wxNO_LEN;
409
410 size_t mbLen;
411 wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
412 if ( !mbLen )
413 return SubstrBufFromWC("", 0);
414 else
415 return SubstrBufFromWC(mbBuf, mbLen);
416 }
417 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
418
419
420 #if wxUSE_UNICODE_WCHAR
421
422 //Convert wxString in Unicode mode to a multi-byte string
423 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
424 {
425 return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL);
426 }
427
428 #elif wxUSE_UNICODE_UTF8
429
430 const wxWCharBuffer wxString::wc_str() const
431 {
432 return wxMBConvStrictUTF8().cMB2WC
433 (
434 m_impl.c_str(),
435 m_impl.length() + 1, // size, not length
436 NULL
437 );
438 }
439
440 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
441 {
442 if ( conv.IsUTF8() )
443 return wxCharBuffer::CreateNonOwned(m_impl.c_str());
444
445 // FIXME-UTF8: use wc_str() here once we have buffers with length
446
447 size_t wcLen;
448 wxWCharBuffer wcBuf(wxMBConvStrictUTF8().cMB2WC
449 (
450 m_impl.c_str(),
451 m_impl.length() + 1, // size
452 &wcLen
453 ));
454 if ( !wcLen )
455 return wxCharBuffer("");
456
457 return conv.cWC2MB(wcBuf, wcLen+1, NULL);
458 }
459
460 #else // ANSI
461
462 //Converts this string to a wide character string if unicode
463 //mode is not enabled and wxUSE_WCHAR_T is enabled
464 const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
465 {
466 return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL);
467 }
468
469 #endif // Unicode/ANSI
470
471 // shrink to minimal size (releasing extra memory)
472 bool wxString::Shrink()
473 {
474 wxString tmp(begin(), end());
475 swap(tmp);
476 return tmp.length() == length();
477 }
478
479 // deprecated compatibility code:
480 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
481 wxStringCharType *wxString::GetWriteBuf(size_t nLen)
482 {
483 return DoGetWriteBuf(nLen);
484 }
485
486 void wxString::UngetWriteBuf()
487 {
488 DoUngetWriteBuf();
489 }
490
491 void wxString::UngetWriteBuf(size_t nLen)
492 {
493 DoUngetWriteBuf(nLen);
494 }
495 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
496
497
498 // ---------------------------------------------------------------------------
499 // data access
500 // ---------------------------------------------------------------------------
501
502 // all functions are inline in string.h
503
504 // ---------------------------------------------------------------------------
505 // concatenation operators
506 // ---------------------------------------------------------------------------
507
508 /*
509 * concatenation functions come in 5 flavours:
510 * string + string
511 * char + string and string + char
512 * C str + string and string + C str
513 */
514
515 wxString operator+(const wxString& str1, const wxString& str2)
516 {
517 #if !wxUSE_STL_BASED_WXSTRING
518 wxASSERT( str1.IsValid() );
519 wxASSERT( str2.IsValid() );
520 #endif
521
522 wxString s = str1;
523 s += str2;
524
525 return s;
526 }
527
528 wxString operator+(const wxString& str, wxUniChar ch)
529 {
530 #if !wxUSE_STL_BASED_WXSTRING
531 wxASSERT( str.IsValid() );
532 #endif
533
534 wxString s = str;
535 s += ch;
536
537 return s;
538 }
539
540 wxString operator+(wxUniChar ch, const wxString& str)
541 {
542 #if !wxUSE_STL_BASED_WXSTRING
543 wxASSERT( str.IsValid() );
544 #endif
545
546 wxString s = ch;
547 s += str;
548
549 return s;
550 }
551
552 wxString operator+(const wxString& str, const char *psz)
553 {
554 #if !wxUSE_STL_BASED_WXSTRING
555 wxASSERT( str.IsValid() );
556 #endif
557
558 wxString s;
559 if ( !s.Alloc(strlen(psz) + str.length()) ) {
560 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
561 }
562 s += str;
563 s += psz;
564
565 return s;
566 }
567
568 wxString operator+(const wxString& str, const wchar_t *pwz)
569 {
570 #if !wxUSE_STL_BASED_WXSTRING
571 wxASSERT( str.IsValid() );
572 #endif
573
574 wxString s;
575 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
576 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
577 }
578 s += str;
579 s += pwz;
580
581 return s;
582 }
583
584 wxString operator+(const char *psz, const wxString& str)
585 {
586 #if !wxUSE_STL_BASED_WXSTRING
587 wxASSERT( str.IsValid() );
588 #endif
589
590 wxString s;
591 if ( !s.Alloc(strlen(psz) + str.length()) ) {
592 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
593 }
594 s = psz;
595 s += str;
596
597 return s;
598 }
599
600 wxString operator+(const wchar_t *pwz, const wxString& str)
601 {
602 #if !wxUSE_STL_BASED_WXSTRING
603 wxASSERT( str.IsValid() );
604 #endif
605
606 wxString s;
607 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
608 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
609 }
610 s = pwz;
611 s += str;
612
613 return s;
614 }
615
616 // ---------------------------------------------------------------------------
617 // string comparison
618 // ---------------------------------------------------------------------------
619
620 bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
621 {
622 return (length() == 1) && (compareWithCase ? GetChar(0u) == c
623 : wxToupper(GetChar(0u)) == wxToupper(c));
624 }
625
626 #ifdef HAVE_STD_STRING_COMPARE
627
628 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
629 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
630 // sort strings in characters code point order by sorting the byte sequence
631 // in byte values order (i.e. what strcmp() and memcmp() do).
632
633 int wxString::compare(const wxString& str) const
634 {
635 return m_impl.compare(str.m_impl);
636 }
637
638 int wxString::compare(size_t nStart, size_t nLen,
639 const wxString& str) const
640 {
641 size_t pos, len;
642 PosLenToImpl(nStart, nLen, &pos, &len);
643 return m_impl.compare(pos, len, str.m_impl);
644 }
645
646 int wxString::compare(size_t nStart, size_t nLen,
647 const wxString& str,
648 size_t nStart2, size_t nLen2) const
649 {
650 size_t pos, len;
651 PosLenToImpl(nStart, nLen, &pos, &len);
652
653 size_t pos2, len2;
654 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
655
656 return m_impl.compare(pos, len, str.m_impl, pos2, len2);
657 }
658
659 int wxString::compare(const char* sz) const
660 {
661 return m_impl.compare(ImplStr(sz));
662 }
663
664 int wxString::compare(const wchar_t* sz) const
665 {
666 return m_impl.compare(ImplStr(sz));
667 }
668
669 int wxString::compare(size_t nStart, size_t nLen,
670 const char* sz, size_t nCount) const
671 {
672 size_t pos, len;
673 PosLenToImpl(nStart, nLen, &pos, &len);
674
675 SubstrBufFromMB str(ImplStr(sz, nCount));
676
677 return m_impl.compare(pos, len, str.data, str.len);
678 }
679
680 int wxString::compare(size_t nStart, size_t nLen,
681 const wchar_t* sz, size_t nCount) const
682 {
683 size_t pos, len;
684 PosLenToImpl(nStart, nLen, &pos, &len);
685
686 SubstrBufFromWC str(ImplStr(sz, nCount));
687
688 return m_impl.compare(pos, len, str.data, str.len);
689 }
690
691 #else // !HAVE_STD_STRING_COMPARE
692
693 static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
694 const wxStringCharType* s2, size_t l2)
695 {
696 if( l1 == l2 )
697 return wxStringMemcmp(s1, s2, l1);
698 else if( l1 < l2 )
699 {
700 int ret = wxStringMemcmp(s1, s2, l1);
701 return ret == 0 ? -1 : ret;
702 }
703 else
704 {
705 int ret = wxStringMemcmp(s1, s2, l2);
706 return ret == 0 ? +1 : ret;
707 }
708 }
709
710 int wxString::compare(const wxString& str) const
711 {
712 return ::wxDoCmp(m_impl.data(), m_impl.length(),
713 str.m_impl.data(), str.m_impl.length());
714 }
715
716 int wxString::compare(size_t nStart, size_t nLen,
717 const wxString& str) const
718 {
719 wxASSERT(nStart <= length());
720 size_type strLen = length() - nStart;
721 nLen = strLen < nLen ? strLen : nLen;
722
723 size_t pos, len;
724 PosLenToImpl(nStart, nLen, &pos, &len);
725
726 return ::wxDoCmp(m_impl.data() + pos, len,
727 str.m_impl.data(), str.m_impl.length());
728 }
729
730 int wxString::compare(size_t nStart, size_t nLen,
731 const wxString& str,
732 size_t nStart2, size_t nLen2) const
733 {
734 wxASSERT(nStart <= length());
735 wxASSERT(nStart2 <= str.length());
736 size_type strLen = length() - nStart,
737 strLen2 = str.length() - nStart2;
738 nLen = strLen < nLen ? strLen : nLen;
739 nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
740
741 size_t pos, len;
742 PosLenToImpl(nStart, nLen, &pos, &len);
743 size_t pos2, len2;
744 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
745
746 return ::wxDoCmp(m_impl.data() + pos, len,
747 str.m_impl.data() + pos2, len2);
748 }
749
750 int wxString::compare(const char* sz) const
751 {
752 SubstrBufFromMB str(ImplStr(sz, npos));
753 if ( str.len == npos )
754 str.len = wxStringStrlen(str.data);
755 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
756 }
757
758 int wxString::compare(const wchar_t* sz) const
759 {
760 SubstrBufFromWC str(ImplStr(sz, npos));
761 if ( str.len == npos )
762 str.len = wxStringStrlen(str.data);
763 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
764 }
765
766 int wxString::compare(size_t nStart, size_t nLen,
767 const char* sz, size_t nCount) const
768 {
769 wxASSERT(nStart <= length());
770 size_type strLen = length() - nStart;
771 nLen = strLen < nLen ? strLen : nLen;
772
773 size_t pos, len;
774 PosLenToImpl(nStart, nLen, &pos, &len);
775
776 SubstrBufFromMB str(ImplStr(sz, nCount));
777 if ( str.len == npos )
778 str.len = wxStringStrlen(str.data);
779
780 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
781 }
782
783 int wxString::compare(size_t nStart, size_t nLen,
784 const wchar_t* sz, size_t nCount) const
785 {
786 wxASSERT(nStart <= length());
787 size_type strLen = length() - nStart;
788 nLen = strLen < nLen ? strLen : nLen;
789
790 size_t pos, len;
791 PosLenToImpl(nStart, nLen, &pos, &len);
792
793 SubstrBufFromWC str(ImplStr(sz, nCount));
794 if ( str.len == npos )
795 str.len = wxStringStrlen(str.data);
796
797 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
798 }
799
800 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
801
802
803 // ---------------------------------------------------------------------------
804 // find_{first,last}_[not]_of functions
805 // ---------------------------------------------------------------------------
806
807 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
808
809 // NB: All these functions are implemented with the argument being wxChar*,
810 // i.e. widechar string in any Unicode build, even though native string
811 // representation is char* in the UTF-8 build. This is because we couldn't
812 // use memchr() to determine if a character is in a set encoded as UTF-8.
813
814 size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
815 {
816 return find_first_of(sz, nStart, wxStrlen(sz));
817 }
818
819 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
820 {
821 return find_first_not_of(sz, nStart, wxStrlen(sz));
822 }
823
824 size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
825 {
826 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
827
828 size_t idx = nStart;
829 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
830 {
831 if ( wxTmemchr(sz, *i, n) )
832 return idx;
833 }
834
835 return npos;
836 }
837
838 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
839 {
840 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
841
842 size_t idx = nStart;
843 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
844 {
845 if ( !wxTmemchr(sz, *i, n) )
846 return idx;
847 }
848
849 return npos;
850 }
851
852
853 size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
854 {
855 return find_last_of(sz, nStart, wxStrlen(sz));
856 }
857
858 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
859 {
860 return find_last_not_of(sz, nStart, wxStrlen(sz));
861 }
862
863 size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
864 {
865 size_t len = length();
866
867 if ( nStart == npos )
868 {
869 nStart = len - 1;
870 }
871 else
872 {
873 wxASSERT_MSG( nStart <= len, _T("invalid index") );
874 }
875
876 size_t idx = nStart;
877 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
878 i != rend(); --idx, ++i )
879 {
880 if ( wxTmemchr(sz, *i, n) )
881 return idx;
882 }
883
884 return npos;
885 }
886
887 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
888 {
889 size_t len = length();
890
891 if ( nStart == npos )
892 {
893 nStart = len - 1;
894 }
895 else
896 {
897 wxASSERT_MSG( nStart <= len, _T("invalid index") );
898 }
899
900 size_t idx = nStart;
901 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
902 i != rend(); --idx, ++i )
903 {
904 if ( !wxTmemchr(sz, *i, n) )
905 return idx;
906 }
907
908 return npos;
909 }
910
911 size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
912 {
913 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
914
915 size_t idx = nStart;
916 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
917 {
918 if ( *i != ch )
919 return idx;
920 }
921
922 return npos;
923 }
924
925 size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
926 {
927 size_t len = length();
928
929 if ( nStart == npos )
930 {
931 nStart = len - 1;
932 }
933 else
934 {
935 wxASSERT_MSG( nStart <= len, _T("invalid index") );
936 }
937
938 size_t idx = nStart;
939 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
940 i != rend(); --idx, ++i )
941 {
942 if ( *i != ch )
943 return idx;
944 }
945
946 return npos;
947 }
948
949 // the functions above were implemented for wchar_t* arguments in Unicode
950 // build and char* in ANSI build; below are implementations for the other
951 // version:
952 #if wxUSE_UNICODE
953 #define wxOtherCharType char
954 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
955 #else
956 #define wxOtherCharType wchar_t
957 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
958 #endif
959
960 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
961 { return find_first_of(STRCONV(sz), nStart); }
962
963 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
964 size_t n) const
965 { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
966 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
967 { return find_last_of(STRCONV(sz), nStart); }
968 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
969 size_t n) const
970 { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
971 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
972 { return find_first_not_of(STRCONV(sz), nStart); }
973 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
974 size_t n) const
975 { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
976 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
977 { return find_last_not_of(STRCONV(sz), nStart); }
978 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
979 size_t n) const
980 { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
981
982 #undef wxOtherCharType
983 #undef STRCONV
984
985 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
986
987 // ===========================================================================
988 // other common string functions
989 // ===========================================================================
990
991 int wxString::CmpNoCase(const wxString& s) const
992 {
993 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
994
995 const_iterator i1 = begin();
996 const_iterator end1 = end();
997 const_iterator i2 = s.begin();
998 const_iterator end2 = s.end();
999
1000 for ( ; i1 != end1 && i2 != end2; ++i1, ++i2 )
1001 {
1002 wxUniChar lower1 = (wxChar)wxTolower(*i1);
1003 wxUniChar lower2 = (wxChar)wxTolower(*i2);
1004 if ( lower1 != lower2 )
1005 return lower1 < lower2 ? -1 : 1;
1006 }
1007
1008 size_t len1 = length();
1009 size_t len2 = s.length();
1010
1011 if ( len1 < len2 )
1012 return -1;
1013 else if ( len1 > len2 )
1014 return 1;
1015 return 0;
1016 }
1017
1018
1019 #if wxUSE_UNICODE
1020
1021 #ifdef __MWERKS__
1022 #ifndef __SCHAR_MAX__
1023 #define __SCHAR_MAX__ 127
1024 #endif
1025 #endif
1026
1027 wxString wxString::FromAscii(const char *ascii, size_t len)
1028 {
1029 if (!ascii || len == 0)
1030 return wxEmptyString;
1031
1032 wxString res;
1033
1034 {
1035 wxStringInternalBuffer buf(res, len);
1036 wxStringCharType *dest = buf;
1037
1038 for ( ; len > 0; --len )
1039 {
1040 unsigned char c = (unsigned char)*ascii++;
1041 wxASSERT_MSG( c < 0x80,
1042 _T("Non-ASCII value passed to FromAscii().") );
1043
1044 *dest++ = (wchar_t)c;
1045 }
1046 }
1047
1048 return res;
1049 }
1050
1051 wxString wxString::FromAscii(const char *ascii)
1052 {
1053 return FromAscii(ascii, wxStrlen(ascii));
1054 }
1055
1056 wxString wxString::FromAscii(char ascii)
1057 {
1058 // What do we do with '\0' ?
1059
1060 unsigned char c = (unsigned char)ascii;
1061
1062 wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") );
1063
1064 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1065 return wxString(wxUniChar((wchar_t)c));
1066 }
1067
1068 const wxCharBuffer wxString::ToAscii() const
1069 {
1070 // this will allocate enough space for the terminating NUL too
1071 wxCharBuffer buffer(length());
1072 char *dest = buffer.data();
1073
1074 for ( const_iterator i = begin(); i != end(); ++i )
1075 {
1076 wxUniChar c(*i);
1077 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1078 *dest++ = c.IsAscii() ? (char)c : '_';
1079
1080 // the output string can't have embedded NULs anyhow, so we can safely
1081 // stop at first of them even if we do have any
1082 if ( !c )
1083 break;
1084 }
1085
1086 return buffer;
1087 }
1088
1089 #endif // wxUSE_UNICODE
1090
1091 // extract string of length nCount starting at nFirst
1092 wxString wxString::Mid(size_t nFirst, size_t nCount) const
1093 {
1094 size_t nLen = length();
1095
1096 // default value of nCount is npos and means "till the end"
1097 if ( nCount == npos )
1098 {
1099 nCount = nLen - nFirst;
1100 }
1101
1102 // out-of-bounds requests return sensible things
1103 if ( nFirst + nCount > nLen )
1104 {
1105 nCount = nLen - nFirst;
1106 }
1107
1108 if ( nFirst > nLen )
1109 {
1110 // AllocCopy() will return empty string
1111 return wxEmptyString;
1112 }
1113
1114 wxString dest(*this, nFirst, nCount);
1115 if ( dest.length() != nCount )
1116 {
1117 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1118 }
1119
1120 return dest;
1121 }
1122
1123 // check that the string starts with prefix and return the rest of the string
1124 // in the provided pointer if it is not NULL, otherwise return false
1125 bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
1126 {
1127 if ( compare(0, prefix.length(), prefix) != 0 )
1128 return false;
1129
1130 if ( rest )
1131 {
1132 // put the rest of the string into provided pointer
1133 rest->assign(*this, prefix.length(), npos);
1134 }
1135
1136 return true;
1137 }
1138
1139
1140 // check that the string ends with suffix and return the rest of it in the
1141 // provided pointer if it is not NULL, otherwise return false
1142 bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
1143 {
1144 int start = length() - suffix.length();
1145
1146 if ( start < 0 || compare(start, npos, suffix) != 0 )
1147 return false;
1148
1149 if ( rest )
1150 {
1151 // put the rest of the string into provided pointer
1152 rest->assign(*this, 0, start);
1153 }
1154
1155 return true;
1156 }
1157
1158
1159 // extract nCount last (rightmost) characters
1160 wxString wxString::Right(size_t nCount) const
1161 {
1162 if ( nCount > length() )
1163 nCount = length();
1164
1165 wxString dest(*this, length() - nCount, nCount);
1166 if ( dest.length() != nCount ) {
1167 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1168 }
1169 return dest;
1170 }
1171
1172 // get all characters after the last occurence of ch
1173 // (returns the whole string if ch not found)
1174 wxString wxString::AfterLast(wxUniChar ch) const
1175 {
1176 wxString str;
1177 int iPos = Find(ch, true);
1178 if ( iPos == wxNOT_FOUND )
1179 str = *this;
1180 else
1181 str = wx_str() + iPos + 1;
1182
1183 return str;
1184 }
1185
1186 // extract nCount first (leftmost) characters
1187 wxString wxString::Left(size_t nCount) const
1188 {
1189 if ( nCount > length() )
1190 nCount = length();
1191
1192 wxString dest(*this, 0, nCount);
1193 if ( dest.length() != nCount ) {
1194 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1195 }
1196 return dest;
1197 }
1198
1199 // get all characters before the first occurence of ch
1200 // (returns the whole string if ch not found)
1201 wxString wxString::BeforeFirst(wxUniChar ch) const
1202 {
1203 int iPos = Find(ch);
1204 if ( iPos == wxNOT_FOUND ) iPos = length();
1205 return wxString(*this, 0, iPos);
1206 }
1207
1208 /// get all characters before the last occurence of ch
1209 /// (returns empty string if ch not found)
1210 wxString wxString::BeforeLast(wxUniChar ch) const
1211 {
1212 wxString str;
1213 int iPos = Find(ch, true);
1214 if ( iPos != wxNOT_FOUND && iPos != 0 )
1215 str = wxString(c_str(), iPos);
1216
1217 return str;
1218 }
1219
1220 /// get all characters after the first occurence of ch
1221 /// (returns empty string if ch not found)
1222 wxString wxString::AfterFirst(wxUniChar ch) const
1223 {
1224 wxString str;
1225 int iPos = Find(ch);
1226 if ( iPos != wxNOT_FOUND )
1227 str = wx_str() + iPos + 1;
1228
1229 return str;
1230 }
1231
1232 // replace first (or all) occurences of some substring with another one
1233 size_t wxString::Replace(const wxString& strOld,
1234 const wxString& strNew, bool bReplaceAll)
1235 {
1236 // if we tried to replace an empty string we'd enter an infinite loop below
1237 wxCHECK_MSG( !strOld.empty(), 0,
1238 _T("wxString::Replace(): invalid parameter") );
1239
1240 size_t uiCount = 0; // count of replacements made
1241
1242 // optimize the special common case: replacement of one character by
1243 // another one (in UTF-8 case we can only do this for ASCII characters)
1244 //
1245 // benchmarks show that this special version is around 3 times faster
1246 // (depending on the proportion of matching characters and UTF-8/wchar_t
1247 // build)
1248 if ( strOld.m_impl.length() == 1 && strNew.m_impl.length() == 1 )
1249 {
1250 const wxStringCharType chOld = strOld.m_impl[0],
1251 chNew = strNew.m_impl[0];
1252
1253 // this loop is the simplified version of the one below
1254 for ( size_t pos = 0; ; )
1255 {
1256 pos = m_impl.find(chOld, pos);
1257 if ( pos == npos )
1258 break;
1259
1260 m_impl[pos++] = chNew;
1261
1262 uiCount++;
1263
1264 if ( !bReplaceAll )
1265 break;
1266 }
1267 }
1268 else // general case
1269 {
1270 const size_t uiOldLen = strOld.m_impl.length();
1271 const size_t uiNewLen = strNew.m_impl.length();
1272
1273 for ( size_t pos = 0; ; )
1274 {
1275 pos = m_impl.find(strOld.m_impl, pos);
1276 if ( pos == npos )
1277 break;
1278
1279 // replace this occurrence of the old string with the new one
1280 m_impl.replace(pos, uiOldLen, strNew.m_impl);
1281
1282 // move up pos past the string that was replaced
1283 pos += uiNewLen;
1284
1285 // increase replace count
1286 uiCount++;
1287
1288 // stop after the first one?
1289 if ( !bReplaceAll )
1290 break;
1291 }
1292 }
1293
1294 return uiCount;
1295 }
1296
1297 bool wxString::IsAscii() const
1298 {
1299 for ( const_iterator i = begin(); i != end(); ++i )
1300 {
1301 if ( !(*i).IsAscii() )
1302 return false;
1303 }
1304
1305 return true;
1306 }
1307
1308 bool wxString::IsWord() const
1309 {
1310 for ( const_iterator i = begin(); i != end(); ++i )
1311 {
1312 if ( !wxIsalpha(*i) )
1313 return false;
1314 }
1315
1316 return true;
1317 }
1318
1319 bool wxString::IsNumber() const
1320 {
1321 if ( empty() )
1322 return true;
1323
1324 const_iterator i = begin();
1325
1326 if ( *i == _T('-') || *i == _T('+') )
1327 ++i;
1328
1329 for ( ; i != end(); ++i )
1330 {
1331 if ( !wxIsdigit(*i) )
1332 return false;
1333 }
1334
1335 return true;
1336 }
1337
1338 wxString wxString::Strip(stripType w) const
1339 {
1340 wxString s = *this;
1341 if ( w & leading ) s.Trim(false);
1342 if ( w & trailing ) s.Trim(true);
1343 return s;
1344 }
1345
1346 // ---------------------------------------------------------------------------
1347 // case conversion
1348 // ---------------------------------------------------------------------------
1349
1350 wxString& wxString::MakeUpper()
1351 {
1352 for ( iterator it = begin(), en = end(); it != en; ++it )
1353 *it = (wxChar)wxToupper(*it);
1354
1355 return *this;
1356 }
1357
1358 wxString& wxString::MakeLower()
1359 {
1360 for ( iterator it = begin(), en = end(); it != en; ++it )
1361 *it = (wxChar)wxTolower(*it);
1362
1363 return *this;
1364 }
1365
1366 // ---------------------------------------------------------------------------
1367 // trimming and padding
1368 // ---------------------------------------------------------------------------
1369
1370 // some compilers (VC++ 6.0 not to name them) return true for a call to
1371 // isspace('\xEA') in the C locale which seems to be broken to me, but we have
1372 // to live with this by checking that the character is a 7 bit one - even if
1373 // this may fail to detect some spaces (I don't know if Unicode doesn't have
1374 // space-like symbols somewhere except in the first 128 chars), it is arguably
1375 // still better than trimming away accented letters
1376 inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1377
1378 // trims spaces (in the sense of isspace) from left or right side
1379 wxString& wxString::Trim(bool bFromRight)
1380 {
1381 // first check if we're going to modify the string at all
1382 if ( !empty() &&
1383 (
1384 (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1385 (!bFromRight && wxSafeIsspace(GetChar(0u)))
1386 )
1387 )
1388 {
1389 if ( bFromRight )
1390 {
1391 // find last non-space character
1392 reverse_iterator psz = rbegin();
1393 while ( (psz != rend()) && wxSafeIsspace(*psz) )
1394 ++psz;
1395
1396 // truncate at trailing space start
1397 erase(psz.base(), end());
1398 }
1399 else
1400 {
1401 // find first non-space character
1402 iterator psz = begin();
1403 while ( (psz != end()) && wxSafeIsspace(*psz) )
1404 ++psz;
1405
1406 // fix up data and length
1407 erase(begin(), psz);
1408 }
1409 }
1410
1411 return *this;
1412 }
1413
1414 // adds nCount characters chPad to the string from either side
1415 wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1416 {
1417 wxString s(chPad, nCount);
1418
1419 if ( bFromRight )
1420 *this += s;
1421 else
1422 {
1423 s += *this;
1424 swap(s);
1425 }
1426
1427 return *this;
1428 }
1429
1430 // truncate the string
1431 wxString& wxString::Truncate(size_t uiLen)
1432 {
1433 if ( uiLen < length() )
1434 {
1435 erase(begin() + uiLen, end());
1436 }
1437 //else: nothing to do, string is already short enough
1438
1439 return *this;
1440 }
1441
1442 // ---------------------------------------------------------------------------
1443 // finding (return wxNOT_FOUND if not found and index otherwise)
1444 // ---------------------------------------------------------------------------
1445
1446 // find a character
1447 int wxString::Find(wxUniChar ch, bool bFromEnd) const
1448 {
1449 size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1450
1451 return (idx == npos) ? wxNOT_FOUND : (int)idx;
1452 }
1453
1454 // ----------------------------------------------------------------------------
1455 // conversion to numbers
1456 // ----------------------------------------------------------------------------
1457
// The implementation of all the functions below is exactly the same so factor
// it out. Note that number extraction works correctly on UTF-8 strings, so
// we can use wxStringCharType and wx_str() for maximum efficiency.

// DO_IF_NOT_WINCE(x) expands to x everywhere except under Windows CE
// (__WXWINCE__) where it expands to nothing: it is used below to wrap the
// code using errno
#ifndef __WXWINCE__
    #define DO_IF_NOT_WINCE(x) x
#else
    #define DO_IF_NOT_WINCE(x)
#endif

// WX_STRING_TO_INT_TYPE() expands to the entire body of a conversion function
// and so contains return statements:
//
//  out     pointer to the variable receiving the result, must not be NULL
//          (otherwise the function returns false after a debug check failure)
//  base    numeric base passed on to func, asserted to be 0 or in 2..36 range
//  func    strtol()-like function called as func(start, &end, base)
//  T       type of the value returned by func and stored in *out
//
// *out is only modified if the conversion succeeded
#define WX_STRING_TO_INT_TYPE(out, base, func, T)                           \
    wxCHECK_MSG( out, false, _T("NULL output pointer") );                   \
    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );  \
                                                                            \
    DO_IF_NOT_WINCE( errno = 0; )                                           \
                                                                            \
    const wxStringCharType *start = wx_str();                               \
    wxStringCharType *end;                                                  \
    T val = func(start, &end, base);                                        \
                                                                            \
    /* return true only if scan was stopped by the terminating NUL and */   \
    /* if the string was not empty to start with and no under/overflow */   \
    /* occurred: */                                                         \
    if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )         \
        return false;                                                       \
    *out = val;                                                             \
    return true
1485
// Converts the string to a long in the given base using wxStrtol().
//
// Returns true and stores the result in *pVal only if the entire, non-empty
// string was parsed and (except under Windows CE) no range error was
// reported; otherwise returns false and leaves *pVal unchanged. See
// WX_STRING_TO_INT_TYPE() for the checks done on the parameters.
bool wxString::ToLong(long *pVal, int base) const
{
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtol, long);
}
1490
// Converts the string to an unsigned long in the given base using
// wxStrtoul().
//
// Returns true and stores the result in *pVal only if the entire, non-empty
// string was parsed and (except under Windows CE) no range error was
// reported; otherwise returns false and leaves *pVal unchanged. See
// WX_STRING_TO_INT_TYPE() for the checks done on the parameters.
bool wxString::ToULong(unsigned long *pVal, int base) const
{
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoul, unsigned long);
}
1495
// Converts the string to a wxLongLong_t in the given base using wxStrtoll().
//
// Returns true and stores the result in *pVal only if the entire, non-empty
// string was parsed and (except under Windows CE) no range error was
// reported; otherwise returns false and leaves *pVal unchanged. See
// WX_STRING_TO_INT_TYPE() for the checks done on the parameters.
bool wxString::ToLongLong(wxLongLong_t *pVal, int base) const
{
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoll, wxLongLong_t);
}
1500
// Converts the string to a wxULongLong_t in the given base using
// wxStrtoull().
//
// Returns true and stores the result in *pVal only if the entire, non-empty
// string was parsed and (except under Windows CE) no range error was
// reported; otherwise returns false and leaves *pVal unchanged. See
// WX_STRING_TO_INT_TYPE() for the checks done on the parameters.
bool wxString::ToULongLong(wxULongLong_t *pVal, int base) const
{
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoull, wxULongLong_t);
}
1505
1506 bool wxString::ToDouble(double *pVal) const
1507 {
1508 wxCHECK_MSG( pVal, false, _T("NULL output pointer") );
1509
1510 DO_IF_NOT_WINCE( errno = 0; )
1511
1512 const wxChar *start = c_str();
1513 wxChar *end;
1514 double val = wxStrtod(start, &end);
1515
1516 // return true only if scan was stopped by the terminating NUL and if the
1517 // string was not empty to start with and no under/overflow occurred
1518 if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )
1519 return false;
1520
1521 *pVal = val;
1522
1523 return true;
1524 }
1525
1526 // ---------------------------------------------------------------------------
1527 // formatted output
1528 // ---------------------------------------------------------------------------
1529
1530 #if !wxUSE_UTF8_LOCALE_ONLY
1531 /* static */
1532 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1533 wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
1534 #else
1535 wxString wxString::DoFormatWchar(const wxChar *format, ...)
1536 #endif
1537 {
1538 va_list argptr;
1539 va_start(argptr, format);
1540
1541 wxString s;
1542 s.PrintfV(format, argptr);
1543
1544 va_end(argptr);
1545
1546 return s;
1547 }
1548 #endif // !wxUSE_UTF8_LOCALE_ONLY
1549
#if wxUSE_UNICODE_UTF8
// returns a new string with the result of formatting the variable arguments
// according to the char format string, see PrintfV()
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString formatted;

    va_list args;
    va_start(args, format);
    formatted.PrintfV(format, args);
    va_end(args);

    return formatted;
}
#endif // wxUSE_UNICODE_UTF8
1565
1566 /* static */
1567 wxString wxString::FormatV(const wxString& format, va_list argptr)
1568 {
1569 wxString s;
1570 s.PrintfV(format, argptr);
1571 return s;
1572 }
1573
1574 #if !wxUSE_UTF8_LOCALE_ONLY
1575 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1576 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
1577 #else
1578 int wxString::DoPrintfWchar(const wxChar *format, ...)
1579 #endif
1580 {
1581 va_list argptr;
1582 va_start(argptr, format);
1583
1584 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1585 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1586 // because it's the only cast that works safely for downcasting when
1587 // multiple inheritance is used:
1588 wxString *str = static_cast<wxString*>(this);
1589 #else
1590 wxString *str = this;
1591 #endif
1592
1593 int iLen = str->PrintfV(format, argptr);
1594
1595 va_end(argptr);
1596
1597 return iLen;
1598 }
1599 #endif // !wxUSE_UTF8_LOCALE_ONLY
1600
#if wxUSE_UNICODE_UTF8
// replaces the contents of the string with the result of formatting the
// variable arguments according to the char format string and returns the
// value returned by PrintfV()
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);
    const int written = PrintfV(format, args);
    va_end(args);

    return written;
}
#endif // wxUSE_UNICODE_UTF8
1614
1615 /*
1616 Uses wxVsnprintf and places the result into the this string.
1617
1618 In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1619 it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in
1620 the ISO C99 (and thus SUSv3) standard the return value for the case of
1621 an undersized buffer is inconsistent. For conforming vsnprintf
1622 implementations the function must return the number of characters that
1623 would have been printed had the buffer been large enough. For conforming
1624 vswprintf implementations the function must return a negative number
1625 and set errno.
1626
1627 What vswprintf sets errno to is undefined but Darwin seems to set it to
1628 EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of
1629 those are defined in the standard and backed up by several conformance
1630 statements. Note that ENOMEM mentioned in the manual page does not
1631 apply to swprintf, only wprintf and fwprintf.
1632
1633 Official manual page:
1634 http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1635
1636 Some conformance statements (AIX, Solaris):
1637 http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1638 http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1639
1640 Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1641 EILSEQ and EINVAL are specifically defined to mean the error is other than
1642 an undersized buffer and no other errno are defined we treat those two
1643 as meaning hard errors and everything else gets the old behavior which
1644 is to keep looping and increasing buffer size until the function succeeds.
1645
1646 In practice it's impossible to determine before compilation which behavior
1647 may be used. The vswprintf function may have vsnprintf-like behavior or
1648 vice-versa. Behavior detected on one release can theoretically change
1649 with an updated release. Not to mention that configure testing for it
1650 would require the test to be run on the host system, not the build system
1651 which makes cross compilation difficult. Therefore, we make no assumptions
1652 about behavior and try our best to handle every known case, including the
1653 case where wxVsnprintf returns a negative number and fails to set errno.
1654
1655 There is yet one more non-standard implementation and that is our own.
1656 Fortunately, that can be detected at compile-time.
1657
1658 On top of all that, ISO C99 explicitly defines snprintf to write a null
1659 character to the last position of the specified buffer. That would be at
    the given buffer size minus 1. It is supposed to do this even if it
1661 turns out that the buffer is sized too small.
1662
1663 Darwin (tested on 10.5) follows the C99 behavior exactly.
1664
1665 Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1666 errno even when it fails. However, it only seems to ever fail due
1667 to an undersized buffer.
1668 */
1669 #if wxUSE_UNICODE_UTF8
1670 template<typename BufferType>
1671 #else
1672 // we only need one version in non-UTF8 builds and at least two Windows
1673 // compilers have problems with this function template, so use just one
1674 // normal function here
1675 #endif
1676 static int DoStringPrintfV(wxString& str,
1677 const wxString& format, va_list argptr)
1678 {
1679 int size = 1024;
1680
1681 for ( ;; )
1682 {
1683 #if wxUSE_UNICODE_UTF8
1684 BufferType tmp(str, size + 1);
1685 typename BufferType::CharType *buf = tmp;
1686 #else
1687 wxStringBuffer tmp(str, size + 1);
1688 wxChar *buf = tmp;
1689 #endif
1690
1691 if ( !buf )
1692 {
1693 // out of memory
1694
1695 // in UTF-8 build, leaving uninitialized junk in the buffer
1696 // could result in invalid non-empty UTF-8 string, so just
1697 // reset the string to empty on failure:
1698 buf[0] = '\0';
1699 return -1;
1700 }
1701
1702 // wxVsnprintf() may modify the original arg pointer, so pass it
1703 // only a copy
1704 va_list argptrcopy;
1705 wxVaCopy(argptrcopy, argptr);
1706
1707 #ifndef __WXWINCE__
1708 // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1709 errno = 0;
1710 #endif
1711 int len = wxVsnprintf(buf, size, format, argptrcopy);
1712 va_end(argptrcopy);
1713
1714 // some implementations of vsnprintf() don't NUL terminate
1715 // the string if there is not enough space for it so
1716 // always do it manually
1717 // FIXME: This really seems to be the wrong and would be an off-by-one
1718 // bug except the code above allocates an extra character.
1719 buf[size] = _T('\0');
1720
1721 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1722 // total number of characters which would have been written if the
1723 // buffer were large enough (newer standards such as Unix98)
1724 if ( len < 0 )
1725 {
1726 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1727 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1728 // is true if *both* of them use our own implementation,
1729 // otherwise we can't be sure
1730 #if wxUSE_WXVSNPRINTF
1731 // we know that our own implementation of wxVsnprintf() returns -1
1732 // only for a format error - thus there's something wrong with
1733 // the user's format string
1734 buf[0] = '\0';
1735 return -1;
1736 #else // possibly using system version
1737 // assume it only returns error if there is not enough space, but
1738 // as we don't know how much we need, double the current size of
1739 // the buffer
1740 #ifndef __WXWINCE__
1741 if( (errno == EILSEQ) || (errno == EINVAL) )
1742 // If errno was set to one of the two well-known hard errors
1743 // then fail immediately to avoid an infinite loop.
1744 return -1;
1745 else
1746 #endif // __WXWINCE__
1747 // still not enough, as we don't know how much we need, double the
1748 // current size of the buffer
1749 size *= 2;
1750 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1751 }
1752 else if ( len >= size )
1753 {
1754 #if wxUSE_WXVSNPRINTF
1755 // we know that our own implementation of wxVsnprintf() returns
1756 // size+1 when there's not enough space but that's not the size
1757 // of the required buffer!
1758 size *= 2; // so we just double the current size of the buffer
1759 #else
1760 // some vsnprintf() implementations NUL-terminate the buffer and
1761 // some don't in len == size case, to be safe always add 1
1762 // FIXME: I don't quite understand this comment. The vsnprintf
1763 // function is specifically defined to return the number of
1764 // characters printed not including the null terminator.
1765 // So OF COURSE you need to add 1 to get the right buffer size.
1766 // The following line is definitely correct, no question.
1767 size = len + 1;
1768 #endif
1769 }
1770 else // ok, there was enough space
1771 {
1772 break;
1773 }
1774 }
1775
1776 // we could have overshot
1777 str.Shrink();
1778
1779 return str.length();
1780 }
1781
1782 int wxString::PrintfV(const wxString& format, va_list argptr)
1783 {
1784 #if wxUSE_UNICODE_UTF8
1785 #if wxUSE_STL_BASED_WXSTRING
1786 typedef wxStringTypeBuffer<char> Utf8Buffer;
1787 #else
1788 typedef wxStringInternalBuffer Utf8Buffer;
1789 #endif
1790 #endif
1791
1792 #if wxUSE_UTF8_LOCALE_ONLY
1793 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
1794 #else
1795 #if wxUSE_UNICODE_UTF8
1796 if ( wxLocaleIsUtf8 )
1797 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
1798 else
1799 // wxChar* version
1800 return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
1801 #else
1802 return DoStringPrintfV(*this, format, argptr);
1803 #endif // UTF8/WCHAR
1804 #endif
1805 }
1806
1807 // ----------------------------------------------------------------------------
1808 // misc other operations
1809 // ----------------------------------------------------------------------------
1810
// returns true if the string matches the pattern which may contain '*' and
// '?' metacharacters (as usual, '?' matches any character and '*' any number
// of them)
//
// All the other characters of the mask must match the corresponding
// characters of the string exactly. The implementation used is the
// hand-written matcher in the #else branch below, the regex-based one is
// disabled.
bool wxString::Matches(const wxString& mask) const
{
    // I disable this code as it doesn't seem to be faster (in fact, it seems
    // to be much slower) than the old, hand-written code below and using it
    // here requires always linking with libregex even if the user code doesn't
    // use it
    //
    // NOTE(review): the disabled code refers to pszMask which is only
    //               declared in the #else branch, so it would need to be
    //               updated before being enabled again
#if 0 // wxUSE_REGEX
    // first translate the shell-like mask into a regex
    wxString pattern;
    pattern.reserve(wxStrlen(pszMask));

    pattern += _T('^');
    while ( *pszMask )
    {
        switch ( *pszMask )
        {
            case _T('?'):
                pattern += _T('.');
                break;

            case _T('*'):
                pattern += _T(".*");
                break;

            case _T('^'):
            case _T('.'):
            case _T('$'):
            case _T('('):
            case _T(')'):
            case _T('|'):
            case _T('+'):
            case _T('\\'):
                // these characters are special in a RE, quote them
                // (however note that we don't quote '[' and ']' to allow
                // using them for Unix shell like matching)
                pattern += _T('\\');
                // fall through

            default:
                pattern += *pszMask;
        }

        pszMask++;
    }
    pattern += _T('$');

    // and now use it
    return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
#else // !wxUSE_REGEX
    // TODO: this is, of course, awfully inefficient...

    // FIXME-UTF8: implement using iterators, remove #if
#if wxUSE_UNICODE_UTF8
    // in UTF-8 build the matching is done on wchar_t copies of both strings,
    // the buffers must remain alive for as long as the pointers are used
    wxWCharBuffer maskBuf = mask.wc_str();
    wxWCharBuffer txtBuf = wc_str();
    const wxChar *pszMask = maskBuf.data();
    const wxChar *pszTxt = txtBuf.data();
#else
    const wxChar *pszMask = mask.wx_str();
    // the char currently being checked
    const wxChar *pszTxt = wx_str();
#endif

    // the last location where '*' matched
    const wxChar *pszLastStarInText = NULL;
    const wxChar *pszLastStarInMask = NULL;

    // we jump back here when backtracking after a failed match, see the end
    // of the function
match:
    for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
        switch ( *pszMask ) {
            case wxT('?'):
                // '?' matches any character but there must be one
                if ( *pszTxt == wxT('\0') )
                    return false;

                // pszTxt and pszMask will be incremented in the loop statement

                break;

            case wxT('*'):
                {
                    // remember where we started to be able to backtrack later
                    pszLastStarInText = pszTxt;
                    pszLastStarInMask = pszMask;

                    // ignore special chars immediately following this one
                    // (should this be an error?)
                    //
                    // notice that this means that any '?' directly following
                    // a '*' doesn't consume a character of the text
                    while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
                        pszMask++;

                    // if there is nothing more, match
                    if ( *pszMask == wxT('\0') )
                        return true;

                    // are there any other metacharacters in the mask?
                    size_t uiLenMask;
                    const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));

                    if ( pEndMask != NULL ) {
                        // we have to match the string between two metachars
                        uiLenMask = pEndMask - pszMask;
                    }
                    else {
                        // we have to match the remainder of the string
                        uiLenMask = wxStrlen(pszMask);
                    }

                    // look for the literal part of the mask following the
                    // '*' in the text and continue matching after it (the
                    // other candidate positions are only tried by the
                    // backtracking code at the end of the function)
                    wxString strToMatch(pszMask, uiLenMask);
                    const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
                    if ( pMatch == NULL )
                        return false;

                    // -1 to compensate "++" in the loop
                    pszTxt = pMatch + uiLenMask - 1;
                    pszMask += uiLenMask - 1;
                }
                break;

            default:
                // ordinary characters must match exactly (this also fails,
                // as it should, if the text ends before the mask does)
                if ( *pszMask != *pszTxt )
                    return false;
                break;
        }
    }

    // match only if nothing left
    if ( *pszTxt == wxT('\0') )
        return true;

    // if we failed to match, backtrack if we can
    if ( pszLastStarInText ) {
        // retry from the last '*' in the mask but starting one character
        // further in the text than the last time
        pszTxt = pszLastStarInText + 1;
        pszMask = pszLastStarInMask;

        pszLastStarInText = NULL;

        // don't bother resetting pszLastStarInMask, it's unnecessary

        goto match;
    }

    return false;
#endif // wxUSE_REGEX/!wxUSE_REGEX
}
1957
1958 // Count the number of chars
1959 int wxString::Freq(wxUniChar ch) const
1960 {
1961 int count = 0;
1962 for ( const_iterator i = begin(); i != end(); ++i )
1963 {
1964 if ( *i == ch )
1965 count ++;
1966 }
1967 return count;
1968 }
1969
1970 // convert to upper case, return the copy of the string
1971 wxString wxString::Upper() const
1972 { wxString s(*this); return s.MakeUpper(); }
1973
1974 // convert to lower case, return the copy of the string
1975 wxString wxString::Lower() const { wxString s(*this); return s.MakeLower(); }
1976
1977 // ----------------------------------------------------------------------------
1978 // wxUTF8StringBuffer
1979 // ----------------------------------------------------------------------------
1980
1981 #if wxUSE_UNICODE_WCHAR
1982 wxUTF8StringBuffer::~wxUTF8StringBuffer()
1983 {
1984 wxMBConvStrictUTF8 conv;
1985 size_t wlen = conv.ToWChar(NULL, 0, m_buf);
1986 wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
1987
1988 wxStringInternalBuffer wbuf(m_str, wlen);
1989 conv.ToWChar(wbuf, wlen, m_buf);
1990 }
1991
1992 wxUTF8StringBufferLength::~wxUTF8StringBufferLength()
1993 {
1994 wxCHECK_RET(m_lenSet, "length not set");
1995
1996 wxMBConvStrictUTF8 conv;
1997 size_t wlen = conv.ToWChar(NULL, 0, m_buf, m_len);
1998 wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
1999
2000 wxStringInternalBufferLength wbuf(m_str, wlen);
2001 conv.ToWChar(wbuf, wlen, m_buf, m_len);
2002 wbuf.SetLength(wlen);
2003 }
2004 #endif // wxUSE_UNICODE_WCHAR