]> git.saurik.com Git - wxWidgets.git/blob - src/common/string.cpp
fixed incorrect layout width caching in wxHtmlContainerCell (patch #1931479)
[wxWidgets.git] / src / common / string.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
5 // Modified by:
6 // Created: 29/01/98
7 // RCS-ID: $Id$
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
12
13 // ===========================================================================
14 // headers, declarations, constants
15 // ===========================================================================
16
17 // For compilers that support precompilation, includes "wx.h".
18 #include "wx/wxprec.h"
19
20 #ifdef __BORLANDC__
21 #pragma hdrstop
22 #endif
23
24 #ifndef WX_PRECOMP
25 #include "wx/string.h"
26 #include "wx/wxcrtvararg.h"
27 #endif
28
29 #include <ctype.h>
30
31 #ifndef __WXWINCE__
32 #include <errno.h>
33 #endif
34
35 #include <string.h>
36 #include <stdlib.h>
37
38 #include "wx/hashmap.h"
39
40 // string handling functions used by wxString:
41 #if wxUSE_UNICODE_UTF8
42 #define wxStringMemcpy memcpy
43 #define wxStringMemcmp memcmp
44 #define wxStringMemchr memchr
45 #define wxStringStrlen strlen
46 #else
47 #define wxStringMemcpy wxTmemcpy
48 #define wxStringMemcmp wxTmemcmp
49 #define wxStringMemchr wxTmemchr
50 #define wxStringStrlen wxStrlen
51 #endif
52
53
54 // ---------------------------------------------------------------------------
55 // static class variables definition
56 // ---------------------------------------------------------------------------
57
58 //According to STL _must_ be a -1 size_t
59 const size_t wxString::npos = (size_t) -1;
60
61 // ----------------------------------------------------------------------------
62 // global functions
63 // ----------------------------------------------------------------------------
64
65 #if wxUSE_STD_IOSTREAM
66
67 #include <iostream>
68
69 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
70 {
71 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
72 return os << (const char *)str.AsCharBuf();
73 #else
74 return os << str.AsInternal();
75 #endif
76 }
77
78 wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
79 {
80 return os << str.c_str();
81 }
82
83 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str)
84 {
85 return os << str.data();
86 }
87
88 #ifndef __BORLANDC__
89 wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)
90 {
91 return os << str.data();
92 }
93 #endif
94
95 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
96
97 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxString& str)
98 {
99 return wos << str.wc_str();
100 }
101
102 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxCStrData& str)
103 {
104 return wos << str.AsWChar();
105 }
106
107 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxWCharBuffer& str)
108 {
109 return wos << str.data();
110 }
111
112 #endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
113
114 #endif // wxUSE_STD_IOSTREAM
115
116 // ===========================================================================
117 // wxString class core
118 // ===========================================================================
119
120 #if wxUSE_UNICODE_UTF8
121
122 void wxString::PosLenToImpl(size_t pos, size_t len,
123 size_t *implPos, size_t *implLen) const
124 {
125 if ( pos == npos )
126 *implPos = npos;
127 else
128 {
129 const_iterator i = begin() + pos;
130 *implPos = wxStringImpl::const_iterator(i.impl()) - m_impl.begin();
131 if ( len == npos )
132 *implLen = npos;
133 else
134 {
135 // too large length is interpreted as "to the end of the string"
136 // FIXME-UTF8: verify this is the case in std::string, assert
137 // otherwise
138 if ( pos + len > length() )
139 len = length() - pos;
140
141 *implLen = (i + len).impl() - i.impl();
142 }
143 }
144 }
145
146 #endif // wxUSE_UNICODE_UTF8
147
148 // ----------------------------------------------------------------------------
149 // wxCStrData converted strings caching
150 // ----------------------------------------------------------------------------
151
152 // FIXME-UTF8: temporarily disabled because it doesn't work with global
153 // string objects; re-enable after fixing this bug and benchmarking
154 // performance to see if using a hash is a good idea at all
155 #if 0
156
157 // For backward compatibility reasons, it must be possible to assign the value
158 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
159 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
160 // because the memory would be freed immediately, but it has to be valid as long
161 // as the string is not modified, so that code like this still works:
162 //
163 // const wxChar *s = str.c_str();
164 // while ( s ) { ... }
165
166 // FIXME-UTF8: not thread safe!
167 // FIXME-UTF8: we currently clear the cached conversion only when the string is
168 // destroyed, but we should do it when the string is modified, to
169 // keep memory usage down
170 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
171 // invalidated the cache on every change, we could keep the previous
172 // conversion
173 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
174 // to use mb_str() or wc_str() instead of (const [w]char*)c_str()
175
176 template<typename T>
177 static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
178 {
179 typename T::iterator i = hash.find(wxConstCast(s, wxString));
180 if ( i != hash.end() )
181 {
182 free(i->second);
183 hash.erase(i);
184 }
185 }
186
187 #if wxUSE_UNICODE
188 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
189 // so we have to use wxString* here and const-cast when used
190 WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
191 wxStringCharConversionCache);
192 static wxStringCharConversionCache gs_stringsCharCache;
193
194 const char* wxCStrData::AsChar() const
195 {
196 // remove previously cache value, if any (see FIXMEs above):
197 DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
198
199 // convert the string and keep it:
200 const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
201 m_str->mb_str().release();
202
203 return s + m_offset;
204 }
205 #endif // wxUSE_UNICODE
206
207 #if !wxUSE_UNICODE_WCHAR
208 WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
209 wxStringWCharConversionCache);
210 static wxStringWCharConversionCache gs_stringsWCharCache;
211
212 const wchar_t* wxCStrData::AsWChar() const
213 {
214 // remove previously cache value, if any (see FIXMEs above):
215 DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
216
217 // convert the string and keep it:
218 const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
219 m_str->wc_str().release();
220
221 return s + m_offset;
222 }
223 #endif // !wxUSE_UNICODE_WCHAR
224
225 wxString::~wxString()
226 {
227 #if wxUSE_UNICODE
228 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
229 DeleteStringFromConversionCache(gs_stringsCharCache, this);
230 #endif
231 #if !wxUSE_UNICODE_WCHAR
232 DeleteStringFromConversionCache(gs_stringsWCharCache, this);
233 #endif
234 }
235 #endif
236
237 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
238 const char* wxCStrData::AsChar() const
239 {
240 #if wxUSE_UNICODE_UTF8
241 if ( wxLocaleIsUtf8 )
242 return AsInternal();
243 #endif
244 // under non-UTF8 locales, we have to convert the internal UTF-8
245 // representation using wxConvLibc and cache the result
246
247 wxString *str = wxConstCast(m_str, wxString);
248
249 // convert the string:
250 //
251 // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
252 // have it) but it's unfortunately not obvious to implement
253 // because we don't know how big buffer do we need for the
254 // given string length (in case of multibyte encodings, e.g.
255 // ISO-2022-JP or UTF-8 when internal representation is wchar_t)
256 //
257 // One idea would be to store more than just m_convertedToChar
258 // in wxString: then we could record the length of the string
259 // which was converted the last time and try to reuse the same
260 // buffer if the current length is not greater than it (this
261 // could still fail because string could have been modified in
262 // place but it would work most of the time, so we'd do it and
263 // only allocate the new buffer if in-place conversion returned
264 // an error). We could also store a bit saying if the string
265 // was modified since the last conversion (and update it in all
266 // operation modifying the string, of course) to avoid unneeded
267 // consequential conversions. But both of these ideas require
268 // adding more fields to wxString and require profiling results
269 // to be sure that we really gain enough from them to justify
270 // doing it.
271 wxCharBuffer buf(str->mb_str());
272
273 // if it failed, return empty string and not NULL to avoid crashes in code
274 // written with either wxWidgets 2 wxString or std::string behaviour in
275 // mind: neither of them ever returns NULL and so we shouldn't neither
276 if ( !buf )
277 return "";
278
279 if ( str->m_convertedToChar &&
280 strlen(buf) == strlen(str->m_convertedToChar) )
281 {
282 // keep the same buffer for as long as possible, so that several calls
283 // to c_str() in a row still work:
284 strcpy(str->m_convertedToChar, buf);
285 }
286 else
287 {
288 str->m_convertedToChar = buf.release();
289 }
290
291 // and keep it:
292 return str->m_convertedToChar + m_offset;
293 }
294 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
295
296 #if !wxUSE_UNICODE_WCHAR
297 const wchar_t* wxCStrData::AsWChar() const
298 {
299 wxString *str = wxConstCast(m_str, wxString);
300
301 // convert the string:
302 wxWCharBuffer buf(str->wc_str());
303
304 // notice that here, unlike above in AsChar(), conversion can't fail as our
305 // internal UTF-8 is always well-formed -- or the string was corrupted and
306 // all bets are off anyhow
307
308 // FIXME-UTF8: do the conversion in-place in the existing buffer
309 if ( str->m_convertedToWChar &&
310 wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
311 {
312 // keep the same buffer for as long as possible, so that several calls
313 // to c_str() in a row still work:
314 memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
315 }
316 else
317 {
318 str->m_convertedToWChar = buf.release();
319 }
320
321 // and keep it:
322 return str->m_convertedToWChar + m_offset;
323 }
324 #endif // !wxUSE_UNICODE_WCHAR
325
326 // ===========================================================================
327 // wxString class core
328 // ===========================================================================
329
330 // ---------------------------------------------------------------------------
331 // construction and conversion
332 // ---------------------------------------------------------------------------
333
334 #if wxUSE_UNICODE_WCHAR
335 /* static */
336 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
337 const wxMBConv& conv)
338 {
339 // anything to do?
340 if ( !psz || nLength == 0 )
341 return SubstrBufFromMB(L"", 0);
342
343 if ( nLength == npos )
344 nLength = wxNO_LEN;
345
346 size_t wcLen;
347 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
348 if ( !wcLen )
349 return SubstrBufFromMB(_T(""), 0);
350 else
351 return SubstrBufFromMB(wcBuf, wcLen);
352 }
353 #endif // wxUSE_UNICODE_WCHAR
354
355 #if wxUSE_UNICODE_UTF8
356 /* static */
357 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
358 const wxMBConv& conv)
359 {
360 // anything to do?
361 if ( !psz || nLength == 0 )
362 return SubstrBufFromMB("", 0);
363
364 // if psz is already in UTF-8, we don't have to do the roundtrip to
365 // wchar_t* and back:
366 if ( conv.IsUTF8() )
367 {
368 // we need to validate the input because UTF8 iterators assume valid
369 // UTF-8 sequence and psz may be invalid:
370 if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
371 {
372 return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz), nLength);
373 }
374 // else: do the roundtrip through wchar_t*
375 }
376
377 if ( nLength == npos )
378 nLength = wxNO_LEN;
379
380 // first convert to wide string:
381 size_t wcLen;
382 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
383 if ( !wcLen )
384 return SubstrBufFromMB("", 0);
385
386 // and then to UTF-8:
387 SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
388 // widechar -> UTF-8 conversion isn't supposed to ever fail:
389 wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
390
391 return buf;
392 }
393 #endif // wxUSE_UNICODE_UTF8
394
395 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
396 /* static */
397 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
398 const wxMBConv& conv)
399 {
400 // anything to do?
401 if ( !pwz || nLength == 0 )
402 return SubstrBufFromWC("", 0);
403
404 if ( nLength == npos )
405 nLength = wxNO_LEN;
406
407 size_t mbLen;
408 wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
409 if ( !mbLen )
410 return SubstrBufFromWC("", 0);
411 else
412 return SubstrBufFromWC(mbBuf, mbLen);
413 }
414 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
415
416
417 #if wxUSE_UNICODE_WCHAR
418
419 //Convert wxString in Unicode mode to a multi-byte string
420 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
421 {
422 return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL);
423 }
424
425 #elif wxUSE_UNICODE_UTF8
426
427 const wxWCharBuffer wxString::wc_str() const
428 {
429 return wxMBConvStrictUTF8().cMB2WC
430 (
431 m_impl.c_str(),
432 m_impl.length() + 1, // size, not length
433 NULL
434 );
435 }
436
437 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
438 {
439 if ( conv.IsUTF8() )
440 return wxCharBuffer::CreateNonOwned(m_impl.c_str());
441
442 // FIXME-UTF8: use wc_str() here once we have buffers with length
443
444 size_t wcLen;
445 wxWCharBuffer wcBuf(wxMBConvStrictUTF8().cMB2WC
446 (
447 m_impl.c_str(),
448 m_impl.length() + 1, // size
449 &wcLen
450 ));
451 if ( !wcLen )
452 return wxCharBuffer("");
453
454 return conv.cWC2MB(wcBuf, wcLen+1, NULL);
455 }
456
457 #else // ANSI
458
459 //Converts this string to a wide character string if unicode
460 //mode is not enabled and wxUSE_WCHAR_T is enabled
461 const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
462 {
463 return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL);
464 }
465
466 #endif // Unicode/ANSI
467
468 // shrink to minimal size (releasing extra memory)
469 bool wxString::Shrink()
470 {
471 wxString tmp(begin(), end());
472 swap(tmp);
473 return tmp.length() == length();
474 }
475
476 // deprecated compatibility code:
477 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
478 wxStringCharType *wxString::GetWriteBuf(size_t nLen)
479 {
480 return DoGetWriteBuf(nLen);
481 }
482
483 void wxString::UngetWriteBuf()
484 {
485 DoUngetWriteBuf();
486 }
487
488 void wxString::UngetWriteBuf(size_t nLen)
489 {
490 DoUngetWriteBuf(nLen);
491 }
492 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
493
494
495 // ---------------------------------------------------------------------------
496 // data access
497 // ---------------------------------------------------------------------------
498
499 // all functions are inline in string.h
500
501 // ---------------------------------------------------------------------------
502 // concatenation operators
503 // ---------------------------------------------------------------------------
504
505 /*
506 * concatenation functions come in 5 flavours:
507 * string + string
508 * char + string and string + char
509 * C str + string and string + C str
510 */
511
512 wxString operator+(const wxString& str1, const wxString& str2)
513 {
514 #if !wxUSE_STL_BASED_WXSTRING
515 wxASSERT( str1.IsValid() );
516 wxASSERT( str2.IsValid() );
517 #endif
518
519 wxString s = str1;
520 s += str2;
521
522 return s;
523 }
524
525 wxString operator+(const wxString& str, wxUniChar ch)
526 {
527 #if !wxUSE_STL_BASED_WXSTRING
528 wxASSERT( str.IsValid() );
529 #endif
530
531 wxString s = str;
532 s += ch;
533
534 return s;
535 }
536
537 wxString operator+(wxUniChar ch, const wxString& str)
538 {
539 #if !wxUSE_STL_BASED_WXSTRING
540 wxASSERT( str.IsValid() );
541 #endif
542
543 wxString s = ch;
544 s += str;
545
546 return s;
547 }
548
549 wxString operator+(const wxString& str, const char *psz)
550 {
551 #if !wxUSE_STL_BASED_WXSTRING
552 wxASSERT( str.IsValid() );
553 #endif
554
555 wxString s;
556 if ( !s.Alloc(strlen(psz) + str.length()) ) {
557 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
558 }
559 s += str;
560 s += psz;
561
562 return s;
563 }
564
565 wxString operator+(const wxString& str, const wchar_t *pwz)
566 {
567 #if !wxUSE_STL_BASED_WXSTRING
568 wxASSERT( str.IsValid() );
569 #endif
570
571 wxString s;
572 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
573 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
574 }
575 s += str;
576 s += pwz;
577
578 return s;
579 }
580
581 wxString operator+(const char *psz, const wxString& str)
582 {
583 #if !wxUSE_STL_BASED_WXSTRING
584 wxASSERT( str.IsValid() );
585 #endif
586
587 wxString s;
588 if ( !s.Alloc(strlen(psz) + str.length()) ) {
589 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
590 }
591 s = psz;
592 s += str;
593
594 return s;
595 }
596
597 wxString operator+(const wchar_t *pwz, const wxString& str)
598 {
599 #if !wxUSE_STL_BASED_WXSTRING
600 wxASSERT( str.IsValid() );
601 #endif
602
603 wxString s;
604 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
605 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
606 }
607 s = pwz;
608 s += str;
609
610 return s;
611 }
612
613 // ---------------------------------------------------------------------------
614 // string comparison
615 // ---------------------------------------------------------------------------
616
617 bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
618 {
619 return (length() == 1) && (compareWithCase ? GetChar(0u) == c
620 : wxToupper(GetChar(0u)) == wxToupper(c));
621 }
622
623 #ifdef HAVE_STD_STRING_COMPARE
624
625 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
626 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
627 // sort strings in characters code point order by sorting the byte sequence
628 // in byte values order (i.e. what strcmp() and memcmp() do).
629
630 int wxString::compare(const wxString& str) const
631 {
632 return m_impl.compare(str.m_impl);
633 }
634
635 int wxString::compare(size_t nStart, size_t nLen,
636 const wxString& str) const
637 {
638 size_t pos, len;
639 PosLenToImpl(nStart, nLen, &pos, &len);
640 return m_impl.compare(pos, len, str.m_impl);
641 }
642
643 int wxString::compare(size_t nStart, size_t nLen,
644 const wxString& str,
645 size_t nStart2, size_t nLen2) const
646 {
647 size_t pos, len;
648 PosLenToImpl(nStart, nLen, &pos, &len);
649
650 size_t pos2, len2;
651 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
652
653 return m_impl.compare(pos, len, str.m_impl, pos2, len2);
654 }
655
656 int wxString::compare(const char* sz) const
657 {
658 return m_impl.compare(ImplStr(sz));
659 }
660
661 int wxString::compare(const wchar_t* sz) const
662 {
663 return m_impl.compare(ImplStr(sz));
664 }
665
666 int wxString::compare(size_t nStart, size_t nLen,
667 const char* sz, size_t nCount) const
668 {
669 size_t pos, len;
670 PosLenToImpl(nStart, nLen, &pos, &len);
671
672 SubstrBufFromMB str(ImplStr(sz, nCount));
673
674 return m_impl.compare(pos, len, str.data, str.len);
675 }
676
677 int wxString::compare(size_t nStart, size_t nLen,
678 const wchar_t* sz, size_t nCount) const
679 {
680 size_t pos, len;
681 PosLenToImpl(nStart, nLen, &pos, &len);
682
683 SubstrBufFromWC str(ImplStr(sz, nCount));
684
685 return m_impl.compare(pos, len, str.data, str.len);
686 }
687
688 #else // !HAVE_STD_STRING_COMPARE
689
690 static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
691 const wxStringCharType* s2, size_t l2)
692 {
693 if( l1 == l2 )
694 return wxStringMemcmp(s1, s2, l1);
695 else if( l1 < l2 )
696 {
697 int ret = wxStringMemcmp(s1, s2, l1);
698 return ret == 0 ? -1 : ret;
699 }
700 else
701 {
702 int ret = wxStringMemcmp(s1, s2, l2);
703 return ret == 0 ? +1 : ret;
704 }
705 }
706
707 int wxString::compare(const wxString& str) const
708 {
709 return ::wxDoCmp(m_impl.data(), m_impl.length(),
710 str.m_impl.data(), str.m_impl.length());
711 }
712
713 int wxString::compare(size_t nStart, size_t nLen,
714 const wxString& str) const
715 {
716 wxASSERT(nStart <= length());
717 size_type strLen = length() - nStart;
718 nLen = strLen < nLen ? strLen : nLen;
719
720 size_t pos, len;
721 PosLenToImpl(nStart, nLen, &pos, &len);
722
723 return ::wxDoCmp(m_impl.data() + pos, len,
724 str.m_impl.data(), str.m_impl.length());
725 }
726
727 int wxString::compare(size_t nStart, size_t nLen,
728 const wxString& str,
729 size_t nStart2, size_t nLen2) const
730 {
731 wxASSERT(nStart <= length());
732 wxASSERT(nStart2 <= str.length());
733 size_type strLen = length() - nStart,
734 strLen2 = str.length() - nStart2;
735 nLen = strLen < nLen ? strLen : nLen;
736 nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
737
738 size_t pos, len;
739 PosLenToImpl(nStart, nLen, &pos, &len);
740 size_t pos2, len2;
741 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
742
743 return ::wxDoCmp(m_impl.data() + pos, len,
744 str.m_impl.data() + pos2, len2);
745 }
746
747 int wxString::compare(const char* sz) const
748 {
749 SubstrBufFromMB str(ImplStr(sz, npos));
750 if ( str.len == npos )
751 str.len = wxStringStrlen(str.data);
752 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
753 }
754
755 int wxString::compare(const wchar_t* sz) const
756 {
757 SubstrBufFromWC str(ImplStr(sz, npos));
758 if ( str.len == npos )
759 str.len = wxStringStrlen(str.data);
760 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
761 }
762
763 int wxString::compare(size_t nStart, size_t nLen,
764 const char* sz, size_t nCount) const
765 {
766 wxASSERT(nStart <= length());
767 size_type strLen = length() - nStart;
768 nLen = strLen < nLen ? strLen : nLen;
769
770 size_t pos, len;
771 PosLenToImpl(nStart, nLen, &pos, &len);
772
773 SubstrBufFromMB str(ImplStr(sz, nCount));
774 if ( str.len == npos )
775 str.len = wxStringStrlen(str.data);
776
777 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
778 }
779
780 int wxString::compare(size_t nStart, size_t nLen,
781 const wchar_t* sz, size_t nCount) const
782 {
783 wxASSERT(nStart <= length());
784 size_type strLen = length() - nStart;
785 nLen = strLen < nLen ? strLen : nLen;
786
787 size_t pos, len;
788 PosLenToImpl(nStart, nLen, &pos, &len);
789
790 SubstrBufFromWC str(ImplStr(sz, nCount));
791 if ( str.len == npos )
792 str.len = wxStringStrlen(str.data);
793
794 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
795 }
796
797 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
798
799
800 // ---------------------------------------------------------------------------
801 // find_{first,last}_[not]_of functions
802 // ---------------------------------------------------------------------------
803
804 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
805
806 // NB: All these functions are implemented with the argument being wxChar*,
807 // i.e. widechar string in any Unicode build, even though native string
808 // representation is char* in the UTF-8 build. This is because we couldn't
809 // use memchr() to determine if a character is in a set encoded as UTF-8.
810
811 size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
812 {
813 return find_first_of(sz, nStart, wxStrlen(sz));
814 }
815
816 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
817 {
818 return find_first_not_of(sz, nStart, wxStrlen(sz));
819 }
820
821 size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
822 {
823 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
824
825 size_t idx = nStart;
826 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
827 {
828 if ( wxTmemchr(sz, *i, n) )
829 return idx;
830 }
831
832 return npos;
833 }
834
835 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
836 {
837 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
838
839 size_t idx = nStart;
840 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
841 {
842 if ( !wxTmemchr(sz, *i, n) )
843 return idx;
844 }
845
846 return npos;
847 }
848
849
850 size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
851 {
852 return find_last_of(sz, nStart, wxStrlen(sz));
853 }
854
855 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
856 {
857 return find_last_not_of(sz, nStart, wxStrlen(sz));
858 }
859
860 size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
861 {
862 size_t len = length();
863
864 if ( nStart == npos )
865 {
866 nStart = len - 1;
867 }
868 else
869 {
870 wxASSERT_MSG( nStart <= len, _T("invalid index") );
871 }
872
873 size_t idx = nStart;
874 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
875 i != rend(); --idx, ++i )
876 {
877 if ( wxTmemchr(sz, *i, n) )
878 return idx;
879 }
880
881 return npos;
882 }
883
884 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
885 {
886 size_t len = length();
887
888 if ( nStart == npos )
889 {
890 nStart = len - 1;
891 }
892 else
893 {
894 wxASSERT_MSG( nStart <= len, _T("invalid index") );
895 }
896
897 size_t idx = nStart;
898 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
899 i != rend(); --idx, ++i )
900 {
901 if ( !wxTmemchr(sz, *i, n) )
902 return idx;
903 }
904
905 return npos;
906 }
907
908 size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
909 {
910 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
911
912 size_t idx = nStart;
913 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
914 {
915 if ( *i != ch )
916 return idx;
917 }
918
919 return npos;
920 }
921
922 size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
923 {
924 size_t len = length();
925
926 if ( nStart == npos )
927 {
928 nStart = len - 1;
929 }
930 else
931 {
932 wxASSERT_MSG( nStart <= len, _T("invalid index") );
933 }
934
935 size_t idx = nStart;
936 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
937 i != rend(); --idx, ++i )
938 {
939 if ( *i != ch )
940 return idx;
941 }
942
943 return npos;
944 }
945
946 // the functions above were implemented for wchar_t* arguments in Unicode
947 // build and char* in ANSI build; below are implementations for the other
948 // version:
949 #if wxUSE_UNICODE
950 #define wxOtherCharType char
951 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
952 #else
953 #define wxOtherCharType wchar_t
954 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
955 #endif
956
957 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
958 { return find_first_of(STRCONV(sz), nStart); }
959
960 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
961 size_t n) const
962 { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
963 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
964 { return find_last_of(STRCONV(sz), nStart); }
965 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
966 size_t n) const
967 { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
968 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
969 { return find_first_not_of(STRCONV(sz), nStart); }
970 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
971 size_t n) const
972 { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
973 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
974 { return find_last_not_of(STRCONV(sz), nStart); }
975 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
976 size_t n) const
977 { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
978
979 #undef wxOtherCharType
980 #undef STRCONV
981
982 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
983
984 // ===========================================================================
985 // other common string functions
986 // ===========================================================================
987
988 int wxString::CmpNoCase(const wxString& s) const
989 {
990 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
991
992 const_iterator i1 = begin();
993 const_iterator end1 = end();
994 const_iterator i2 = s.begin();
995 const_iterator end2 = s.end();
996
997 for ( ; i1 != end1 && i2 != end2; ++i1, ++i2 )
998 {
999 wxUniChar lower1 = (wxChar)wxTolower(*i1);
1000 wxUniChar lower2 = (wxChar)wxTolower(*i2);
1001 if ( lower1 != lower2 )
1002 return lower1 < lower2 ? -1 : 1;
1003 }
1004
1005 size_t len1 = length();
1006 size_t len2 = s.length();
1007
1008 if ( len1 < len2 )
1009 return -1;
1010 else if ( len1 > len2 )
1011 return 1;
1012 return 0;
1013 }
1014
1015
1016 #if wxUSE_UNICODE
1017
1018 #ifdef __MWERKS__
1019 #ifndef __SCHAR_MAX__
1020 #define __SCHAR_MAX__ 127
1021 #endif
1022 #endif
1023
1024 wxString wxString::FromAscii(const char *ascii, size_t len)
1025 {
1026 if (!ascii || len == 0)
1027 return wxEmptyString;
1028
1029 wxString res;
1030
1031 {
1032 wxStringInternalBuffer buf(res, len);
1033 wxStringCharType *dest = buf;
1034
1035 for ( ; len > 0; --len )
1036 {
1037 unsigned char c = (unsigned char)*ascii++;
1038 wxASSERT_MSG( c < 0x80,
1039 _T("Non-ASCII value passed to FromAscii().") );
1040
1041 *dest++ = (wchar_t)c;
1042 }
1043 }
1044
1045 return res;
1046 }
1047
1048 wxString wxString::FromAscii(const char *ascii)
1049 {
1050 return FromAscii(ascii, wxStrlen(ascii));
1051 }
1052
1053 wxString wxString::FromAscii(char ascii)
1054 {
1055 // What do we do with '\0' ?
1056
1057 unsigned char c = (unsigned char)ascii;
1058
1059 wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") );
1060
1061 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1062 return wxString(wxUniChar((wchar_t)c));
1063 }
1064
1065 const wxCharBuffer wxString::ToAscii() const
1066 {
1067 // this will allocate enough space for the terminating NUL too
1068 wxCharBuffer buffer(length());
1069 char *dest = buffer.data();
1070
1071 for ( const_iterator i = begin(); i != end(); ++i )
1072 {
1073 wxUniChar c(*i);
1074 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1075 *dest++ = c.IsAscii() ? (char)c : '_';
1076
1077 // the output string can't have embedded NULs anyhow, so we can safely
1078 // stop at first of them even if we do have any
1079 if ( !c )
1080 break;
1081 }
1082
1083 return buffer;
1084 }
1085
1086 #endif // wxUSE_UNICODE
1087
1088 // extract string of length nCount starting at nFirst
1089 wxString wxString::Mid(size_t nFirst, size_t nCount) const
1090 {
1091 size_t nLen = length();
1092
1093 // default value of nCount is npos and means "till the end"
1094 if ( nCount == npos )
1095 {
1096 nCount = nLen - nFirst;
1097 }
1098
1099 // out-of-bounds requests return sensible things
1100 if ( nFirst + nCount > nLen )
1101 {
1102 nCount = nLen - nFirst;
1103 }
1104
1105 if ( nFirst > nLen )
1106 {
1107 // AllocCopy() will return empty string
1108 return wxEmptyString;
1109 }
1110
1111 wxString dest(*this, nFirst, nCount);
1112 if ( dest.length() != nCount )
1113 {
1114 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1115 }
1116
1117 return dest;
1118 }
1119
1120 // check that the string starts with prefix and return the rest of the string
1121 // in the provided pointer if it is not NULL, otherwise return false
1122 bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
1123 {
1124 if ( compare(0, prefix.length(), prefix) != 0 )
1125 return false;
1126
1127 if ( rest )
1128 {
1129 // put the rest of the string into provided pointer
1130 rest->assign(*this, prefix.length(), npos);
1131 }
1132
1133 return true;
1134 }
1135
1136
1137 // check that the string ends with suffix and return the rest of it in the
1138 // provided pointer if it is not NULL, otherwise return false
1139 bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
1140 {
1141 int start = length() - suffix.length();
1142
1143 if ( start < 0 || compare(start, npos, suffix) != 0 )
1144 return false;
1145
1146 if ( rest )
1147 {
1148 // put the rest of the string into provided pointer
1149 rest->assign(*this, 0, start);
1150 }
1151
1152 return true;
1153 }
1154
1155
1156 // extract nCount last (rightmost) characters
1157 wxString wxString::Right(size_t nCount) const
1158 {
1159 if ( nCount > length() )
1160 nCount = length();
1161
1162 wxString dest(*this, length() - nCount, nCount);
1163 if ( dest.length() != nCount ) {
1164 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1165 }
1166 return dest;
1167 }
1168
1169 // get all characters after the last occurence of ch
1170 // (returns the whole string if ch not found)
1171 wxString wxString::AfterLast(wxUniChar ch) const
1172 {
1173 wxString str;
1174 int iPos = Find(ch, true);
1175 if ( iPos == wxNOT_FOUND )
1176 str = *this;
1177 else
1178 str = wx_str() + iPos + 1;
1179
1180 return str;
1181 }
1182
1183 // extract nCount first (leftmost) characters
1184 wxString wxString::Left(size_t nCount) const
1185 {
1186 if ( nCount > length() )
1187 nCount = length();
1188
1189 wxString dest(*this, 0, nCount);
1190 if ( dest.length() != nCount ) {
1191 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1192 }
1193 return dest;
1194 }
1195
1196 // get all characters before the first occurence of ch
1197 // (returns the whole string if ch not found)
1198 wxString wxString::BeforeFirst(wxUniChar ch) const
1199 {
1200 int iPos = Find(ch);
1201 if ( iPos == wxNOT_FOUND ) iPos = length();
1202 return wxString(*this, 0, iPos);
1203 }
1204
1205 /// get all characters before the last occurence of ch
1206 /// (returns empty string if ch not found)
1207 wxString wxString::BeforeLast(wxUniChar ch) const
1208 {
1209 wxString str;
1210 int iPos = Find(ch, true);
1211 if ( iPos != wxNOT_FOUND && iPos != 0 )
1212 str = wxString(c_str(), iPos);
1213
1214 return str;
1215 }
1216
1217 /// get all characters after the first occurence of ch
1218 /// (returns empty string if ch not found)
1219 wxString wxString::AfterFirst(wxUniChar ch) const
1220 {
1221 wxString str;
1222 int iPos = Find(ch);
1223 if ( iPos != wxNOT_FOUND )
1224 str = wx_str() + iPos + 1;
1225
1226 return str;
1227 }
1228
1229 // replace first (or all) occurences of some substring with another one
1230 size_t wxString::Replace(const wxString& strOld,
1231 const wxString& strNew, bool bReplaceAll)
1232 {
1233 // if we tried to replace an empty string we'd enter an infinite loop below
1234 wxCHECK_MSG( !strOld.empty(), 0,
1235 _T("wxString::Replace(): invalid parameter") );
1236
1237 size_t uiCount = 0; // count of replacements made
1238
1239 size_t uiOldLen = strOld.length();
1240 size_t uiNewLen = strNew.length();
1241
1242 for ( size_t dwPos = 0; dwPos < length(); )
1243 {
1244 dwPos = find(strOld, dwPos);
1245 if ( dwPos == npos )
1246 break;
1247
1248 // replace this occurance of the old string with the new one
1249 replace(dwPos, uiOldLen, strNew, uiNewLen);
1250
1251 // move up pos past the string that was replaced
1252 dwPos += uiNewLen;
1253
1254 // increase replace count
1255 ++uiCount;
1256
1257 // stop after the first one?
1258 if ( !bReplaceAll )
1259 break;
1260 }
1261
1262 return uiCount;
1263 }
1264
1265 bool wxString::IsAscii() const
1266 {
1267 for ( const_iterator i = begin(); i != end(); ++i )
1268 {
1269 if ( !(*i).IsAscii() )
1270 return false;
1271 }
1272
1273 return true;
1274 }
1275
1276 bool wxString::IsWord() const
1277 {
1278 for ( const_iterator i = begin(); i != end(); ++i )
1279 {
1280 if ( !wxIsalpha(*i) )
1281 return false;
1282 }
1283
1284 return true;
1285 }
1286
1287 bool wxString::IsNumber() const
1288 {
1289 if ( empty() )
1290 return true;
1291
1292 const_iterator i = begin();
1293
1294 if ( *i == _T('-') || *i == _T('+') )
1295 ++i;
1296
1297 for ( ; i != end(); ++i )
1298 {
1299 if ( !wxIsdigit(*i) )
1300 return false;
1301 }
1302
1303 return true;
1304 }
1305
1306 wxString wxString::Strip(stripType w) const
1307 {
1308 wxString s = *this;
1309 if ( w & leading ) s.Trim(false);
1310 if ( w & trailing ) s.Trim(true);
1311 return s;
1312 }
1313
1314 // ---------------------------------------------------------------------------
1315 // case conversion
1316 // ---------------------------------------------------------------------------
1317
1318 wxString& wxString::MakeUpper()
1319 {
1320 for ( iterator it = begin(), en = end(); it != en; ++it )
1321 *it = (wxChar)wxToupper(*it);
1322
1323 return *this;
1324 }
1325
1326 wxString& wxString::MakeLower()
1327 {
1328 for ( iterator it = begin(), en = end(); it != en; ++it )
1329 *it = (wxChar)wxTolower(*it);
1330
1331 return *this;
1332 }
1333
1334 // ---------------------------------------------------------------------------
1335 // trimming and padding
1336 // ---------------------------------------------------------------------------
1337
1338 // some compilers (VC++ 6.0 not to name them) return true for a call to
1339 // isspace('\xEA') in the C locale which seems to be broken to me, but we have
1340 // to live with this by checking that the character is a 7 bit one - even if
1341 // this may fail to detect some spaces (I don't know if Unicode doesn't have
1342 // space-like symbols somewhere except in the first 128 chars), it is arguably
1343 // still better than trimming away accented letters
1344 inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1345
1346 // trims spaces (in the sense of isspace) from left or right side
1347 wxString& wxString::Trim(bool bFromRight)
1348 {
1349 // first check if we're going to modify the string at all
1350 if ( !empty() &&
1351 (
1352 (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1353 (!bFromRight && wxSafeIsspace(GetChar(0u)))
1354 )
1355 )
1356 {
1357 if ( bFromRight )
1358 {
1359 // find last non-space character
1360 reverse_iterator psz = rbegin();
1361 while ( (psz != rend()) && wxSafeIsspace(*psz) )
1362 ++psz;
1363
1364 // truncate at trailing space start
1365 erase(psz.base(), end());
1366 }
1367 else
1368 {
1369 // find first non-space character
1370 iterator psz = begin();
1371 while ( (psz != end()) && wxSafeIsspace(*psz) )
1372 ++psz;
1373
1374 // fix up data and length
1375 erase(begin(), psz);
1376 }
1377 }
1378
1379 return *this;
1380 }
1381
1382 // adds nCount characters chPad to the string from either side
1383 wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1384 {
1385 wxString s(chPad, nCount);
1386
1387 if ( bFromRight )
1388 *this += s;
1389 else
1390 {
1391 s += *this;
1392 swap(s);
1393 }
1394
1395 return *this;
1396 }
1397
1398 // truncate the string
1399 wxString& wxString::Truncate(size_t uiLen)
1400 {
1401 if ( uiLen < length() )
1402 {
1403 erase(begin() + uiLen, end());
1404 }
1405 //else: nothing to do, string is already short enough
1406
1407 return *this;
1408 }
1409
1410 // ---------------------------------------------------------------------------
1411 // finding (return wxNOT_FOUND if not found and index otherwise)
1412 // ---------------------------------------------------------------------------
1413
1414 // find a character
1415 int wxString::Find(wxUniChar ch, bool bFromEnd) const
1416 {
1417 size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1418
1419 return (idx == npos) ? wxNOT_FOUND : (int)idx;
1420 }
1421
1422 // ----------------------------------------------------------------------------
1423 // conversion to numbers
1424 // ----------------------------------------------------------------------------
1425
1426 // The implementation of all the functions below is exactly the same so factor
1427 // it out. Note that number extraction works correctly on UTF-8 strings, so
1428 // we can use wxStringCharType and wx_str() for maximum efficiency.
1429
1430 #ifndef __WXWINCE__
1431 #define DO_IF_NOT_WINCE(x) x
1432 #else
1433 #define DO_IF_NOT_WINCE(x)
1434 #endif
1435
1436 #define WX_STRING_TO_INT_TYPE(out, base, func, T) \
1437 wxCHECK_MSG( out, false, _T("NULL output pointer") ); \
1438 wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") ); \
1439 \
1440 DO_IF_NOT_WINCE( errno = 0; ) \
1441 \
1442 const wxStringCharType *start = wx_str(); \
1443 wxStringCharType *end; \
1444 T val = func(start, &end, base); \
1445 \
1446 /* return true only if scan was stopped by the terminating NUL and */ \
1447 /* if the string was not empty to start with and no under/overflow */ \
1448 /* occurred: */ \
1449 if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) ) \
1450 return false; \
1451 *out = val; \
1452 return true
1453
1454 bool wxString::ToLong(long *pVal, int base) const
1455 {
1456 WX_STRING_TO_INT_TYPE(pVal, base, wxStrtol, long);
1457 }
1458
1459 bool wxString::ToULong(unsigned long *pVal, int base) const
1460 {
1461 WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoul, unsigned long);
1462 }
1463
1464 bool wxString::ToLongLong(wxLongLong_t *pVal, int base) const
1465 {
1466 WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoll, wxLongLong_t);
1467 }
1468
1469 bool wxString::ToULongLong(wxULongLong_t *pVal, int base) const
1470 {
1471 WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoull, wxULongLong_t);
1472 }
1473
1474 bool wxString::ToDouble(double *pVal) const
1475 {
1476 wxCHECK_MSG( pVal, false, _T("NULL output pointer") );
1477
1478 DO_IF_NOT_WINCE( errno = 0; )
1479
1480 const wxChar *start = c_str();
1481 wxChar *end;
1482 double val = wxStrtod(start, &end);
1483
1484 // return true only if scan was stopped by the terminating NUL and if the
1485 // string was not empty to start with and no under/overflow occurred
1486 if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )
1487 return false;
1488
1489 *pVal = val;
1490
1491 return true;
1492 }
1493
1494 // ---------------------------------------------------------------------------
1495 // formatted output
1496 // ---------------------------------------------------------------------------
1497
1498 #if !wxUSE_UTF8_LOCALE_ONLY
1499 /* static */
1500 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1501 wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
1502 #else
1503 wxString wxString::DoFormatWchar(const wxChar *format, ...)
1504 #endif
1505 {
1506 va_list argptr;
1507 va_start(argptr, format);
1508
1509 wxString s;
1510 s.PrintfV(format, argptr);
1511
1512 va_end(argptr);
1513
1514 return s;
1515 }
1516 #endif // !wxUSE_UTF8_LOCALE_ONLY
1517
#if wxUSE_UNICODE_UTF8
// Return a new string containing the variable arguments formatted according
// to the char format string, the formatting itself is done by PrintfV().
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
1533
1534 /* static */
1535 wxString wxString::FormatV(const wxString& format, va_list argptr)
1536 {
1537 wxString s;
1538 s.PrintfV(format, argptr);
1539 return s;
1540 }
1541
1542 #if !wxUSE_UTF8_LOCALE_ONLY
1543 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1544 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
1545 #else
1546 int wxString::DoPrintfWchar(const wxChar *format, ...)
1547 #endif
1548 {
1549 va_list argptr;
1550 va_start(argptr, format);
1551
1552 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1553 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1554 // because it's the only cast that works safely for downcasting when
1555 // multiple inheritance is used:
1556 wxString *str = static_cast<wxString*>(this);
1557 #else
1558 wxString *str = this;
1559 #endif
1560
1561 int iLen = str->PrintfV(format, argptr);
1562
1563 va_end(argptr);
1564
1565 return iLen;
1566 }
1567 #endif // !wxUSE_UTF8_LOCALE_ONLY
1568
#if wxUSE_UNICODE_UTF8
// Replace the contents of the string with the variable arguments formatted
// according to the char format string and return the value returned by
// PrintfV(), which does the real work.
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);
    const int len = PrintfV(format, args);
    va_end(args);

    return len;
}
#endif // wxUSE_UNICODE_UTF8
1582
1583 /*
1584 Uses wxVsnprintf and places the result into the this string.
1585
1586 In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1587 it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in
1588 the ISO C99 (and thus SUSv3) standard the return value for the case of
1589 an undersized buffer is inconsistent. For conforming vsnprintf
1590 implementations the function must return the number of characters that
1591 would have been printed had the buffer been large enough. For conforming
1592 vswprintf implementations the function must return a negative number
1593 and set errno.
1594
1595 What vswprintf sets errno to is undefined but Darwin seems to set it to
1596 EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of
1597 those are defined in the standard and backed up by several conformance
1598 statements. Note that ENOMEM mentioned in the manual page does not
1599 apply to swprintf, only wprintf and fwprintf.
1600
1601 Official manual page:
1602 http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1603
1604 Some conformance statements (AIX, Solaris):
1605 http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1606 http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1607
1608 Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1609 EILSEQ and EINVAL are specifically defined to mean the error is other than
1610 an undersized buffer and no other errno are defined we treat those two
1611 as meaning hard errors and everything else gets the old behavior which
1612 is to keep looping and increasing buffer size until the function succeeds.
1613
1614 In practice it's impossible to determine before compilation which behavior
1615 may be used. The vswprintf function may have vsnprintf-like behavior or
1616 vice-versa. Behavior detected on one release can theoretically change
1617 with an updated release. Not to mention that configure testing for it
1618 would require the test to be run on the host system, not the build system
1619 which makes cross compilation difficult. Therefore, we make no assumptions
1620 about behavior and try our best to handle every known case, including the
1621 case where wxVsnprintf returns a negative number and fails to set errno.
1622
1623 There is yet one more non-standard implementation and that is our own.
1624 Fortunately, that can be detected at compile-time.
1625
1626 On top of all that, ISO C99 explicitly defines snprintf to write a null
1627 character to the last position of the specified buffer. That would be
1628 at the given buffer size minus 1. It is supposed to do this even if it
1629 turns out that the buffer is sized too small.
1630
1631 Darwin (tested on 10.5) follows the C99 behavior exactly.
1632
1633 Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1634 errno even when it fails. However, it only seems to ever fail due
1635 to an undersized buffer.
1636 */
1637 #if wxUSE_UNICODE_UTF8
1638 template<typename BufferType>
1639 #else
1640 // we only need one version in non-UTF8 builds and at least two Windows
1641 // compilers have problems with this function template, so use just one
1642 // normal function here
1643 #endif
1644 static int DoStringPrintfV(wxString& str,
1645 const wxString& format, va_list argptr)
1646 {
1647 int size = 1024;
1648
1649 for ( ;; )
1650 {
1651 #if wxUSE_UNICODE_UTF8
1652 BufferType tmp(str, size + 1);
1653 typename BufferType::CharType *buf = tmp;
1654 #else
1655 wxStringBuffer tmp(str, size + 1);
1656 wxChar *buf = tmp;
1657 #endif
1658
1659 if ( !buf )
1660 {
1661 // out of memory
1662
1663 // in UTF-8 build, leaving uninitialized junk in the buffer
1664 // could result in invalid non-empty UTF-8 string, so just
1665 // reset the string to empty on failure:
1666 buf[0] = '\0';
1667 return -1;
1668 }
1669
1670 // wxVsnprintf() may modify the original arg pointer, so pass it
1671 // only a copy
1672 va_list argptrcopy;
1673 wxVaCopy(argptrcopy, argptr);
1674
1675 #ifndef __WXWINCE__
1676 // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1677 errno = 0;
1678 #endif
1679 int len = wxVsnprintf(buf, size, format, argptrcopy);
1680 va_end(argptrcopy);
1681
1682 // some implementations of vsnprintf() don't NUL terminate
1683 // the string if there is not enough space for it so
1684 // always do it manually
1685 // FIXME: This really seems to be the wrong and would be an off-by-one
1686 // bug except the code above allocates an extra character.
1687 buf[size] = _T('\0');
1688
1689 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1690 // total number of characters which would have been written if the
1691 // buffer were large enough (newer standards such as Unix98)
1692 if ( len < 0 )
1693 {
1694 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1695 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1696 // is true if *both* of them use our own implementation,
1697 // otherwise we can't be sure
1698 #if wxUSE_WXVSNPRINTF
1699 // we know that our own implementation of wxVsnprintf() returns -1
1700 // only for a format error - thus there's something wrong with
1701 // the user's format string
1702 buf[0] = '\0';
1703 return -1;
1704 #else // possibly using system version
1705 // assume it only returns error if there is not enough space, but
1706 // as we don't know how much we need, double the current size of
1707 // the buffer
1708 #ifndef __WXWINCE__
1709 if( (errno == EILSEQ) || (errno == EINVAL) )
1710 // If errno was set to one of the two well-known hard errors
1711 // then fail immediately to avoid an infinite loop.
1712 return -1;
1713 else
1714 #endif // __WXWINCE__
1715 // still not enough, as we don't know how much we need, double the
1716 // current size of the buffer
1717 size *= 2;
1718 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1719 }
1720 else if ( len >= size )
1721 {
1722 #if wxUSE_WXVSNPRINTF
1723 // we know that our own implementation of wxVsnprintf() returns
1724 // size+1 when there's not enough space but that's not the size
1725 // of the required buffer!
1726 size *= 2; // so we just double the current size of the buffer
1727 #else
1728 // some vsnprintf() implementations NUL-terminate the buffer and
1729 // some don't in len == size case, to be safe always add 1
1730 // FIXME: I don't quite understand this comment. The vsnprintf
1731 // function is specifically defined to return the number of
1732 // characters printed not including the null terminator.
1733 // So OF COURSE you need to add 1 to get the right buffer size.
1734 // The following line is definitely correct, no question.
1735 size = len + 1;
1736 #endif
1737 }
1738 else // ok, there was enough space
1739 {
1740 break;
1741 }
1742 }
1743
1744 // we could have overshot
1745 str.Shrink();
1746
1747 return str.length();
1748 }
1749
1750 int wxString::PrintfV(const wxString& format, va_list argptr)
1751 {
1752 #if wxUSE_UNICODE_UTF8
1753 #if wxUSE_STL_BASED_WXSTRING
1754 typedef wxStringTypeBuffer<char> Utf8Buffer;
1755 #else
1756 typedef wxStringInternalBuffer Utf8Buffer;
1757 #endif
1758 #endif
1759
1760 #if wxUSE_UTF8_LOCALE_ONLY
1761 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
1762 #else
1763 #if wxUSE_UNICODE_UTF8
1764 if ( wxLocaleIsUtf8 )
1765 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
1766 else
1767 // wxChar* version
1768 return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
1769 #else
1770 return DoStringPrintfV(*this, format, argptr);
1771 #endif // UTF8/WCHAR
1772 #endif
1773 }
1774
1775 // ----------------------------------------------------------------------------
1776 // misc other operations
1777 // ----------------------------------------------------------------------------
1778
// Shell-like wildcard matching of the whole string against mask.
//
1779 // returns true if the string matches the pattern which may contain '*' and
1780 // '?' metacharacters (as usual, '?' matches any character and '*' any number
1781 // of them)
//
// All the other characters of the mask must be equal to the corresponding
// characters of the string. The code actually compiled is the hand-written
// matcher in the #else branch below, the regex-based version is disabled.
1782 bool wxString::Matches(const wxString& mask) const
1783 {
1784 // I disable this code as it doesn't seem to be faster (in fact, it seems
1785 // to be much slower) than the old, hand-written code below and using it
1786 // here requires always linking with libregex even if the user code doesn't
1787 // use it
1788 #if 0 // wxUSE_REGEX
1789 // first translate the shell-like mask into a regex
1790 wxString pattern;
1791 pattern.reserve(wxStrlen(pszMask));
1792
1793 pattern += _T('^');
1794 while ( *pszMask )
1795 {
1796 switch ( *pszMask )
1797 {
1798 case _T('?'):
1799 pattern += _T('.');
1800 break;
1801
1802 case _T('*'):
1803 pattern += _T(".*");
1804 break;
1805
1806 case _T('^'):
1807 case _T('.'):
1808 case _T('$'):
1809 case _T('('):
1810 case _T(')'):
1811 case _T('|'):
1812 case _T('+'):
1813 case _T('\\'):
1814 // these characters are special in a RE, quote them
1815 // (however note that we don't quote '[' and ']' to allow
1816 // using them for Unix shell like matching)
1817 pattern += _T('\\');
1818 // fall through
1819
1820 default:
1821 pattern += *pszMask;
1822 }
1823
1824 pszMask++;
1825 }
1826 pattern += _T('$');
1827
1828 // and now use it
1829 return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
1830 #else // !wxUSE_REGEX
1831 // TODO: this is, of course, awfully inefficient...
1832
1833 // FIXME-UTF8: implement using iterators, remove #if
1834 #if wxUSE_UNICODE_UTF8
// in this build both the mask and the string are first converted to wide
// char buffers and the matching is done on these copies
1835 wxWCharBuffer maskBuf = mask.wc_str();
1836 wxWCharBuffer txtBuf = wc_str();
1837 const wxChar *pszMask = maskBuf.data();
1838 const wxChar *pszTxt = txtBuf.data();
1839 #else
1840 const wxChar *pszMask = mask.wx_str();
1841 // the char currently being checked
1842 const wxChar *pszTxt = wx_str();
1843 #endif
1844
1845 // the last location where '*' matched
1846 const wxChar *pszLastStarInText = NULL;
1847 const wxChar *pszLastStarInMask = NULL;
1848
// the matching is restarted from this label when backtracking to the last
// '*' seen, see the end of the function
1849 match:
1850 for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
1851 switch ( *pszMask ) {
1852 case wxT('?'):
// '?' needs a character to match, so it fails at the end of the text
1853 if ( *pszTxt == wxT('\0') )
1854 return false;
1855
1856 // pszTxt and pszMask will be incremented in the loop statement
1857
1858 break;
1859
1860 case wxT('*'):
1861 {
1862 // remember where we started to be able to backtrack later
1863 pszLastStarInText = pszTxt;
1864 pszLastStarInMask = pszMask;
1865
1866 // ignore special chars immediately following this one
1867 // (should this be an error?)
// NB: this skips the '*' itself too and any '?' skipped here doesn't
//     consume a character of the text
1868 while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
1869 pszMask++;
1870
1871 // if there is nothing more, match
1872 if ( *pszMask == wxT('\0') )
1873 return true;
1874
1875 // are there any other metacharacters in the mask?
1876 size_t uiLenMask;
1877 const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));
1878
1879 if ( pEndMask != NULL ) {
1880 // we have to match the string between two metachars
1881 uiLenMask = pEndMask - pszMask;
1882 }
1883 else {
1884 // we have to match the remainder of the string
1885 uiLenMask = wxStrlen(pszMask);
1886 }
1887
// look for the literal part of the mask following the '*' anywhere in the
// rest of the text
1888 wxString strToMatch(pszMask, uiLenMask);
1889 const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
1890 if ( pMatch == NULL )
1891 return false;
1892
1893 // -1 to compensate "++" in the loop
1894 pszTxt = pMatch + uiLenMask - 1;
1895 pszMask += uiLenMask - 1;
1896 }
1897 break;
1898
1899 default:
// any other character of the mask must be the same as in the text
1900 if ( *pszMask != *pszTxt )
1901 return false;
1902 break;
1903 }
1904 }
1905
// the whole mask has been consumed at this point
1906 // match only if nothing left
1907 if ( *pszTxt == wxT('\0') )
1908 return true;
1909
1910 // if we failed to match, backtrack if we can
// by retrying the match from the last '*' in the mask, this time starting
// one character further in the text than where this '*' was tried before
1911 if ( pszLastStarInText ) {
1912 pszTxt = pszLastStarInText + 1;
1913 pszMask = pszLastStarInMask;
1914
1915 pszLastStarInText = NULL;
1916
1917 // don't bother resetting pszLastStarInMask, it's unnecessary
1918
1919 goto match;
1920 }
1921
1922 return false;
1923 #endif // wxUSE_REGEX/!wxUSE_REGEX
1924 }
1925
1926 // Count the number of chars
1927 int wxString::Freq(wxUniChar ch) const
1928 {
1929 int count = 0;
1930 for ( const_iterator i = begin(); i != end(); ++i )
1931 {
1932 if ( *i == ch )
1933 count ++;
1934 }
1935 return count;
1936 }
1937
1938 // convert to upper case, return the copy of the string
1939 wxString wxString::Upper() const
1940 { wxString s(*this); return s.MakeUpper(); }
1941
1942 // convert to lower case, return the copy of the string
1943 wxString wxString::Lower() const { wxString s(*this); return s.MakeLower(); }
1944
1945 // ----------------------------------------------------------------------------
1946 // wxUTF8StringBuffer
1947 // ----------------------------------------------------------------------------
1948
1949 #if wxUSE_UNICODE_WCHAR
1950 wxUTF8StringBuffer::~wxUTF8StringBuffer()
1951 {
1952 wxMBConvStrictUTF8 conv;
1953 size_t wlen = conv.ToWChar(NULL, 0, m_buf);
1954 wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
1955
1956 wxStringInternalBuffer wbuf(m_str, wlen);
1957 conv.ToWChar(wbuf, wlen, m_buf);
1958 }
1959
1960 wxUTF8StringBufferLength::~wxUTF8StringBufferLength()
1961 {
1962 wxCHECK_RET(m_lenSet, "length not set");
1963
1964 wxMBConvStrictUTF8 conv;
1965 size_t wlen = conv.ToWChar(NULL, 0, m_buf, m_len);
1966 wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
1967
1968 wxStringInternalBufferLength wbuf(m_str, wlen);
1969 conv.ToWChar(wbuf, wlen, m_buf, m_len);
1970 wbuf.SetLength(wlen);
1971 }
1972 #endif // wxUSE_UNICODE_WCHAR