]> git.saurik.com Git - wxWidgets.git/blob - src/common/string.cpp
access TLS cache variable directly and not via a helper function when using compiler...
[wxWidgets.git] / src / common / string.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
5 // Modified by:
6 // Created: 29/01/98
7 // RCS-ID: $Id$
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
12
13 // ===========================================================================
14 // headers, declarations, constants
15 // ===========================================================================
16
17 // For compilers that support precompilation, includes "wx.h".
18 #include "wx/wxprec.h"
19
20 #ifdef __BORLANDC__
21 #pragma hdrstop
22 #endif
23
24 #ifndef WX_PRECOMP
25 #include "wx/string.h"
26 #include "wx/wxcrtvararg.h"
27 #endif
28
29 #include <ctype.h>
30
31 #ifndef __WXWINCE__
32 #include <errno.h>
33 #endif
34
35 #include <string.h>
36 #include <stdlib.h>
37
38 #include "wx/hashmap.h"
39
40 // string handling functions used by wxString:
41 #if wxUSE_UNICODE_UTF8
42 #define wxStringMemcpy memcpy
43 #define wxStringMemcmp memcmp
44 #define wxStringMemchr memchr
45 #define wxStringStrlen strlen
46 #else
47 #define wxStringMemcpy wxTmemcpy
48 #define wxStringMemcmp wxTmemcmp
49 #define wxStringMemchr wxTmemchr
50 #define wxStringStrlen wxStrlen
51 #endif
52
53
54 // ---------------------------------------------------------------------------
55 // static class variables definition
56 // ---------------------------------------------------------------------------
57
58 // According to the STL, npos _must_ be (size_t)-1
59 const size_t wxString::npos = (size_t) -1;
60
61 #if wxUSE_STRING_POS_CACHE
62
63 #ifdef wxHAS_COMPILER_TLS
64
65 wxTLS_TYPE(wxString::Cache) wxString::ms_cache;
66
67 #else // !wxHAS_COMPILER_TLS
68
69 struct wxStrCacheInitializer
70 {
71 wxStrCacheInitializer()
72 {
73 // calling this function triggers s_cache initialization in it, and
74 // from now on it becomes safe to call from multiple threads
75 wxString::GetCache();
76 }
77 };
78
79 static wxStrCacheInitializer gs_stringCacheInit;
80
81 #endif // wxHAS_COMPILER_TLS/!wxHAS_COMPILER_TLS
82
83 // gdb seems to be unable to display thread-local variables correctly, at least
84 // not my 6.4.98 version under amd64, so provide this debugging helper to do it
85 #ifdef __WXDEBUG__
86
87 struct wxStrCacheDumper
88 {
89 static void ShowAll()
90 {
91 puts("*** wxString cache dump:");
92 for ( unsigned n = 0; n < wxString::Cache::SIZE; n++ )
93 {
94 const wxString::Cache::Element&
95 c = wxString::GetCacheBegin()[n];
96
97 printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
98 n,
99 n == wxString::LastUsedCacheElement() ? " [*]" : "",
100 c.str,
101 (unsigned long)c.pos,
102 (unsigned long)c.impl,
103 (long)c.len);
104 }
105 }
106 };
107
108 void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
109
110 #endif // __WXDEBUG__
111
112 #ifdef wxPROFILE_STRING_CACHE
113
114 wxString::CacheStats wxString::ms_cacheStats;
115
116 struct wxStrCacheStatsDumper
117 {
118 ~wxStrCacheStatsDumper()
119 {
120 const wxString::CacheStats& stats = wxString::ms_cacheStats;
121
122 if ( stats.postot )
123 {
124 puts("*** wxString cache statistics:");
125 printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
126 stats.postot);
127 printf("\tHits %u (of which %u not used) or %.2f%%\n",
128 stats.poshits,
129 stats.mishits,
130 100.*float(stats.poshits - stats.mishits)/stats.postot);
131 printf("\tAverage position requested: %.2f\n",
132 float(stats.sumpos) / stats.postot);
133 printf("\tAverage offset after cached hint: %.2f\n",
134 float(stats.sumofs) / stats.postot);
135 }
136
137 if ( stats.lentot )
138 {
139 printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
140 stats.lentot, 100.*float(stats.lenhits)/stats.lentot);
141 }
142 }
143 };
144
145 static wxStrCacheStatsDumper s_showCacheStats;
146
147 #endif // wxPROFILE_STRING_CACHE
148
149 #endif // wxUSE_STRING_POS_CACHE
150
151 // ----------------------------------------------------------------------------
152 // global functions
153 // ----------------------------------------------------------------------------
154
155 #if wxUSE_STD_IOSTREAM
156
157 #include <iostream>
158
159 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
160 {
161 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
162 return os << (const char *)str.AsCharBuf();
163 #else
164 return os << str.AsInternal();
165 #endif
166 }
167
168 wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
169 {
170 return os << str.c_str();
171 }
172
173 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str)
174 {
175 return os << str.data();
176 }
177
178 #ifndef __BORLANDC__
179 wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)
180 {
181 return os << str.data();
182 }
183 #endif
184
185 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
186
187 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxString& str)
188 {
189 return wos << str.wc_str();
190 }
191
192 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxCStrData& str)
193 {
194 return wos << str.AsWChar();
195 }
196
197 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxWCharBuffer& str)
198 {
199 return wos << str.data();
200 }
201
202 #endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
203
204 #endif // wxUSE_STD_IOSTREAM
205
206 // ===========================================================================
207 // wxString class core
208 // ===========================================================================
209
210 #if wxUSE_UNICODE_UTF8
211
212 void wxString::PosLenToImpl(size_t pos, size_t len,
213 size_t *implPos, size_t *implLen) const
214 {
215 if ( pos == npos )
216 {
217 *implPos = npos;
218 }
219 else // have valid start position
220 {
221 const const_iterator b = GetIterForNthChar(pos);
222 *implPos = wxStringImpl::const_iterator(b.impl()) - m_impl.begin();
223 if ( len == npos )
224 {
225 *implLen = npos;
226 }
227 else // have valid length too
228 {
229 // we need to handle the case of length specifying a substring
230 // going beyond the end of the string, just as std::string does
231 const const_iterator e(end());
232 const_iterator i(b);
233 while ( len && i <= e )
234 {
235 ++i;
236 --len;
237 }
238
239 *implLen = i.impl() - b.impl();
240 }
241 }
242 }
243
244 #endif // wxUSE_UNICODE_UTF8
245
246 // ----------------------------------------------------------------------------
247 // wxCStrData converted strings caching
248 // ----------------------------------------------------------------------------
249
250 // FIXME-UTF8: temporarily disabled because it doesn't work with global
251 // string objects; re-enable after fixing this bug and benchmarking
252 // performance to see if using a hash is a good idea at all
253 #if 0
254
255 // For backward compatibility reasons, it must be possible to assign the value
256 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
257 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
258 // because the memory would be freed immediately, but it has to be valid as long
259 // as the string is not modified, so that code like this still works:
260 //
261 // const wxChar *s = str.c_str();
262 // while ( s ) { ... }
263
264 // FIXME-UTF8: not thread safe!
265 // FIXME-UTF8: we currently clear the cached conversion only when the string is
266 // destroyed, but we should do it when the string is modified, to
267 // keep memory usage down
268 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
269 // invalidated the cache on every change, we could keep the previous
270 // conversion
271 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
272 // to use mb_str() or wc_str() instead of (const [w]char*)c_str()
273
274 template<typename T>
275 static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
276 {
277 typename T::iterator i = hash.find(wxConstCast(s, wxString));
278 if ( i != hash.end() )
279 {
280 free(i->second);
281 hash.erase(i);
282 }
283 }
284
285 #if wxUSE_UNICODE
286 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
287 // so we have to use wxString* here and const-cast when used
288 WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
289 wxStringCharConversionCache);
290 static wxStringCharConversionCache gs_stringsCharCache;
291
292 const char* wxCStrData::AsChar() const
293 {
294 // remove previously cache value, if any (see FIXMEs above):
295 DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
296
297 // convert the string and keep it:
298 const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
299 m_str->mb_str().release();
300
301 return s + m_offset;
302 }
303 #endif // wxUSE_UNICODE
304
305 #if !wxUSE_UNICODE_WCHAR
306 WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
307 wxStringWCharConversionCache);
308 static wxStringWCharConversionCache gs_stringsWCharCache;
309
310 const wchar_t* wxCStrData::AsWChar() const
311 {
312 // remove previously cache value, if any (see FIXMEs above):
313 DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
314
315 // convert the string and keep it:
316 const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
317 m_str->wc_str().release();
318
319 return s + m_offset;
320 }
321 #endif // !wxUSE_UNICODE_WCHAR
322
323 wxString::~wxString()
324 {
325 #if wxUSE_UNICODE
326 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
327 DeleteStringFromConversionCache(gs_stringsCharCache, this);
328 #endif
329 #if !wxUSE_UNICODE_WCHAR
330 DeleteStringFromConversionCache(gs_stringsWCharCache, this);
331 #endif
332 }
333 #endif
334
335 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
336 const char* wxCStrData::AsChar() const
337 {
338 #if wxUSE_UNICODE_UTF8
339 if ( wxLocaleIsUtf8 )
340 return AsInternal();
341 #endif
342 // under non-UTF8 locales, we have to convert the internal UTF-8
343 // representation using wxConvLibc and cache the result
344
345 wxString *str = wxConstCast(m_str, wxString);
346
347 // convert the string:
348 //
349 // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
350 // have it) but it's unfortunately not obvious to implement
351 // because we don't know how big buffer do we need for the
352 // given string length (in case of multibyte encodings, e.g.
353 // ISO-2022-JP or UTF-8 when internal representation is wchar_t)
354 //
355 // One idea would be to store more than just m_convertedToChar
356 // in wxString: then we could record the length of the string
357 // which was converted the last time and try to reuse the same
358 // buffer if the current length is not greater than it (this
359 // could still fail because string could have been modified in
360 // place but it would work most of the time, so we'd do it and
361 // only allocate the new buffer if in-place conversion returned
362 // an error). We could also store a bit saying if the string
363 // was modified since the last conversion (and update it in all
364 // operation modifying the string, of course) to avoid unneeded
365 // consequential conversions. But both of these ideas require
366 // adding more fields to wxString and require profiling results
367 // to be sure that we really gain enough from them to justify
368 // doing it.
369 wxCharBuffer buf(str->mb_str());
370
371 // if it failed, return empty string and not NULL to avoid crashes in code
372 // written with either wxWidgets 2 wxString or std::string behaviour in
373 // mind: neither of them ever returns NULL and so we shouldn't neither
374 if ( !buf )
375 return "";
376
377 if ( str->m_convertedToChar &&
378 strlen(buf) == strlen(str->m_convertedToChar) )
379 {
380 // keep the same buffer for as long as possible, so that several calls
381 // to c_str() in a row still work:
382 strcpy(str->m_convertedToChar, buf);
383 }
384 else
385 {
386 str->m_convertedToChar = buf.release();
387 }
388
389 // and keep it:
390 return str->m_convertedToChar + m_offset;
391 }
392 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
393
394 #if !wxUSE_UNICODE_WCHAR
395 const wchar_t* wxCStrData::AsWChar() const
396 {
397 wxString *str = wxConstCast(m_str, wxString);
398
399 // convert the string:
400 wxWCharBuffer buf(str->wc_str());
401
402 // notice that here, unlike above in AsChar(), conversion can't fail as our
403 // internal UTF-8 is always well-formed -- or the string was corrupted and
404 // all bets are off anyhow
405
406 // FIXME-UTF8: do the conversion in-place in the existing buffer
407 if ( str->m_convertedToWChar &&
408 wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
409 {
410 // keep the same buffer for as long as possible, so that several calls
411 // to c_str() in a row still work:
412 memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
413 }
414 else
415 {
416 str->m_convertedToWChar = buf.release();
417 }
418
419 // and keep it:
420 return str->m_convertedToWChar + m_offset;
421 }
422 #endif // !wxUSE_UNICODE_WCHAR
423
424 // ===========================================================================
425 // wxString class core
426 // ===========================================================================
427
428 // ---------------------------------------------------------------------------
429 // construction and conversion
430 // ---------------------------------------------------------------------------
431
432 #if wxUSE_UNICODE_WCHAR
433 /* static */
434 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
435 const wxMBConv& conv)
436 {
437 // anything to do?
438 if ( !psz || nLength == 0 )
439 return SubstrBufFromMB(L"", 0);
440
441 if ( nLength == npos )
442 nLength = wxNO_LEN;
443
444 size_t wcLen;
445 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
446 if ( !wcLen )
447 return SubstrBufFromMB(_T(""), 0);
448 else
449 return SubstrBufFromMB(wcBuf, wcLen);
450 }
451 #endif // wxUSE_UNICODE_WCHAR
452
453 #if wxUSE_UNICODE_UTF8
454 /* static */
455 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
456 const wxMBConv& conv)
457 {
458 // anything to do?
459 if ( !psz || nLength == 0 )
460 return SubstrBufFromMB("", 0);
461
462 // if psz is already in UTF-8, we don't have to do the roundtrip to
463 // wchar_t* and back:
464 if ( conv.IsUTF8() )
465 {
466 // we need to validate the input because UTF8 iterators assume valid
467 // UTF-8 sequence and psz may be invalid:
468 if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
469 {
470 // we must pass the real string length to SubstrBufFromMB ctor
471 if ( nLength == npos )
472 nLength = psz ? strlen(psz) : 0;
473 return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz), nLength);
474 }
475 // else: do the roundtrip through wchar_t*
476 }
477
478 if ( nLength == npos )
479 nLength = wxNO_LEN;
480
481 // first convert to wide string:
482 size_t wcLen;
483 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
484 if ( !wcLen )
485 return SubstrBufFromMB("", 0);
486
487 // and then to UTF-8:
488 SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
489 // widechar -> UTF-8 conversion isn't supposed to ever fail:
490 wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
491
492 return buf;
493 }
494 #endif // wxUSE_UNICODE_UTF8
495
496 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
497 /* static */
498 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
499 const wxMBConv& conv)
500 {
501 // anything to do?
502 if ( !pwz || nLength == 0 )
503 return SubstrBufFromWC("", 0);
504
505 if ( nLength == npos )
506 nLength = wxNO_LEN;
507
508 size_t mbLen;
509 wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
510 if ( !mbLen )
511 return SubstrBufFromWC("", 0);
512 else
513 return SubstrBufFromWC(mbBuf, mbLen);
514 }
515 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
516
517
518 #if wxUSE_UNICODE_WCHAR
519
520 //Convert wxString in Unicode mode to a multi-byte string
521 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
522 {
523 return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL);
524 }
525
526 #elif wxUSE_UNICODE_UTF8
527
528 const wxWCharBuffer wxString::wc_str() const
529 {
530 return wxMBConvStrictUTF8().cMB2WC
531 (
532 m_impl.c_str(),
533 m_impl.length() + 1, // size, not length
534 NULL
535 );
536 }
537
538 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
539 {
540 if ( conv.IsUTF8() )
541 return wxCharBuffer::CreateNonOwned(m_impl.c_str());
542
543 // FIXME-UTF8: use wc_str() here once we have buffers with length
544
545 size_t wcLen;
546 wxWCharBuffer wcBuf(wxMBConvStrictUTF8().cMB2WC
547 (
548 m_impl.c_str(),
549 m_impl.length() + 1, // size
550 &wcLen
551 ));
552 if ( !wcLen )
553 return wxCharBuffer("");
554
555 return conv.cWC2MB(wcBuf, wcLen+1, NULL);
556 }
557
558 #else // ANSI
559
560 //Converts this string to a wide character string if unicode
561 //mode is not enabled and wxUSE_WCHAR_T is enabled
562 const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
563 {
564 return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL);
565 }
566
567 #endif // Unicode/ANSI
568
569 // shrink to minimal size (releasing extra memory)
570 bool wxString::Shrink()
571 {
572 wxString tmp(begin(), end());
573 swap(tmp);
574 return tmp.length() == length();
575 }
576
577 // deprecated compatibility code:
578 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
579 wxStringCharType *wxString::GetWriteBuf(size_t nLen)
580 {
581 return DoGetWriteBuf(nLen);
582 }
583
584 void wxString::UngetWriteBuf()
585 {
586 DoUngetWriteBuf();
587 }
588
589 void wxString::UngetWriteBuf(size_t nLen)
590 {
591 DoUngetWriteBuf(nLen);
592 }
593 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
594
595
596 // ---------------------------------------------------------------------------
597 // data access
598 // ---------------------------------------------------------------------------
599
600 // all functions are inline in string.h
601
602 // ---------------------------------------------------------------------------
603 // concatenation operators
604 // ---------------------------------------------------------------------------
605
606 /*
607 * concatenation functions come in 5 flavours:
608 * string + string
609 * char + string and string + char
610 * C str + string and string + C str
611 */
612
613 wxString operator+(const wxString& str1, const wxString& str2)
614 {
615 #if !wxUSE_STL_BASED_WXSTRING
616 wxASSERT( str1.IsValid() );
617 wxASSERT( str2.IsValid() );
618 #endif
619
620 wxString s = str1;
621 s += str2;
622
623 return s;
624 }
625
626 wxString operator+(const wxString& str, wxUniChar ch)
627 {
628 #if !wxUSE_STL_BASED_WXSTRING
629 wxASSERT( str.IsValid() );
630 #endif
631
632 wxString s = str;
633 s += ch;
634
635 return s;
636 }
637
638 wxString operator+(wxUniChar ch, const wxString& str)
639 {
640 #if !wxUSE_STL_BASED_WXSTRING
641 wxASSERT( str.IsValid() );
642 #endif
643
644 wxString s = ch;
645 s += str;
646
647 return s;
648 }
649
650 wxString operator+(const wxString& str, const char *psz)
651 {
652 #if !wxUSE_STL_BASED_WXSTRING
653 wxASSERT( str.IsValid() );
654 #endif
655
656 wxString s;
657 if ( !s.Alloc(strlen(psz) + str.length()) ) {
658 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
659 }
660 s += str;
661 s += psz;
662
663 return s;
664 }
665
666 wxString operator+(const wxString& str, const wchar_t *pwz)
667 {
668 #if !wxUSE_STL_BASED_WXSTRING
669 wxASSERT( str.IsValid() );
670 #endif
671
672 wxString s;
673 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
674 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
675 }
676 s += str;
677 s += pwz;
678
679 return s;
680 }
681
682 wxString operator+(const char *psz, const wxString& str)
683 {
684 #if !wxUSE_STL_BASED_WXSTRING
685 wxASSERT( str.IsValid() );
686 #endif
687
688 wxString s;
689 if ( !s.Alloc(strlen(psz) + str.length()) ) {
690 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
691 }
692 s = psz;
693 s += str;
694
695 return s;
696 }
697
698 wxString operator+(const wchar_t *pwz, const wxString& str)
699 {
700 #if !wxUSE_STL_BASED_WXSTRING
701 wxASSERT( str.IsValid() );
702 #endif
703
704 wxString s;
705 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
706 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
707 }
708 s = pwz;
709 s += str;
710
711 return s;
712 }
713
714 // ---------------------------------------------------------------------------
715 // string comparison
716 // ---------------------------------------------------------------------------
717
718 bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
719 {
720 return (length() == 1) && (compareWithCase ? GetChar(0u) == c
721 : wxToupper(GetChar(0u)) == wxToupper(c));
722 }
723
724 #ifdef HAVE_STD_STRING_COMPARE
725
726 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
727 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
728 // sort strings in characters code point order by sorting the byte sequence
729 // in byte values order (i.e. what strcmp() and memcmp() do).
730
731 int wxString::compare(const wxString& str) const
732 {
733 return m_impl.compare(str.m_impl);
734 }
735
736 int wxString::compare(size_t nStart, size_t nLen,
737 const wxString& str) const
738 {
739 size_t pos, len;
740 PosLenToImpl(nStart, nLen, &pos, &len);
741 return m_impl.compare(pos, len, str.m_impl);
742 }
743
744 int wxString::compare(size_t nStart, size_t nLen,
745 const wxString& str,
746 size_t nStart2, size_t nLen2) const
747 {
748 size_t pos, len;
749 PosLenToImpl(nStart, nLen, &pos, &len);
750
751 size_t pos2, len2;
752 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
753
754 return m_impl.compare(pos, len, str.m_impl, pos2, len2);
755 }
756
757 int wxString::compare(const char* sz) const
758 {
759 return m_impl.compare(ImplStr(sz));
760 }
761
762 int wxString::compare(const wchar_t* sz) const
763 {
764 return m_impl.compare(ImplStr(sz));
765 }
766
767 int wxString::compare(size_t nStart, size_t nLen,
768 const char* sz, size_t nCount) const
769 {
770 size_t pos, len;
771 PosLenToImpl(nStart, nLen, &pos, &len);
772
773 SubstrBufFromMB str(ImplStr(sz, nCount));
774
775 return m_impl.compare(pos, len, str.data, str.len);
776 }
777
778 int wxString::compare(size_t nStart, size_t nLen,
779 const wchar_t* sz, size_t nCount) const
780 {
781 size_t pos, len;
782 PosLenToImpl(nStart, nLen, &pos, &len);
783
784 SubstrBufFromWC str(ImplStr(sz, nCount));
785
786 return m_impl.compare(pos, len, str.data, str.len);
787 }
788
789 #else // !HAVE_STD_STRING_COMPARE
790
791 static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
792 const wxStringCharType* s2, size_t l2)
793 {
794 if( l1 == l2 )
795 return wxStringMemcmp(s1, s2, l1);
796 else if( l1 < l2 )
797 {
798 int ret = wxStringMemcmp(s1, s2, l1);
799 return ret == 0 ? -1 : ret;
800 }
801 else
802 {
803 int ret = wxStringMemcmp(s1, s2, l2);
804 return ret == 0 ? +1 : ret;
805 }
806 }
807
808 int wxString::compare(const wxString& str) const
809 {
810 return ::wxDoCmp(m_impl.data(), m_impl.length(),
811 str.m_impl.data(), str.m_impl.length());
812 }
813
814 int wxString::compare(size_t nStart, size_t nLen,
815 const wxString& str) const
816 {
817 wxASSERT(nStart <= length());
818 size_type strLen = length() - nStart;
819 nLen = strLen < nLen ? strLen : nLen;
820
821 size_t pos, len;
822 PosLenToImpl(nStart, nLen, &pos, &len);
823
824 return ::wxDoCmp(m_impl.data() + pos, len,
825 str.m_impl.data(), str.m_impl.length());
826 }
827
828 int wxString::compare(size_t nStart, size_t nLen,
829 const wxString& str,
830 size_t nStart2, size_t nLen2) const
831 {
832 wxASSERT(nStart <= length());
833 wxASSERT(nStart2 <= str.length());
834 size_type strLen = length() - nStart,
835 strLen2 = str.length() - nStart2;
836 nLen = strLen < nLen ? strLen : nLen;
837 nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
838
839 size_t pos, len;
840 PosLenToImpl(nStart, nLen, &pos, &len);
841 size_t pos2, len2;
842 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
843
844 return ::wxDoCmp(m_impl.data() + pos, len,
845 str.m_impl.data() + pos2, len2);
846 }
847
848 int wxString::compare(const char* sz) const
849 {
850 SubstrBufFromMB str(ImplStr(sz, npos));
851 if ( str.len == npos )
852 str.len = wxStringStrlen(str.data);
853 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
854 }
855
856 int wxString::compare(const wchar_t* sz) const
857 {
858 SubstrBufFromWC str(ImplStr(sz, npos));
859 if ( str.len == npos )
860 str.len = wxStringStrlen(str.data);
861 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
862 }
863
864 int wxString::compare(size_t nStart, size_t nLen,
865 const char* sz, size_t nCount) const
866 {
867 wxASSERT(nStart <= length());
868 size_type strLen = length() - nStart;
869 nLen = strLen < nLen ? strLen : nLen;
870
871 size_t pos, len;
872 PosLenToImpl(nStart, nLen, &pos, &len);
873
874 SubstrBufFromMB str(ImplStr(sz, nCount));
875 if ( str.len == npos )
876 str.len = wxStringStrlen(str.data);
877
878 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
879 }
880
881 int wxString::compare(size_t nStart, size_t nLen,
882 const wchar_t* sz, size_t nCount) const
883 {
884 wxASSERT(nStart <= length());
885 size_type strLen = length() - nStart;
886 nLen = strLen < nLen ? strLen : nLen;
887
888 size_t pos, len;
889 PosLenToImpl(nStart, nLen, &pos, &len);
890
891 SubstrBufFromWC str(ImplStr(sz, nCount));
892 if ( str.len == npos )
893 str.len = wxStringStrlen(str.data);
894
895 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
896 }
897
898 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
899
900
901 // ---------------------------------------------------------------------------
902 // find_{first,last}_[not]_of functions
903 // ---------------------------------------------------------------------------
904
905 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
906
907 // NB: All these functions are implemented with the argument being wxChar*,
908 // i.e. widechar string in any Unicode build, even though native string
909 // representation is char* in the UTF-8 build. This is because we couldn't
910 // use memchr() to determine if a character is in a set encoded as UTF-8.
911
912 size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
913 {
914 return find_first_of(sz, nStart, wxStrlen(sz));
915 }
916
917 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
918 {
919 return find_first_not_of(sz, nStart, wxStrlen(sz));
920 }
921
922 size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
923 {
924 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
925
926 size_t idx = nStart;
927 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
928 {
929 if ( wxTmemchr(sz, *i, n) )
930 return idx;
931 }
932
933 return npos;
934 }
935
936 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
937 {
938 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
939
940 size_t idx = nStart;
941 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
942 {
943 if ( !wxTmemchr(sz, *i, n) )
944 return idx;
945 }
946
947 return npos;
948 }
949
950
951 size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
952 {
953 return find_last_of(sz, nStart, wxStrlen(sz));
954 }
955
956 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
957 {
958 return find_last_not_of(sz, nStart, wxStrlen(sz));
959 }
960
961 size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
962 {
963 size_t len = length();
964
965 if ( nStart == npos )
966 {
967 nStart = len - 1;
968 }
969 else
970 {
971 wxASSERT_MSG( nStart <= len, _T("invalid index") );
972 }
973
974 size_t idx = nStart;
975 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
976 i != rend(); --idx, ++i )
977 {
978 if ( wxTmemchr(sz, *i, n) )
979 return idx;
980 }
981
982 return npos;
983 }
984
985 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
986 {
987 size_t len = length();
988
989 if ( nStart == npos )
990 {
991 nStart = len - 1;
992 }
993 else
994 {
995 wxASSERT_MSG( nStart <= len, _T("invalid index") );
996 }
997
998 size_t idx = nStart;
999 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1000 i != rend(); --idx, ++i )
1001 {
1002 if ( !wxTmemchr(sz, *i, n) )
1003 return idx;
1004 }
1005
1006 return npos;
1007 }
1008
1009 size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
1010 {
1011 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
1012
1013 size_t idx = nStart;
1014 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
1015 {
1016 if ( *i != ch )
1017 return idx;
1018 }
1019
1020 return npos;
1021 }
1022
1023 size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
1024 {
1025 size_t len = length();
1026
1027 if ( nStart == npos )
1028 {
1029 nStart = len - 1;
1030 }
1031 else
1032 {
1033 wxASSERT_MSG( nStart <= len, _T("invalid index") );
1034 }
1035
1036 size_t idx = nStart;
1037 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1038 i != rend(); --idx, ++i )
1039 {
1040 if ( *i != ch )
1041 return idx;
1042 }
1043
1044 return npos;
1045 }
1046
1047 // the functions above were implemented for wchar_t* arguments in Unicode
1048 // build and char* in ANSI build; below are implementations for the other
1049 // version:
1050 #if wxUSE_UNICODE
1051 #define wxOtherCharType char
1052 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
1053 #else
1054 #define wxOtherCharType wchar_t
1055 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
1056 #endif
1057
1058 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
1059 { return find_first_of(STRCONV(sz), nStart); }
1060
1061 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
1062 size_t n) const
1063 { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
1064 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
1065 { return find_last_of(STRCONV(sz), nStart); }
1066 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
1067 size_t n) const
1068 { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
1069 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
1070 { return find_first_not_of(STRCONV(sz), nStart); }
1071 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
1072 size_t n) const
1073 { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
1074 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
1075 { return find_last_not_of(STRCONV(sz), nStart); }
1076 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
1077 size_t n) const
1078 { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
1079
1080 #undef wxOtherCharType
1081 #undef STRCONV
1082
1083 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1084
1085 // ===========================================================================
1086 // other common string functions
1087 // ===========================================================================
1088
1089 int wxString::CmpNoCase(const wxString& s) const
1090 {
1091 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
1092
1093 const_iterator i1 = begin();
1094 const_iterator end1 = end();
1095 const_iterator i2 = s.begin();
1096 const_iterator end2 = s.end();
1097
1098 for ( ; i1 != end1 && i2 != end2; ++i1, ++i2 )
1099 {
1100 wxUniChar lower1 = (wxChar)wxTolower(*i1);
1101 wxUniChar lower2 = (wxChar)wxTolower(*i2);
1102 if ( lower1 != lower2 )
1103 return lower1 < lower2 ? -1 : 1;
1104 }
1105
1106 size_t len1 = length();
1107 size_t len2 = s.length();
1108
1109 if ( len1 < len2 )
1110 return -1;
1111 else if ( len1 > len2 )
1112 return 1;
1113 return 0;
1114 }
1115
1116
1117 #if wxUSE_UNICODE
1118
1119 #ifdef __MWERKS__
1120 #ifndef __SCHAR_MAX__
1121 #define __SCHAR_MAX__ 127
1122 #endif
1123 #endif
1124
1125 wxString wxString::FromAscii(const char *ascii, size_t len)
1126 {
1127 if (!ascii || len == 0)
1128 return wxEmptyString;
1129
1130 wxString res;
1131
1132 {
1133 wxStringInternalBuffer buf(res, len);
1134 wxStringCharType *dest = buf;
1135
1136 for ( ; len > 0; --len )
1137 {
1138 unsigned char c = (unsigned char)*ascii++;
1139 wxASSERT_MSG( c < 0x80,
1140 _T("Non-ASCII value passed to FromAscii().") );
1141
1142 *dest++ = (wchar_t)c;
1143 }
1144 }
1145
1146 return res;
1147 }
1148
1149 wxString wxString::FromAscii(const char *ascii)
1150 {
1151 return FromAscii(ascii, wxStrlen(ascii));
1152 }
1153
1154 wxString wxString::FromAscii(char ascii)
1155 {
1156 // What do we do with '\0' ?
1157
1158 unsigned char c = (unsigned char)ascii;
1159
1160 wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") );
1161
1162 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1163 return wxString(wxUniChar((wchar_t)c));
1164 }
1165
1166 const wxCharBuffer wxString::ToAscii() const
1167 {
1168 // this will allocate enough space for the terminating NUL too
1169 wxCharBuffer buffer(length());
1170 char *dest = buffer.data();
1171
1172 for ( const_iterator i = begin(); i != end(); ++i )
1173 {
1174 wxUniChar c(*i);
1175 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1176 *dest++ = c.IsAscii() ? (char)c : '_';
1177
1178 // the output string can't have embedded NULs anyhow, so we can safely
1179 // stop at first of them even if we do have any
1180 if ( !c )
1181 break;
1182 }
1183
1184 return buffer;
1185 }
1186
1187 #endif // wxUSE_UNICODE
1188
1189 // extract string of length nCount starting at nFirst
1190 wxString wxString::Mid(size_t nFirst, size_t nCount) const
1191 {
1192 size_t nLen = length();
1193
1194 // default value of nCount is npos and means "till the end"
1195 if ( nCount == npos )
1196 {
1197 nCount = nLen - nFirst;
1198 }
1199
1200 // out-of-bounds requests return sensible things
1201 if ( nFirst + nCount > nLen )
1202 {
1203 nCount = nLen - nFirst;
1204 }
1205
1206 if ( nFirst > nLen )
1207 {
1208 // AllocCopy() will return empty string
1209 return wxEmptyString;
1210 }
1211
1212 wxString dest(*this, nFirst, nCount);
1213 if ( dest.length() != nCount )
1214 {
1215 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1216 }
1217
1218 return dest;
1219 }
1220
1221 // check that the string starts with prefix and return the rest of the string
1222 // in the provided pointer if it is not NULL, otherwise return false
1223 bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
1224 {
1225 if ( compare(0, prefix.length(), prefix) != 0 )
1226 return false;
1227
1228 if ( rest )
1229 {
1230 // put the rest of the string into provided pointer
1231 rest->assign(*this, prefix.length(), npos);
1232 }
1233
1234 return true;
1235 }
1236
1237
1238 // check that the string ends with suffix and return the rest of it in the
1239 // provided pointer if it is not NULL, otherwise return false
1240 bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
1241 {
1242 int start = length() - suffix.length();
1243
1244 if ( start < 0 || compare(start, npos, suffix) != 0 )
1245 return false;
1246
1247 if ( rest )
1248 {
1249 // put the rest of the string into provided pointer
1250 rest->assign(*this, 0, start);
1251 }
1252
1253 return true;
1254 }
1255
1256
1257 // extract nCount last (rightmost) characters
1258 wxString wxString::Right(size_t nCount) const
1259 {
1260 if ( nCount > length() )
1261 nCount = length();
1262
1263 wxString dest(*this, length() - nCount, nCount);
1264 if ( dest.length() != nCount ) {
1265 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1266 }
1267 return dest;
1268 }
1269
1270 // get all characters after the last occurence of ch
1271 // (returns the whole string if ch not found)
1272 wxString wxString::AfterLast(wxUniChar ch) const
1273 {
1274 wxString str;
1275 int iPos = Find(ch, true);
1276 if ( iPos == wxNOT_FOUND )
1277 str = *this;
1278 else
1279 str = wx_str() + iPos + 1;
1280
1281 return str;
1282 }
1283
1284 // extract nCount first (leftmost) characters
1285 wxString wxString::Left(size_t nCount) const
1286 {
1287 if ( nCount > length() )
1288 nCount = length();
1289
1290 wxString dest(*this, 0, nCount);
1291 if ( dest.length() != nCount ) {
1292 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1293 }
1294 return dest;
1295 }
1296
1297 // get all characters before the first occurence of ch
1298 // (returns the whole string if ch not found)
1299 wxString wxString::BeforeFirst(wxUniChar ch) const
1300 {
1301 int iPos = Find(ch);
1302 if ( iPos == wxNOT_FOUND ) iPos = length();
1303 return wxString(*this, 0, iPos);
1304 }
1305
1306 /// get all characters before the last occurence of ch
1307 /// (returns empty string if ch not found)
1308 wxString wxString::BeforeLast(wxUniChar ch) const
1309 {
1310 wxString str;
1311 int iPos = Find(ch, true);
1312 if ( iPos != wxNOT_FOUND && iPos != 0 )
1313 str = wxString(c_str(), iPos);
1314
1315 return str;
1316 }
1317
1318 /// get all characters after the first occurence of ch
1319 /// (returns empty string if ch not found)
1320 wxString wxString::AfterFirst(wxUniChar ch) const
1321 {
1322 wxString str;
1323 int iPos = Find(ch);
1324 if ( iPos != wxNOT_FOUND )
1325 str = wx_str() + iPos + 1;
1326
1327 return str;
1328 }
1329
1330 // replace first (or all) occurences of some substring with another one
1331 size_t wxString::Replace(const wxString& strOld,
1332 const wxString& strNew, bool bReplaceAll)
1333 {
1334 // if we tried to replace an empty string we'd enter an infinite loop below
1335 wxCHECK_MSG( !strOld.empty(), 0,
1336 _T("wxString::Replace(): invalid parameter") );
1337
1338 wxSTRING_INVALIDATE_CACHE();
1339
1340 size_t uiCount = 0; // count of replacements made
1341
1342 // optimize the special common case: replacement of one character by
1343 // another one (in UTF-8 case we can only do this for ASCII characters)
1344 //
1345 // benchmarks show that this special version is around 3 times faster
1346 // (depending on the proportion of matching characters and UTF-8/wchar_t
1347 // build)
1348 if ( strOld.m_impl.length() == 1 && strNew.m_impl.length() == 1 )
1349 {
1350 const wxStringCharType chOld = strOld.m_impl[0],
1351 chNew = strNew.m_impl[0];
1352
1353 // this loop is the simplified version of the one below
1354 for ( size_t pos = 0; ; )
1355 {
1356 pos = m_impl.find(chOld, pos);
1357 if ( pos == npos )
1358 break;
1359
1360 m_impl[pos++] = chNew;
1361
1362 uiCount++;
1363
1364 if ( !bReplaceAll )
1365 break;
1366 }
1367 }
1368 else // general case
1369 {
1370 const size_t uiOldLen = strOld.m_impl.length();
1371 const size_t uiNewLen = strNew.m_impl.length();
1372
1373 for ( size_t pos = 0; ; )
1374 {
1375 pos = m_impl.find(strOld.m_impl, pos);
1376 if ( pos == npos )
1377 break;
1378
1379 // replace this occurrence of the old string with the new one
1380 m_impl.replace(pos, uiOldLen, strNew.m_impl);
1381
1382 // move up pos past the string that was replaced
1383 pos += uiNewLen;
1384
1385 // increase replace count
1386 uiCount++;
1387
1388 // stop after the first one?
1389 if ( !bReplaceAll )
1390 break;
1391 }
1392 }
1393
1394 return uiCount;
1395 }
1396
1397 bool wxString::IsAscii() const
1398 {
1399 for ( const_iterator i = begin(); i != end(); ++i )
1400 {
1401 if ( !(*i).IsAscii() )
1402 return false;
1403 }
1404
1405 return true;
1406 }
1407
1408 bool wxString::IsWord() const
1409 {
1410 for ( const_iterator i = begin(); i != end(); ++i )
1411 {
1412 if ( !wxIsalpha(*i) )
1413 return false;
1414 }
1415
1416 return true;
1417 }
1418
1419 bool wxString::IsNumber() const
1420 {
1421 if ( empty() )
1422 return true;
1423
1424 const_iterator i = begin();
1425
1426 if ( *i == _T('-') || *i == _T('+') )
1427 ++i;
1428
1429 for ( ; i != end(); ++i )
1430 {
1431 if ( !wxIsdigit(*i) )
1432 return false;
1433 }
1434
1435 return true;
1436 }
1437
1438 wxString wxString::Strip(stripType w) const
1439 {
1440 wxString s = *this;
1441 if ( w & leading ) s.Trim(false);
1442 if ( w & trailing ) s.Trim(true);
1443 return s;
1444 }
1445
1446 // ---------------------------------------------------------------------------
1447 // case conversion
1448 // ---------------------------------------------------------------------------
1449
1450 wxString& wxString::MakeUpper()
1451 {
1452 for ( iterator it = begin(), en = end(); it != en; ++it )
1453 *it = (wxChar)wxToupper(*it);
1454
1455 return *this;
1456 }
1457
1458 wxString& wxString::MakeLower()
1459 {
1460 for ( iterator it = begin(), en = end(); it != en; ++it )
1461 *it = (wxChar)wxTolower(*it);
1462
1463 return *this;
1464 }
1465
1466 wxString& wxString::MakeCapitalized()
1467 {
1468 const iterator en = end();
1469 iterator it = begin();
1470 if ( it != en )
1471 {
1472 *it = (wxChar)wxToupper(*it);
1473 for ( ++it; it != en; ++it )
1474 *it = (wxChar)wxTolower(*it);
1475 }
1476
1477 return *this;
1478 }
1479
1480 // ---------------------------------------------------------------------------
1481 // trimming and padding
1482 // ---------------------------------------------------------------------------
1483
1484 // some compilers (VC++ 6.0 not to name them) return true for a call to
1485 // isspace('\xEA') in the C locale which seems to be broken to me, but we have
1486 // to live with this by checking that the character is a 7 bit one - even if
1487 // this may fail to detect some spaces (I don't know if Unicode doesn't have
1488 // space-like symbols somewhere except in the first 128 chars), it is arguably
1489 // still better than trimming away accented letters
1490 inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1491
1492 // trims spaces (in the sense of isspace) from left or right side
1493 wxString& wxString::Trim(bool bFromRight)
1494 {
1495 // first check if we're going to modify the string at all
1496 if ( !empty() &&
1497 (
1498 (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1499 (!bFromRight && wxSafeIsspace(GetChar(0u)))
1500 )
1501 )
1502 {
1503 if ( bFromRight )
1504 {
1505 // find last non-space character
1506 reverse_iterator psz = rbegin();
1507 while ( (psz != rend()) && wxSafeIsspace(*psz) )
1508 ++psz;
1509
1510 // truncate at trailing space start
1511 erase(psz.base(), end());
1512 }
1513 else
1514 {
1515 // find first non-space character
1516 iterator psz = begin();
1517 while ( (psz != end()) && wxSafeIsspace(*psz) )
1518 ++psz;
1519
1520 // fix up data and length
1521 erase(begin(), psz);
1522 }
1523 }
1524
1525 return *this;
1526 }
1527
1528 // adds nCount characters chPad to the string from either side
1529 wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1530 {
1531 wxString s(chPad, nCount);
1532
1533 if ( bFromRight )
1534 *this += s;
1535 else
1536 {
1537 s += *this;
1538 swap(s);
1539 }
1540
1541 return *this;
1542 }
1543
1544 // truncate the string
1545 wxString& wxString::Truncate(size_t uiLen)
1546 {
1547 if ( uiLen < length() )
1548 {
1549 erase(begin() + uiLen, end());
1550 }
1551 //else: nothing to do, string is already short enough
1552
1553 return *this;
1554 }
1555
1556 // ---------------------------------------------------------------------------
1557 // finding (return wxNOT_FOUND if not found and index otherwise)
1558 // ---------------------------------------------------------------------------
1559
1560 // find a character
1561 int wxString::Find(wxUniChar ch, bool bFromEnd) const
1562 {
1563 size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1564
1565 return (idx == npos) ? wxNOT_FOUND : (int)idx;
1566 }
1567
1568 // ----------------------------------------------------------------------------
1569 // conversion to numbers
1570 // ----------------------------------------------------------------------------
1571
1572 // The implementation of all the functions below is exactly the same so factor
1573 // it out. Note that number extraction works correctly on UTF-8 strings, so
1574 // we can use wxStringCharType and wx_str() for maximum efficiency.
1575
// DO_IF_NOT_WINCE(x) expands to x, except when __WXWINCE__ is defined, in
// which case it expands to nothing; it wraps the code using errno below
#ifdef __WXWINCE__
    #define DO_IF_NOT_WINCE(x)
#else
    #define DO_IF_NOT_WINCE(x) x
#endif
1581
// Body shared by the integer conversion functions: parses the whole string
// with func (a strtol()-like function returning T) in the given base.
//
// On success the value is stored in *out and true is returned; false is
// returned, and *out left unchanged, if out is NULL, the string is empty, it
// contains anything after the number or (when errno is available) the
// conversion function reported ERANGE.
#define WX_STRING_TO_INT_TYPE(out, base, func, T)                           \
    wxCHECK_MSG( out, false, _T("NULL output pointer") );                   \
    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );  \
                                                                            \
    DO_IF_NOT_WINCE( errno = 0; )                                           \
                                                                            \
    const wxStringCharType * const strBegin = wx_str();                     \
    wxStringCharType *strStop;                                              \
    const T parsed = func(strBegin, &strStop, base);                        \
                                                                            \
    /* fail if nothing at all was parsed, if the scan was stopped by     */ \
    /* anything other than the terminating NUL or if an under/overflow   */ \
    /* occurred:                                                         */ \
    if ( strStop == strBegin || *strStop                                    \
            DO_IF_NOT_WINCE(|| errno == ERANGE) )                           \
        return false;                                                       \
    *out = parsed;                                                          \
    return true
1599
1600 bool wxString::ToLong(long *pVal, int base) const
1601 {
1602 WX_STRING_TO_INT_TYPE(pVal, base, wxStrtol, long);
1603 }
1604
1605 bool wxString::ToULong(unsigned long *pVal, int base) const
1606 {
1607 WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoul, unsigned long);
1608 }
1609
1610 bool wxString::ToLongLong(wxLongLong_t *pVal, int base) const
1611 {
1612 WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoll, wxLongLong_t);
1613 }
1614
1615 bool wxString::ToULongLong(wxULongLong_t *pVal, int base) const
1616 {
1617 WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoull, wxULongLong_t);
1618 }
1619
1620 bool wxString::ToDouble(double *pVal) const
1621 {
1622 wxCHECK_MSG( pVal, false, _T("NULL output pointer") );
1623
1624 DO_IF_NOT_WINCE( errno = 0; )
1625
1626 const wxChar *start = c_str();
1627 wxChar *end;
1628 double val = wxStrtod(start, &end);
1629
1630 // return true only if scan was stopped by the terminating NUL and if the
1631 // string was not empty to start with and no under/overflow occurred
1632 if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )
1633 return false;
1634
1635 *pVal = val;
1636
1637 return true;
1638 }
1639
1640 // ---------------------------------------------------------------------------
1641 // formatted output
1642 // ---------------------------------------------------------------------------
1643
1644 #if !wxUSE_UTF8_LOCALE_ONLY
1645 /* static */
1646 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1647 wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
1648 #else
1649 wxString wxString::DoFormatWchar(const wxChar *format, ...)
1650 #endif
1651 {
1652 va_list argptr;
1653 va_start(argptr, format);
1654
1655 wxString s;
1656 s.PrintfV(format, argptr);
1657
1658 va_end(argptr);
1659
1660 return s;
1661 }
1662 #endif // !wxUSE_UTF8_LOCALE_ONLY
1663
#if wxUSE_UNICODE_UTF8
// Return a new string formatted printf()-like from a char format string and
// the variable arguments following it; all the work is done by PrintfV().
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
1679
1680 /* static */
1681 wxString wxString::FormatV(const wxString& format, va_list argptr)
1682 {
1683 wxString s;
1684 s.PrintfV(format, argptr);
1685 return s;
1686 }
1687
1688 #if !wxUSE_UTF8_LOCALE_ONLY
1689 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1690 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
1691 #else
1692 int wxString::DoPrintfWchar(const wxChar *format, ...)
1693 #endif
1694 {
1695 va_list argptr;
1696 va_start(argptr, format);
1697
1698 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1699 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1700 // because it's the only cast that works safely for downcasting when
1701 // multiple inheritance is used:
1702 wxString *str = static_cast<wxString*>(this);
1703 #else
1704 wxString *str = this;
1705 #endif
1706
1707 int iLen = str->PrintfV(format, argptr);
1708
1709 va_end(argptr);
1710
1711 return iLen;
1712 }
1713 #endif // !wxUSE_UTF8_LOCALE_ONLY
1714
#if wxUSE_UNICODE_UTF8
// Replace the contents of the string with the result of formatting the
// variable arguments according to the char format string; returns whatever
// PrintfV(), which does all the work, returns.
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);

    const int nResult = PrintfV(format, args);

    va_end(args);

    return nResult;
}
#endif // wxUSE_UNICODE_UTF8
1728
1729 /*
1730 Uses wxVsnprintf and places the result into the this string.
1731
1732 In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1733 it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in
1734 the ISO C99 (and thus SUSv3) standard the return value for the case of
1735 an undersized buffer is inconsistent. For conforming vsnprintf
1736 implementations the function must return the number of characters that
1737 would have been printed had the buffer been large enough. For conforming
1738 vswprintf implementations the function must return a negative number
1739 and set errno.
1740
1741 What vswprintf sets errno to is undefined but Darwin seems to set it to
1742 EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of
1743 those are defined in the standard and backed up by several conformance
1744 statements. Note that ENOMEM mentioned in the manual page does not
1745 apply to swprintf, only wprintf and fwprintf.
1746
1747 Official manual page:
1748 http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1749
1750 Some conformance statements (AIX, Solaris):
1751 http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1752 http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1753
1754 Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1755 EILSEQ and EINVAL are specifically defined to mean the error is other than
1756 an undersized buffer and no other errno are defined we treat those two
1757 as meaning hard errors and everything else gets the old behavior which
1758 is to keep looping and increasing buffer size until the function succeeds.
1759
1760 In practice it's impossible to determine before compilation which behavior
1761 may be used. The vswprintf function may have vsnprintf-like behavior or
1762 vice-versa. Behavior detected on one release can theoretically change
1763 with an updated release. Not to mention that configure testing for it
1764 would require the test to be run on the host system, not the build system
1765 which makes cross compilation difficult. Therefore, we make no assumptions
1766 about behavior and try our best to handle every known case, including the
1767 case where wxVsnprintf returns a negative number and fails to set errno.
1768
1769 There is yet one more non-standard implementation and that is our own.
1770 Fortunately, that can be detected at compile-time.
1771
1772 On top of all that, ISO C99 explicitly defines snprintf to write a null
    character to the last position of the specified buffer. That would be
    at the given buffer size minus 1. It is supposed to do this even if it
1775 turns out that the buffer is sized too small.
1776
1777 Darwin (tested on 10.5) follows the C99 behavior exactly.
1778
1779 Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1780 errno even when it fails. However, it only seems to ever fail due
1781 to an undersized buffer.
1782 */
1783 #if wxUSE_UNICODE_UTF8
1784 template<typename BufferType>
1785 #else
1786 // we only need one version in non-UTF8 builds and at least two Windows
1787 // compilers have problems with this function template, so use just one
1788 // normal function here
1789 #endif
1790 static int DoStringPrintfV(wxString& str,
1791 const wxString& format, va_list argptr)
1792 {
1793 int size = 1024;
1794
1795 for ( ;; )
1796 {
1797 #if wxUSE_UNICODE_UTF8
1798 BufferType tmp(str, size + 1);
1799 typename BufferType::CharType *buf = tmp;
1800 #else
1801 wxStringBuffer tmp(str, size + 1);
1802 wxChar *buf = tmp;
1803 #endif
1804
1805 if ( !buf )
1806 {
1807 // out of memory
1808
1809 // in UTF-8 build, leaving uninitialized junk in the buffer
1810 // could result in invalid non-empty UTF-8 string, so just
1811 // reset the string to empty on failure:
1812 buf[0] = '\0';
1813 return -1;
1814 }
1815
1816 // wxVsnprintf() may modify the original arg pointer, so pass it
1817 // only a copy
1818 va_list argptrcopy;
1819 wxVaCopy(argptrcopy, argptr);
1820
1821 #ifndef __WXWINCE__
1822 // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1823 errno = 0;
1824 #endif
1825 int len = wxVsnprintf(buf, size, format, argptrcopy);
1826 va_end(argptrcopy);
1827
1828 // some implementations of vsnprintf() don't NUL terminate
1829 // the string if there is not enough space for it so
1830 // always do it manually
1831 // FIXME: This really seems to be the wrong and would be an off-by-one
1832 // bug except the code above allocates an extra character.
1833 buf[size] = _T('\0');
1834
1835 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1836 // total number of characters which would have been written if the
1837 // buffer were large enough (newer standards such as Unix98)
1838 if ( len < 0 )
1839 {
1840 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1841 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1842 // is true if *both* of them use our own implementation,
1843 // otherwise we can't be sure
1844 #if wxUSE_WXVSNPRINTF
1845 // we know that our own implementation of wxVsnprintf() returns -1
1846 // only for a format error - thus there's something wrong with
1847 // the user's format string
1848 buf[0] = '\0';
1849 return -1;
1850 #else // possibly using system version
1851 // assume it only returns error if there is not enough space, but
1852 // as we don't know how much we need, double the current size of
1853 // the buffer
1854 #ifndef __WXWINCE__
1855 if( (errno == EILSEQ) || (errno == EINVAL) )
1856 // If errno was set to one of the two well-known hard errors
1857 // then fail immediately to avoid an infinite loop.
1858 return -1;
1859 else
1860 #endif // __WXWINCE__
1861 // still not enough, as we don't know how much we need, double the
1862 // current size of the buffer
1863 size *= 2;
1864 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1865 }
1866 else if ( len >= size )
1867 {
1868 #if wxUSE_WXVSNPRINTF
1869 // we know that our own implementation of wxVsnprintf() returns
1870 // size+1 when there's not enough space but that's not the size
1871 // of the required buffer!
1872 size *= 2; // so we just double the current size of the buffer
1873 #else
1874 // some vsnprintf() implementations NUL-terminate the buffer and
1875 // some don't in len == size case, to be safe always add 1
1876 // FIXME: I don't quite understand this comment. The vsnprintf
1877 // function is specifically defined to return the number of
1878 // characters printed not including the null terminator.
1879 // So OF COURSE you need to add 1 to get the right buffer size.
1880 // The following line is definitely correct, no question.
1881 size = len + 1;
1882 #endif
1883 }
1884 else // ok, there was enough space
1885 {
1886 break;
1887 }
1888 }
1889
1890 // we could have overshot
1891 str.Shrink();
1892
1893 return str.length();
1894 }
1895
1896 int wxString::PrintfV(const wxString& format, va_list argptr)
1897 {
1898 #if wxUSE_UNICODE_UTF8
1899 #if wxUSE_STL_BASED_WXSTRING
1900 typedef wxStringTypeBuffer<char> Utf8Buffer;
1901 #else
1902 typedef wxStringInternalBuffer Utf8Buffer;
1903 #endif
1904 #endif
1905
1906 #if wxUSE_UTF8_LOCALE_ONLY
1907 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
1908 #else
1909 #if wxUSE_UNICODE_UTF8
1910 if ( wxLocaleIsUtf8 )
1911 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
1912 else
1913 // wxChar* version
1914 return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
1915 #else
1916 return DoStringPrintfV(*this, format, argptr);
1917 #endif // UTF8/WCHAR
1918 #endif
1919 }
1920
1921 // ----------------------------------------------------------------------------
1922 // misc other operations
1923 // ----------------------------------------------------------------------------
1924
1925 // returns true if the string matches the pattern which may contain '*' and
1926 // '?' metacharacters (as usual, '?' matches any character and '*' any number
1927 // of them)
1928 bool wxString::Matches(const wxString& mask) const
1929 {
1930 // I disable this code as it doesn't seem to be faster (in fact, it seems
1931 // to be much slower) than the old, hand-written code below and using it
1932 // here requires always linking with libregex even if the user code doesn't
1933 // use it
1934 #if 0 // wxUSE_REGEX
1935 // first translate the shell-like mask into a regex
1936 wxString pattern;
1937 pattern.reserve(wxStrlen(pszMask));
1938
1939 pattern += _T('^');
1940 while ( *pszMask )
1941 {
1942 switch ( *pszMask )
1943 {
1944 case _T('?'):
1945 pattern += _T('.');
1946 break;
1947
1948 case _T('*'):
1949 pattern += _T(".*");
1950 break;
1951
1952 case _T('^'):
1953 case _T('.'):
1954 case _T('$'):
1955 case _T('('):
1956 case _T(')'):
1957 case _T('|'):
1958 case _T('+'):
1959 case _T('\\'):
1960 // these characters are special in a RE, quote them
1961 // (however note that we don't quote '[' and ']' to allow
1962 // using them for Unix shell like matching)
1963 pattern += _T('\\');
1964 // fall through
1965
1966 default:
1967 pattern += *pszMask;
1968 }
1969
1970 pszMask++;
1971 }
1972 pattern += _T('$');
1973
1974 // and now use it
1975 return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
1976 #else // !wxUSE_REGEX
1977 // TODO: this is, of course, awfully inefficient...
1978
1979 // FIXME-UTF8: implement using iterators, remove #if
1980 #if wxUSE_UNICODE_UTF8
1981 wxWCharBuffer maskBuf = mask.wc_str();
1982 wxWCharBuffer txtBuf = wc_str();
1983 const wxChar *pszMask = maskBuf.data();
1984 const wxChar *pszTxt = txtBuf.data();
1985 #else
1986 const wxChar *pszMask = mask.wx_str();
1987 // the char currently being checked
1988 const wxChar *pszTxt = wx_str();
1989 #endif
1990
1991 // the last location where '*' matched
1992 const wxChar *pszLastStarInText = NULL;
1993 const wxChar *pszLastStarInMask = NULL;
1994
1995 match:
1996 for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
1997 switch ( *pszMask ) {
1998 case wxT('?'):
1999 if ( *pszTxt == wxT('\0') )
2000 return false;
2001
2002 // pszTxt and pszMask will be incremented in the loop statement
2003
2004 break;
2005
2006 case wxT('*'):
2007 {
2008 // remember where we started to be able to backtrack later
2009 pszLastStarInText = pszTxt;
2010 pszLastStarInMask = pszMask;
2011
2012 // ignore special chars immediately following this one
2013 // (should this be an error?)
2014 while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
2015 pszMask++;
2016
2017 // if there is nothing more, match
2018 if ( *pszMask == wxT('\0') )
2019 return true;
2020
2021 // are there any other metacharacters in the mask?
2022 size_t uiLenMask;
2023 const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));
2024
2025 if ( pEndMask != NULL ) {
2026 // we have to match the string between two metachars
2027 uiLenMask = pEndMask - pszMask;
2028 }
2029 else {
2030 // we have to match the remainder of the string
2031 uiLenMask = wxStrlen(pszMask);
2032 }
2033
2034 wxString strToMatch(pszMask, uiLenMask);
2035 const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
2036 if ( pMatch == NULL )
2037 return false;
2038
2039 // -1 to compensate "++" in the loop
2040 pszTxt = pMatch + uiLenMask - 1;
2041 pszMask += uiLenMask - 1;
2042 }
2043 break;
2044
2045 default:
2046 if ( *pszMask != *pszTxt )
2047 return false;
2048 break;
2049 }
2050 }
2051
2052 // match only if nothing left
2053 if ( *pszTxt == wxT('\0') )
2054 return true;
2055
2056 // if we failed to match, backtrack if we can
2057 if ( pszLastStarInText ) {
2058 pszTxt = pszLastStarInText + 1;
2059 pszMask = pszLastStarInMask;
2060
2061 pszLastStarInText = NULL;
2062
2063 // don't bother resetting pszLastStarInMask, it's unnecessary
2064
2065 goto match;
2066 }
2067
2068 return false;
2069 #endif // wxUSE_REGEX/!wxUSE_REGEX
2070 }
2071
2072 // Count the number of chars
2073 int wxString::Freq(wxUniChar ch) const
2074 {
2075 int count = 0;
2076 for ( const_iterator i = begin(); i != end(); ++i )
2077 {
2078 if ( *i == ch )
2079 count ++;
2080 }
2081 return count;
2082 }
2083
2084 // ----------------------------------------------------------------------------
2085 // wxUTF8StringBuffer
2086 // ----------------------------------------------------------------------------
2087
2088 #if wxUSE_UNICODE_WCHAR
2089 wxUTF8StringBuffer::~wxUTF8StringBuffer()
2090 {
2091 wxMBConvStrictUTF8 conv;
2092 size_t wlen = conv.ToWChar(NULL, 0, m_buf);
2093 wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
2094
2095 wxStringInternalBuffer wbuf(m_str, wlen);
2096 conv.ToWChar(wbuf, wlen, m_buf);
2097 }
2098
2099 wxUTF8StringBufferLength::~wxUTF8StringBufferLength()
2100 {
2101 wxCHECK_RET(m_lenSet, "length not set");
2102
2103 wxMBConvStrictUTF8 conv;
2104 size_t wlen = conv.ToWChar(NULL, 0, m_buf, m_len);
2105 wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
2106
2107 wxStringInternalBufferLength wbuf(m_str, wlen);
2108 conv.ToWChar(wbuf, wlen, m_buf, m_len);
2109 wbuf.SetLength(wlen);
2110 }
2111 #endif // wxUSE_UNICODE_WCHAR
2112
2113 // ----------------------------------------------------------------------------
// wxCharTypeBuffer<T>
2115 // ----------------------------------------------------------------------------
2116
2117 template<>
2118 wxCharTypeBuffer<char>::Data
2119 wxCharTypeBuffer<char>::NullData(NULL);
2120
2121 template<>
2122 wxCharTypeBuffer<wchar_t>::Data
2123 wxCharTypeBuffer<wchar_t>::NullData(NULL);