]> git.saurik.com Git - wxWidgets.git/blob - src/common/string.cpp
rename ShowCacheStats to wxStrCacheStatsDumper
[wxWidgets.git] / src / common / string.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
5 // Modified by:
6 // Created: 29/01/98
7 // RCS-ID: $Id$
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
12
13 // ===========================================================================
14 // headers, declarations, constants
15 // ===========================================================================
16
17 // For compilers that support precompilation, includes "wx.h".
18 #include "wx/wxprec.h"
19
20 #ifdef __BORLANDC__
21 #pragma hdrstop
22 #endif
23
24 #ifndef WX_PRECOMP
25 #include "wx/string.h"
26 #include "wx/wxcrtvararg.h"
27 #endif
28
29 #include <ctype.h>
30
31 #ifndef __WXWINCE__
32 #include <errno.h>
33 #endif
34
35 #include <string.h>
36 #include <stdlib.h>
37
38 #include "wx/hashmap.h"
39
// Low-level memory/string primitives used by wxString on its internal
// buffer: the wxChar-based wx wrappers are used unless the UTF-8 build is
// selected, in which case the plain C library functions are used.
#if !wxUSE_UNICODE_UTF8
    #define wxStringMemcpy   wxTmemcpy
    #define wxStringMemcmp   wxTmemcmp
    #define wxStringMemchr   wxTmemchr
    #define wxStringStrlen   wxStrlen
#else // wxUSE_UNICODE_UTF8
    #define wxStringMemcpy   memcpy
    #define wxStringMemcmp   memcmp
    #define wxStringMemchr   memchr
    #define wxStringStrlen   strlen
#endif // !wxUSE_UNICODE_UTF8/wxUSE_UNICODE_UTF8
52
53
54 // ---------------------------------------------------------------------------
55 // static class variables definition
56 // ---------------------------------------------------------------------------
57
58 //According to STL _must_ be a -1 size_t
59 const size_t wxString::npos = (size_t) -1;
60
61 #if wxUSE_STRING_POS_CACHE
62
63 struct wxStrCacheInitializer
64 {
65 wxStrCacheInitializer()
66 {
67 // calling this function triggers s_cache initialization in it, and
68 // from now on it becomes safe to call from multiple threads
69 wxString::GetCache();
70 }
71 };
72
73 static wxStrCacheInitializer gs_stringCacheInit;
74
75 // gdb seems to be unable to display thread-local variables correctly, at least
76 // not my 6.4.98 version under amd64, so provide this debugging helper to do it
77 #ifdef __WXDEBUG__
78
79 struct wxStrCacheDumper
80 {
81 static void ShowAll()
82 {
83 puts("*** wxString cache dump:");
84 for ( unsigned n = 0; n < wxString::Cache::SIZE; n++ )
85 {
86 const wxString::Cache::Element&
87 c = wxString::GetCacheBegin()[n];
88
89 printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
90 n,
91 n == wxString::LastUsedCacheElement() ? " [*]" : "",
92 c.str,
93 (unsigned long)c.pos,
94 (unsigned long)c.impl,
95 (long)c.len);
96 }
97 }
98 };
99
100 void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
101
102 #endif // __WXDEBUG__
103
104 #ifdef wxPROFILE_STRING_CACHE
105
106 wxString::CacheStats wxString::ms_cacheStats;
107
108 struct wxStrCacheStatsDumper
109 {
110 ~wxStrCacheStatsDumper()
111 {
112 const wxString::CacheStats& stats = wxString::ms_cacheStats;
113
114 if ( stats.postot )
115 {
116 puts("*** wxString cache statistics:");
117 printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
118 stats.postot);
119 printf("\tHits %u (of which %u not used) or %.2f%%\n",
120 stats.poshits,
121 stats.mishits,
122 100.*float(stats.poshits - stats.mishits)/stats.postot);
123 printf("\tAverage position requested: %.2f\n",
124 float(stats.sumpos) / stats.postot);
125 printf("\tAverage offset after cached hint: %.2f\n",
126 float(stats.sumofs) / stats.postot);
127 }
128
129 if ( stats.lentot )
130 {
131 printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
132 stats.lentot, 100.*float(stats.lenhits)/stats.lentot);
133 }
134 }
135 };
136
137 static wxStrCacheStatsDumper s_showCacheStats;
138
139 #endif // wxPROFILE_STRING_CACHE
140
141 #endif // wxUSE_STRING_POS_CACHE
142
143 // ----------------------------------------------------------------------------
144 // global functions
145 // ----------------------------------------------------------------------------
146
147 #if wxUSE_STD_IOSTREAM
148
149 #include <iostream>
150
151 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
152 {
153 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
154 return os << (const char *)str.AsCharBuf();
155 #else
156 return os << str.AsInternal();
157 #endif
158 }
159
160 wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
161 {
162 return os << str.c_str();
163 }
164
165 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str)
166 {
167 return os << str.data();
168 }
169
170 #ifndef __BORLANDC__
171 wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)
172 {
173 return os << str.data();
174 }
175 #endif
176
177 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
178
179 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxString& str)
180 {
181 return wos << str.wc_str();
182 }
183
184 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxCStrData& str)
185 {
186 return wos << str.AsWChar();
187 }
188
189 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxWCharBuffer& str)
190 {
191 return wos << str.data();
192 }
193
194 #endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
195
196 #endif // wxUSE_STD_IOSTREAM
197
198 // ===========================================================================
199 // wxString class core
200 // ===========================================================================
201
202 #if wxUSE_UNICODE_UTF8
203
204 void wxString::PosLenToImpl(size_t pos, size_t len,
205 size_t *implPos, size_t *implLen) const
206 {
207 if ( pos == npos )
208 {
209 *implPos = npos;
210 }
211 else // have valid start position
212 {
213 const const_iterator b = GetIterForNthChar(pos);
214 *implPos = wxStringImpl::const_iterator(b.impl()) - m_impl.begin();
215 if ( len == npos )
216 {
217 *implLen = npos;
218 }
219 else // have valid length too
220 {
221 // we need to handle the case of length specifying a substring
222 // going beyond the end of the string, just as std::string does
223 const const_iterator e(end());
224 const_iterator i(b);
225 while ( len && i <= e )
226 {
227 ++i;
228 --len;
229 }
230
231 *implLen = i.impl() - b.impl();
232 }
233 }
234 }
235
236 #endif // wxUSE_UNICODE_UTF8
237
238 // ----------------------------------------------------------------------------
239 // wxCStrData converted strings caching
240 // ----------------------------------------------------------------------------
241
242 // FIXME-UTF8: temporarily disabled because it doesn't work with global
243 // string objects; re-enable after fixing this bug and benchmarking
244 // performance to see if using a hash is a good idea at all
245 #if 0
246
247 // For backward compatibility reasons, it must be possible to assign the value
248 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
249 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
250 // because the memory would be freed immediately, but it has to be valid as long
251 // as the string is not modified, so that code like this still works:
252 //
253 // const wxChar *s = str.c_str();
254 // while ( s ) { ... }
255
256 // FIXME-UTF8: not thread safe!
257 // FIXME-UTF8: we currently clear the cached conversion only when the string is
258 // destroyed, but we should do it when the string is modified, to
259 // keep memory usage down
260 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
261 // invalidated the cache on every change, we could keep the previous
262 // conversion
263 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
264 // to use mb_str() or wc_str() instead of (const [w]char*)c_str()
265
266 template<typename T>
267 static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
268 {
269 typename T::iterator i = hash.find(wxConstCast(s, wxString));
270 if ( i != hash.end() )
271 {
272 free(i->second);
273 hash.erase(i);
274 }
275 }
276
277 #if wxUSE_UNICODE
278 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
279 // so we have to use wxString* here and const-cast when used
280 WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
281 wxStringCharConversionCache);
282 static wxStringCharConversionCache gs_stringsCharCache;
283
284 const char* wxCStrData::AsChar() const
285 {
286 // remove previously cache value, if any (see FIXMEs above):
287 DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
288
289 // convert the string and keep it:
290 const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
291 m_str->mb_str().release();
292
293 return s + m_offset;
294 }
295 #endif // wxUSE_UNICODE
296
297 #if !wxUSE_UNICODE_WCHAR
298 WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
299 wxStringWCharConversionCache);
300 static wxStringWCharConversionCache gs_stringsWCharCache;
301
302 const wchar_t* wxCStrData::AsWChar() const
303 {
304 // remove previously cache value, if any (see FIXMEs above):
305 DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
306
307 // convert the string and keep it:
308 const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
309 m_str->wc_str().release();
310
311 return s + m_offset;
312 }
313 #endif // !wxUSE_UNICODE_WCHAR
314
315 wxString::~wxString()
316 {
317 #if wxUSE_UNICODE
318 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
319 DeleteStringFromConversionCache(gs_stringsCharCache, this);
320 #endif
321 #if !wxUSE_UNICODE_WCHAR
322 DeleteStringFromConversionCache(gs_stringsWCharCache, this);
323 #endif
324 }
325 #endif
326
327 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
328 const char* wxCStrData::AsChar() const
329 {
330 #if wxUSE_UNICODE_UTF8
331 if ( wxLocaleIsUtf8 )
332 return AsInternal();
333 #endif
334 // under non-UTF8 locales, we have to convert the internal UTF-8
335 // representation using wxConvLibc and cache the result
336
337 wxString *str = wxConstCast(m_str, wxString);
338
339 // convert the string:
340 //
341 // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
342 // have it) but it's unfortunately not obvious to implement
343 // because we don't know how big buffer do we need for the
344 // given string length (in case of multibyte encodings, e.g.
345 // ISO-2022-JP or UTF-8 when internal representation is wchar_t)
346 //
347 // One idea would be to store more than just m_convertedToChar
348 // in wxString: then we could record the length of the string
349 // which was converted the last time and try to reuse the same
350 // buffer if the current length is not greater than it (this
351 // could still fail because string could have been modified in
352 // place but it would work most of the time, so we'd do it and
353 // only allocate the new buffer if in-place conversion returned
354 // an error). We could also store a bit saying if the string
355 // was modified since the last conversion (and update it in all
356 // operation modifying the string, of course) to avoid unneeded
357 // consequential conversions. But both of these ideas require
358 // adding more fields to wxString and require profiling results
359 // to be sure that we really gain enough from them to justify
360 // doing it.
361 wxCharBuffer buf(str->mb_str());
362
363 // if it failed, return empty string and not NULL to avoid crashes in code
364 // written with either wxWidgets 2 wxString or std::string behaviour in
365 // mind: neither of them ever returns NULL and so we shouldn't neither
366 if ( !buf )
367 return "";
368
369 if ( str->m_convertedToChar &&
370 strlen(buf) == strlen(str->m_convertedToChar) )
371 {
372 // keep the same buffer for as long as possible, so that several calls
373 // to c_str() in a row still work:
374 strcpy(str->m_convertedToChar, buf);
375 }
376 else
377 {
378 str->m_convertedToChar = buf.release();
379 }
380
381 // and keep it:
382 return str->m_convertedToChar + m_offset;
383 }
384 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
385
386 #if !wxUSE_UNICODE_WCHAR
387 const wchar_t* wxCStrData::AsWChar() const
388 {
389 wxString *str = wxConstCast(m_str, wxString);
390
391 // convert the string:
392 wxWCharBuffer buf(str->wc_str());
393
394 // notice that here, unlike above in AsChar(), conversion can't fail as our
395 // internal UTF-8 is always well-formed -- or the string was corrupted and
396 // all bets are off anyhow
397
398 // FIXME-UTF8: do the conversion in-place in the existing buffer
399 if ( str->m_convertedToWChar &&
400 wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
401 {
402 // keep the same buffer for as long as possible, so that several calls
403 // to c_str() in a row still work:
404 memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
405 }
406 else
407 {
408 str->m_convertedToWChar = buf.release();
409 }
410
411 // and keep it:
412 return str->m_convertedToWChar + m_offset;
413 }
414 #endif // !wxUSE_UNICODE_WCHAR
415
416 // ===========================================================================
417 // wxString class core
418 // ===========================================================================
419
420 // ---------------------------------------------------------------------------
421 // construction and conversion
422 // ---------------------------------------------------------------------------
423
424 #if wxUSE_UNICODE_WCHAR
425 /* static */
426 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
427 const wxMBConv& conv)
428 {
429 // anything to do?
430 if ( !psz || nLength == 0 )
431 return SubstrBufFromMB(L"", 0);
432
433 if ( nLength == npos )
434 nLength = wxNO_LEN;
435
436 size_t wcLen;
437 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
438 if ( !wcLen )
439 return SubstrBufFromMB(_T(""), 0);
440 else
441 return SubstrBufFromMB(wcBuf, wcLen);
442 }
443 #endif // wxUSE_UNICODE_WCHAR
444
445 #if wxUSE_UNICODE_UTF8
446 /* static */
447 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
448 const wxMBConv& conv)
449 {
450 // anything to do?
451 if ( !psz || nLength == 0 )
452 return SubstrBufFromMB("", 0);
453
454 // if psz is already in UTF-8, we don't have to do the roundtrip to
455 // wchar_t* and back:
456 if ( conv.IsUTF8() )
457 {
458 // we need to validate the input because UTF8 iterators assume valid
459 // UTF-8 sequence and psz may be invalid:
460 if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
461 {
462 // we must pass the real string length to SubstrBufFromMB ctor
463 if ( nLength == npos )
464 nLength = psz ? strlen(psz) : 0;
465 return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz), nLength);
466 }
467 // else: do the roundtrip through wchar_t*
468 }
469
470 if ( nLength == npos )
471 nLength = wxNO_LEN;
472
473 // first convert to wide string:
474 size_t wcLen;
475 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
476 if ( !wcLen )
477 return SubstrBufFromMB("", 0);
478
479 // and then to UTF-8:
480 SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
481 // widechar -> UTF-8 conversion isn't supposed to ever fail:
482 wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
483
484 return buf;
485 }
486 #endif // wxUSE_UNICODE_UTF8
487
488 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
489 /* static */
490 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
491 const wxMBConv& conv)
492 {
493 // anything to do?
494 if ( !pwz || nLength == 0 )
495 return SubstrBufFromWC("", 0);
496
497 if ( nLength == npos )
498 nLength = wxNO_LEN;
499
500 size_t mbLen;
501 wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
502 if ( !mbLen )
503 return SubstrBufFromWC("", 0);
504 else
505 return SubstrBufFromWC(mbBuf, mbLen);
506 }
507 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
508
509
510 #if wxUSE_UNICODE_WCHAR
511
512 //Convert wxString in Unicode mode to a multi-byte string
513 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
514 {
515 return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL);
516 }
517
518 #elif wxUSE_UNICODE_UTF8
519
520 const wxWCharBuffer wxString::wc_str() const
521 {
522 return wxMBConvStrictUTF8().cMB2WC
523 (
524 m_impl.c_str(),
525 m_impl.length() + 1, // size, not length
526 NULL
527 );
528 }
529
530 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
531 {
532 if ( conv.IsUTF8() )
533 return wxCharBuffer::CreateNonOwned(m_impl.c_str());
534
535 // FIXME-UTF8: use wc_str() here once we have buffers with length
536
537 size_t wcLen;
538 wxWCharBuffer wcBuf(wxMBConvStrictUTF8().cMB2WC
539 (
540 m_impl.c_str(),
541 m_impl.length() + 1, // size
542 &wcLen
543 ));
544 if ( !wcLen )
545 return wxCharBuffer("");
546
547 return conv.cWC2MB(wcBuf, wcLen+1, NULL);
548 }
549
550 #else // ANSI
551
552 //Converts this string to a wide character string if unicode
553 //mode is not enabled and wxUSE_WCHAR_T is enabled
554 const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
555 {
556 return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL);
557 }
558
559 #endif // Unicode/ANSI
560
561 // shrink to minimal size (releasing extra memory)
562 bool wxString::Shrink()
563 {
564 wxString tmp(begin(), end());
565 swap(tmp);
566 return tmp.length() == length();
567 }
568
569 // deprecated compatibility code:
570 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
571 wxStringCharType *wxString::GetWriteBuf(size_t nLen)
572 {
573 return DoGetWriteBuf(nLen);
574 }
575
576 void wxString::UngetWriteBuf()
577 {
578 DoUngetWriteBuf();
579 }
580
581 void wxString::UngetWriteBuf(size_t nLen)
582 {
583 DoUngetWriteBuf(nLen);
584 }
585 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
586
587
588 // ---------------------------------------------------------------------------
589 // data access
590 // ---------------------------------------------------------------------------
591
592 // all functions are inline in string.h
593
594 // ---------------------------------------------------------------------------
595 // concatenation operators
596 // ---------------------------------------------------------------------------
597
598 /*
599 * concatenation functions come in 5 flavours:
600 * string + string
601 * char + string and string + char
602 * C str + string and string + C str
603 */
604
605 wxString operator+(const wxString& str1, const wxString& str2)
606 {
607 #if !wxUSE_STL_BASED_WXSTRING
608 wxASSERT( str1.IsValid() );
609 wxASSERT( str2.IsValid() );
610 #endif
611
612 wxString s = str1;
613 s += str2;
614
615 return s;
616 }
617
618 wxString operator+(const wxString& str, wxUniChar ch)
619 {
620 #if !wxUSE_STL_BASED_WXSTRING
621 wxASSERT( str.IsValid() );
622 #endif
623
624 wxString s = str;
625 s += ch;
626
627 return s;
628 }
629
630 wxString operator+(wxUniChar ch, const wxString& str)
631 {
632 #if !wxUSE_STL_BASED_WXSTRING
633 wxASSERT( str.IsValid() );
634 #endif
635
636 wxString s = ch;
637 s += str;
638
639 return s;
640 }
641
642 wxString operator+(const wxString& str, const char *psz)
643 {
644 #if !wxUSE_STL_BASED_WXSTRING
645 wxASSERT( str.IsValid() );
646 #endif
647
648 wxString s;
649 if ( !s.Alloc(strlen(psz) + str.length()) ) {
650 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
651 }
652 s += str;
653 s += psz;
654
655 return s;
656 }
657
658 wxString operator+(const wxString& str, const wchar_t *pwz)
659 {
660 #if !wxUSE_STL_BASED_WXSTRING
661 wxASSERT( str.IsValid() );
662 #endif
663
664 wxString s;
665 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
666 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
667 }
668 s += str;
669 s += pwz;
670
671 return s;
672 }
673
674 wxString operator+(const char *psz, const wxString& str)
675 {
676 #if !wxUSE_STL_BASED_WXSTRING
677 wxASSERT( str.IsValid() );
678 #endif
679
680 wxString s;
681 if ( !s.Alloc(strlen(psz) + str.length()) ) {
682 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
683 }
684 s = psz;
685 s += str;
686
687 return s;
688 }
689
690 wxString operator+(const wchar_t *pwz, const wxString& str)
691 {
692 #if !wxUSE_STL_BASED_WXSTRING
693 wxASSERT( str.IsValid() );
694 #endif
695
696 wxString s;
697 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
698 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
699 }
700 s = pwz;
701 s += str;
702
703 return s;
704 }
705
706 // ---------------------------------------------------------------------------
707 // string comparison
708 // ---------------------------------------------------------------------------
709
710 bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
711 {
712 return (length() == 1) && (compareWithCase ? GetChar(0u) == c
713 : wxToupper(GetChar(0u)) == wxToupper(c));
714 }
715
716 #ifdef HAVE_STD_STRING_COMPARE
717
718 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
719 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
720 // sort strings in characters code point order by sorting the byte sequence
721 // in byte values order (i.e. what strcmp() and memcmp() do).
722
723 int wxString::compare(const wxString& str) const
724 {
725 return m_impl.compare(str.m_impl);
726 }
727
728 int wxString::compare(size_t nStart, size_t nLen,
729 const wxString& str) const
730 {
731 size_t pos, len;
732 PosLenToImpl(nStart, nLen, &pos, &len);
733 return m_impl.compare(pos, len, str.m_impl);
734 }
735
736 int wxString::compare(size_t nStart, size_t nLen,
737 const wxString& str,
738 size_t nStart2, size_t nLen2) const
739 {
740 size_t pos, len;
741 PosLenToImpl(nStart, nLen, &pos, &len);
742
743 size_t pos2, len2;
744 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
745
746 return m_impl.compare(pos, len, str.m_impl, pos2, len2);
747 }
748
749 int wxString::compare(const char* sz) const
750 {
751 return m_impl.compare(ImplStr(sz));
752 }
753
754 int wxString::compare(const wchar_t* sz) const
755 {
756 return m_impl.compare(ImplStr(sz));
757 }
758
759 int wxString::compare(size_t nStart, size_t nLen,
760 const char* sz, size_t nCount) const
761 {
762 size_t pos, len;
763 PosLenToImpl(nStart, nLen, &pos, &len);
764
765 SubstrBufFromMB str(ImplStr(sz, nCount));
766
767 return m_impl.compare(pos, len, str.data, str.len);
768 }
769
770 int wxString::compare(size_t nStart, size_t nLen,
771 const wchar_t* sz, size_t nCount) const
772 {
773 size_t pos, len;
774 PosLenToImpl(nStart, nLen, &pos, &len);
775
776 SubstrBufFromWC str(ImplStr(sz, nCount));
777
778 return m_impl.compare(pos, len, str.data, str.len);
779 }
780
781 #else // !HAVE_STD_STRING_COMPARE
782
783 static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
784 const wxStringCharType* s2, size_t l2)
785 {
786 if( l1 == l2 )
787 return wxStringMemcmp(s1, s2, l1);
788 else if( l1 < l2 )
789 {
790 int ret = wxStringMemcmp(s1, s2, l1);
791 return ret == 0 ? -1 : ret;
792 }
793 else
794 {
795 int ret = wxStringMemcmp(s1, s2, l2);
796 return ret == 0 ? +1 : ret;
797 }
798 }
799
800 int wxString::compare(const wxString& str) const
801 {
802 return ::wxDoCmp(m_impl.data(), m_impl.length(),
803 str.m_impl.data(), str.m_impl.length());
804 }
805
806 int wxString::compare(size_t nStart, size_t nLen,
807 const wxString& str) const
808 {
809 wxASSERT(nStart <= length());
810 size_type strLen = length() - nStart;
811 nLen = strLen < nLen ? strLen : nLen;
812
813 size_t pos, len;
814 PosLenToImpl(nStart, nLen, &pos, &len);
815
816 return ::wxDoCmp(m_impl.data() + pos, len,
817 str.m_impl.data(), str.m_impl.length());
818 }
819
820 int wxString::compare(size_t nStart, size_t nLen,
821 const wxString& str,
822 size_t nStart2, size_t nLen2) const
823 {
824 wxASSERT(nStart <= length());
825 wxASSERT(nStart2 <= str.length());
826 size_type strLen = length() - nStart,
827 strLen2 = str.length() - nStart2;
828 nLen = strLen < nLen ? strLen : nLen;
829 nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
830
831 size_t pos, len;
832 PosLenToImpl(nStart, nLen, &pos, &len);
833 size_t pos2, len2;
834 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
835
836 return ::wxDoCmp(m_impl.data() + pos, len,
837 str.m_impl.data() + pos2, len2);
838 }
839
840 int wxString::compare(const char* sz) const
841 {
842 SubstrBufFromMB str(ImplStr(sz, npos));
843 if ( str.len == npos )
844 str.len = wxStringStrlen(str.data);
845 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
846 }
847
848 int wxString::compare(const wchar_t* sz) const
849 {
850 SubstrBufFromWC str(ImplStr(sz, npos));
851 if ( str.len == npos )
852 str.len = wxStringStrlen(str.data);
853 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
854 }
855
856 int wxString::compare(size_t nStart, size_t nLen,
857 const char* sz, size_t nCount) const
858 {
859 wxASSERT(nStart <= length());
860 size_type strLen = length() - nStart;
861 nLen = strLen < nLen ? strLen : nLen;
862
863 size_t pos, len;
864 PosLenToImpl(nStart, nLen, &pos, &len);
865
866 SubstrBufFromMB str(ImplStr(sz, nCount));
867 if ( str.len == npos )
868 str.len = wxStringStrlen(str.data);
869
870 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
871 }
872
873 int wxString::compare(size_t nStart, size_t nLen,
874 const wchar_t* sz, size_t nCount) const
875 {
876 wxASSERT(nStart <= length());
877 size_type strLen = length() - nStart;
878 nLen = strLen < nLen ? strLen : nLen;
879
880 size_t pos, len;
881 PosLenToImpl(nStart, nLen, &pos, &len);
882
883 SubstrBufFromWC str(ImplStr(sz, nCount));
884 if ( str.len == npos )
885 str.len = wxStringStrlen(str.data);
886
887 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
888 }
889
890 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
891
892
893 // ---------------------------------------------------------------------------
894 // find_{first,last}_[not]_of functions
895 // ---------------------------------------------------------------------------
896
897 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
898
899 // NB: All these functions are implemented with the argument being wxChar*,
900 // i.e. widechar string in any Unicode build, even though native string
901 // representation is char* in the UTF-8 build. This is because we couldn't
902 // use memchr() to determine if a character is in a set encoded as UTF-8.
903
904 size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
905 {
906 return find_first_of(sz, nStart, wxStrlen(sz));
907 }
908
909 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
910 {
911 return find_first_not_of(sz, nStart, wxStrlen(sz));
912 }
913
914 size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
915 {
916 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
917
918 size_t idx = nStart;
919 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
920 {
921 if ( wxTmemchr(sz, *i, n) )
922 return idx;
923 }
924
925 return npos;
926 }
927
928 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
929 {
930 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
931
932 size_t idx = nStart;
933 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
934 {
935 if ( !wxTmemchr(sz, *i, n) )
936 return idx;
937 }
938
939 return npos;
940 }
941
942
943 size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
944 {
945 return find_last_of(sz, nStart, wxStrlen(sz));
946 }
947
948 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
949 {
950 return find_last_not_of(sz, nStart, wxStrlen(sz));
951 }
952
953 size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
954 {
955 size_t len = length();
956
957 if ( nStart == npos )
958 {
959 nStart = len - 1;
960 }
961 else
962 {
963 wxASSERT_MSG( nStart <= len, _T("invalid index") );
964 }
965
966 size_t idx = nStart;
967 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
968 i != rend(); --idx, ++i )
969 {
970 if ( wxTmemchr(sz, *i, n) )
971 return idx;
972 }
973
974 return npos;
975 }
976
977 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
978 {
979 size_t len = length();
980
981 if ( nStart == npos )
982 {
983 nStart = len - 1;
984 }
985 else
986 {
987 wxASSERT_MSG( nStart <= len, _T("invalid index") );
988 }
989
990 size_t idx = nStart;
991 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
992 i != rend(); --idx, ++i )
993 {
994 if ( !wxTmemchr(sz, *i, n) )
995 return idx;
996 }
997
998 return npos;
999 }
1000
1001 size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
1002 {
1003 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
1004
1005 size_t idx = nStart;
1006 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
1007 {
1008 if ( *i != ch )
1009 return idx;
1010 }
1011
1012 return npos;
1013 }
1014
1015 size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
1016 {
1017 size_t len = length();
1018
1019 if ( nStart == npos )
1020 {
1021 nStart = len - 1;
1022 }
1023 else
1024 {
1025 wxASSERT_MSG( nStart <= len, _T("invalid index") );
1026 }
1027
1028 size_t idx = nStart;
1029 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1030 i != rend(); --idx, ++i )
1031 {
1032 if ( *i != ch )
1033 return idx;
1034 }
1035
1036 return npos;
1037 }
1038
1039 // the functions above were implemented for wchar_t* arguments in Unicode
1040 // build and char* in ANSI build; below are implementations for the other
1041 // version:
1042 #if wxUSE_UNICODE
1043 #define wxOtherCharType char
1044 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
1045 #else
1046 #define wxOtherCharType wchar_t
1047 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
1048 #endif
1049
1050 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
1051 { return find_first_of(STRCONV(sz), nStart); }
1052
1053 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
1054 size_t n) const
1055 { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
1056 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
1057 { return find_last_of(STRCONV(sz), nStart); }
1058 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
1059 size_t n) const
1060 { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
1061 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
1062 { return find_first_not_of(STRCONV(sz), nStart); }
1063 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
1064 size_t n) const
1065 { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
1066 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
1067 { return find_last_not_of(STRCONV(sz), nStart); }
1068 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
1069 size_t n) const
1070 { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
1071
1072 #undef wxOtherCharType
1073 #undef STRCONV
1074
1075 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1076
1077 // ===========================================================================
1078 // other common string functions
1079 // ===========================================================================
1080
1081 int wxString::CmpNoCase(const wxString& s) const
1082 {
1083 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
1084
1085 const_iterator i1 = begin();
1086 const_iterator end1 = end();
1087 const_iterator i2 = s.begin();
1088 const_iterator end2 = s.end();
1089
1090 for ( ; i1 != end1 && i2 != end2; ++i1, ++i2 )
1091 {
1092 wxUniChar lower1 = (wxChar)wxTolower(*i1);
1093 wxUniChar lower2 = (wxChar)wxTolower(*i2);
1094 if ( lower1 != lower2 )
1095 return lower1 < lower2 ? -1 : 1;
1096 }
1097
1098 size_t len1 = length();
1099 size_t len2 = s.length();
1100
1101 if ( len1 < len2 )
1102 return -1;
1103 else if ( len1 > len2 )
1104 return 1;
1105 return 0;
1106 }
1107
1108
1109 #if wxUSE_UNICODE
1110
1111 #ifdef __MWERKS__
1112 #ifndef __SCHAR_MAX__
1113 #define __SCHAR_MAX__ 127
1114 #endif
1115 #endif
1116
1117 wxString wxString::FromAscii(const char *ascii, size_t len)
1118 {
1119 if (!ascii || len == 0)
1120 return wxEmptyString;
1121
1122 wxString res;
1123
1124 {
1125 wxStringInternalBuffer buf(res, len);
1126 wxStringCharType *dest = buf;
1127
1128 for ( ; len > 0; --len )
1129 {
1130 unsigned char c = (unsigned char)*ascii++;
1131 wxASSERT_MSG( c < 0x80,
1132 _T("Non-ASCII value passed to FromAscii().") );
1133
1134 *dest++ = (wchar_t)c;
1135 }
1136 }
1137
1138 return res;
1139 }
1140
1141 wxString wxString::FromAscii(const char *ascii)
1142 {
1143 return FromAscii(ascii, wxStrlen(ascii));
1144 }
1145
1146 wxString wxString::FromAscii(char ascii)
1147 {
1148 // What do we do with '\0' ?
1149
1150 unsigned char c = (unsigned char)ascii;
1151
1152 wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") );
1153
1154 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1155 return wxString(wxUniChar((wchar_t)c));
1156 }
1157
1158 const wxCharBuffer wxString::ToAscii() const
1159 {
1160 // this will allocate enough space for the terminating NUL too
1161 wxCharBuffer buffer(length());
1162 char *dest = buffer.data();
1163
1164 for ( const_iterator i = begin(); i != end(); ++i )
1165 {
1166 wxUniChar c(*i);
1167 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1168 *dest++ = c.IsAscii() ? (char)c : '_';
1169
1170 // the output string can't have embedded NULs anyhow, so we can safely
1171 // stop at first of them even if we do have any
1172 if ( !c )
1173 break;
1174 }
1175
1176 return buffer;
1177 }
1178
1179 #endif // wxUSE_UNICODE
1180
1181 // extract string of length nCount starting at nFirst
1182 wxString wxString::Mid(size_t nFirst, size_t nCount) const
1183 {
1184 size_t nLen = length();
1185
1186 // default value of nCount is npos and means "till the end"
1187 if ( nCount == npos )
1188 {
1189 nCount = nLen - nFirst;
1190 }
1191
1192 // out-of-bounds requests return sensible things
1193 if ( nFirst + nCount > nLen )
1194 {
1195 nCount = nLen - nFirst;
1196 }
1197
1198 if ( nFirst > nLen )
1199 {
1200 // AllocCopy() will return empty string
1201 return wxEmptyString;
1202 }
1203
1204 wxString dest(*this, nFirst, nCount);
1205 if ( dest.length() != nCount )
1206 {
1207 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1208 }
1209
1210 return dest;
1211 }
1212
1213 // check that the string starts with prefix and return the rest of the string
1214 // in the provided pointer if it is not NULL, otherwise return false
1215 bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
1216 {
1217 if ( compare(0, prefix.length(), prefix) != 0 )
1218 return false;
1219
1220 if ( rest )
1221 {
1222 // put the rest of the string into provided pointer
1223 rest->assign(*this, prefix.length(), npos);
1224 }
1225
1226 return true;
1227 }
1228
1229
1230 // check that the string ends with suffix and return the rest of it in the
1231 // provided pointer if it is not NULL, otherwise return false
1232 bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
1233 {
1234 int start = length() - suffix.length();
1235
1236 if ( start < 0 || compare(start, npos, suffix) != 0 )
1237 return false;
1238
1239 if ( rest )
1240 {
1241 // put the rest of the string into provided pointer
1242 rest->assign(*this, 0, start);
1243 }
1244
1245 return true;
1246 }
1247
1248
1249 // extract nCount last (rightmost) characters
1250 wxString wxString::Right(size_t nCount) const
1251 {
1252 if ( nCount > length() )
1253 nCount = length();
1254
1255 wxString dest(*this, length() - nCount, nCount);
1256 if ( dest.length() != nCount ) {
1257 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1258 }
1259 return dest;
1260 }
1261
1262 // get all characters after the last occurence of ch
1263 // (returns the whole string if ch not found)
1264 wxString wxString::AfterLast(wxUniChar ch) const
1265 {
1266 wxString str;
1267 int iPos = Find(ch, true);
1268 if ( iPos == wxNOT_FOUND )
1269 str = *this;
1270 else
1271 str = wx_str() + iPos + 1;
1272
1273 return str;
1274 }
1275
1276 // extract nCount first (leftmost) characters
1277 wxString wxString::Left(size_t nCount) const
1278 {
1279 if ( nCount > length() )
1280 nCount = length();
1281
1282 wxString dest(*this, 0, nCount);
1283 if ( dest.length() != nCount ) {
1284 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1285 }
1286 return dest;
1287 }
1288
1289 // get all characters before the first occurence of ch
1290 // (returns the whole string if ch not found)
1291 wxString wxString::BeforeFirst(wxUniChar ch) const
1292 {
1293 int iPos = Find(ch);
1294 if ( iPos == wxNOT_FOUND ) iPos = length();
1295 return wxString(*this, 0, iPos);
1296 }
1297
1298 /// get all characters before the last occurence of ch
1299 /// (returns empty string if ch not found)
1300 wxString wxString::BeforeLast(wxUniChar ch) const
1301 {
1302 wxString str;
1303 int iPos = Find(ch, true);
1304 if ( iPos != wxNOT_FOUND && iPos != 0 )
1305 str = wxString(c_str(), iPos);
1306
1307 return str;
1308 }
1309
1310 /// get all characters after the first occurence of ch
1311 /// (returns empty string if ch not found)
1312 wxString wxString::AfterFirst(wxUniChar ch) const
1313 {
1314 wxString str;
1315 int iPos = Find(ch);
1316 if ( iPos != wxNOT_FOUND )
1317 str = wx_str() + iPos + 1;
1318
1319 return str;
1320 }
1321
1322 // replace first (or all) occurences of some substring with another one
1323 size_t wxString::Replace(const wxString& strOld,
1324 const wxString& strNew, bool bReplaceAll)
1325 {
1326 // if we tried to replace an empty string we'd enter an infinite loop below
1327 wxCHECK_MSG( !strOld.empty(), 0,
1328 _T("wxString::Replace(): invalid parameter") );
1329
1330 wxSTRING_INVALIDATE_CACHE();
1331
1332 size_t uiCount = 0; // count of replacements made
1333
1334 // optimize the special common case: replacement of one character by
1335 // another one (in UTF-8 case we can only do this for ASCII characters)
1336 //
1337 // benchmarks show that this special version is around 3 times faster
1338 // (depending on the proportion of matching characters and UTF-8/wchar_t
1339 // build)
1340 if ( strOld.m_impl.length() == 1 && strNew.m_impl.length() == 1 )
1341 {
1342 const wxStringCharType chOld = strOld.m_impl[0],
1343 chNew = strNew.m_impl[0];
1344
1345 // this loop is the simplified version of the one below
1346 for ( size_t pos = 0; ; )
1347 {
1348 pos = m_impl.find(chOld, pos);
1349 if ( pos == npos )
1350 break;
1351
1352 m_impl[pos++] = chNew;
1353
1354 uiCount++;
1355
1356 if ( !bReplaceAll )
1357 break;
1358 }
1359 }
1360 else // general case
1361 {
1362 const size_t uiOldLen = strOld.m_impl.length();
1363 const size_t uiNewLen = strNew.m_impl.length();
1364
1365 for ( size_t pos = 0; ; )
1366 {
1367 pos = m_impl.find(strOld.m_impl, pos);
1368 if ( pos == npos )
1369 break;
1370
1371 // replace this occurrence of the old string with the new one
1372 m_impl.replace(pos, uiOldLen, strNew.m_impl);
1373
1374 // move up pos past the string that was replaced
1375 pos += uiNewLen;
1376
1377 // increase replace count
1378 uiCount++;
1379
1380 // stop after the first one?
1381 if ( !bReplaceAll )
1382 break;
1383 }
1384 }
1385
1386 return uiCount;
1387 }
1388
1389 bool wxString::IsAscii() const
1390 {
1391 for ( const_iterator i = begin(); i != end(); ++i )
1392 {
1393 if ( !(*i).IsAscii() )
1394 return false;
1395 }
1396
1397 return true;
1398 }
1399
1400 bool wxString::IsWord() const
1401 {
1402 for ( const_iterator i = begin(); i != end(); ++i )
1403 {
1404 if ( !wxIsalpha(*i) )
1405 return false;
1406 }
1407
1408 return true;
1409 }
1410
1411 bool wxString::IsNumber() const
1412 {
1413 if ( empty() )
1414 return true;
1415
1416 const_iterator i = begin();
1417
1418 if ( *i == _T('-') || *i == _T('+') )
1419 ++i;
1420
1421 for ( ; i != end(); ++i )
1422 {
1423 if ( !wxIsdigit(*i) )
1424 return false;
1425 }
1426
1427 return true;
1428 }
1429
1430 wxString wxString::Strip(stripType w) const
1431 {
1432 wxString s = *this;
1433 if ( w & leading ) s.Trim(false);
1434 if ( w & trailing ) s.Trim(true);
1435 return s;
1436 }
1437
1438 // ---------------------------------------------------------------------------
1439 // case conversion
1440 // ---------------------------------------------------------------------------
1441
1442 wxString& wxString::MakeUpper()
1443 {
1444 for ( iterator it = begin(), en = end(); it != en; ++it )
1445 *it = (wxChar)wxToupper(*it);
1446
1447 return *this;
1448 }
1449
1450 wxString& wxString::MakeLower()
1451 {
1452 for ( iterator it = begin(), en = end(); it != en; ++it )
1453 *it = (wxChar)wxTolower(*it);
1454
1455 return *this;
1456 }
1457
1458 wxString& wxString::MakeCapitalized()
1459 {
1460 const iterator en = end();
1461 iterator it = begin();
1462 if ( it != en )
1463 {
1464 *it = (wxChar)wxToupper(*it);
1465 for ( ++it; it != en; ++it )
1466 *it = (wxChar)wxTolower(*it);
1467 }
1468
1469 return *this;
1470 }
1471
1472 // ---------------------------------------------------------------------------
1473 // trimming and padding
1474 // ---------------------------------------------------------------------------
1475
1476 // some compilers (VC++ 6.0 not to name them) return true for a call to
1477 // isspace('\xEA') in the C locale which seems to be broken to me, but we have
1478 // to live with this by checking that the character is a 7 bit one - even if
1479 // this may fail to detect some spaces (I don't know if Unicode doesn't have
1480 // space-like symbols somewhere except in the first 128 chars), it is arguably
1481 // still better than trimming away accented letters
1482 inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1483
1484 // trims spaces (in the sense of isspace) from left or right side
1485 wxString& wxString::Trim(bool bFromRight)
1486 {
1487 // first check if we're going to modify the string at all
1488 if ( !empty() &&
1489 (
1490 (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1491 (!bFromRight && wxSafeIsspace(GetChar(0u)))
1492 )
1493 )
1494 {
1495 if ( bFromRight )
1496 {
1497 // find last non-space character
1498 reverse_iterator psz = rbegin();
1499 while ( (psz != rend()) && wxSafeIsspace(*psz) )
1500 ++psz;
1501
1502 // truncate at trailing space start
1503 erase(psz.base(), end());
1504 }
1505 else
1506 {
1507 // find first non-space character
1508 iterator psz = begin();
1509 while ( (psz != end()) && wxSafeIsspace(*psz) )
1510 ++psz;
1511
1512 // fix up data and length
1513 erase(begin(), psz);
1514 }
1515 }
1516
1517 return *this;
1518 }
1519
1520 // adds nCount characters chPad to the string from either side
1521 wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1522 {
1523 wxString s(chPad, nCount);
1524
1525 if ( bFromRight )
1526 *this += s;
1527 else
1528 {
1529 s += *this;
1530 swap(s);
1531 }
1532
1533 return *this;
1534 }
1535
1536 // truncate the string
1537 wxString& wxString::Truncate(size_t uiLen)
1538 {
1539 if ( uiLen < length() )
1540 {
1541 erase(begin() + uiLen, end());
1542 }
1543 //else: nothing to do, string is already short enough
1544
1545 return *this;
1546 }
1547
1548 // ---------------------------------------------------------------------------
1549 // finding (return wxNOT_FOUND if not found and index otherwise)
1550 // ---------------------------------------------------------------------------
1551
1552 // find a character
1553 int wxString::Find(wxUniChar ch, bool bFromEnd) const
1554 {
1555 size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1556
1557 return (idx == npos) ? wxNOT_FOUND : (int)idx;
1558 }
1559
1560 // ----------------------------------------------------------------------------
1561 // conversion to numbers
1562 // ----------------------------------------------------------------------------
1563
// The implementation of all the functions below is exactly the same so factor
// it out. Note that number extraction works correctly on UTF-8 strings, so
// we can use wxStringCharType and wx_str() for maximum efficiency.

// DO_IF_NOT_WINCE(x) expands to x unless __WXWINCE__ is defined, in which
// case it expands to nothing. It is used to wrap the code using errno, as
// <errno.h> is only included by this file when __WXWINCE__ is not defined.
#ifndef __WXWINCE__
    #define DO_IF_NOT_WINCE(x) x
#else
    #define DO_IF_NOT_WINCE(x)
#endif

// WX_STRING_TO_INT_TYPE expands to the complete body of a wxString member
// function converting the string to an integer:
//
//  - out is the pointer receiving the result; the expansion returns false
//    via wxCHECK_MSG if it is NULL
//  - base is the numeric base passed to func; wxASSERT_MSG checks that it is
//    either 0 or in the 2..36 range
//  - func is the strtol()-like function called with the wx_str() pointer
//  - T is the type of the value returned by func and stored in *out
//
// As the expansion ends with a return statement (without the trailing
// semicolon), it must be the last statement of the function using it.
#define WX_STRING_TO_INT_TYPE(out, base, func, T)                           \
    wxCHECK_MSG( out, false, _T("NULL output pointer") );                   \
    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );  \
                                                                            \
    DO_IF_NOT_WINCE( errno = 0; )                                           \
                                                                            \
    const wxStringCharType *start = wx_str();                               \
    wxStringCharType *end;                                                  \
    T val = func(start, &end, base);                                        \
                                                                            \
    /* return true only if scan was stopped by the terminating NUL and */   \
    /* if the string was not empty to start with and no under/overflow */   \
    /* occurred: */                                                         \
    if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )         \
        return false;                                                       \
    *out = val;                                                             \
    return true
1591
// Conversions of the string to the integer types.
//
// All of these functions are implemented by WX_STRING_TO_INT_TYPE and differ
// only in the conversion function and the result type used: they return true
// and store the value in *pVal only if the entire, non-empty, string was
// consumed by the conversion function and, except when __WXWINCE__ is
// defined, errno was not set to ERANGE. Otherwise they return false and
// don't modify *pVal.

// convert to long using wxStrtol()
bool wxString::ToLong(long *pVal, int base) const
{
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtol, long);
}

// convert to unsigned long using wxStrtoul()
bool wxString::ToULong(unsigned long *pVal, int base) const
{
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoul, unsigned long);
}

// convert to wxLongLong_t using wxStrtoll()
bool wxString::ToLongLong(wxLongLong_t *pVal, int base) const
{
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoll, wxLongLong_t);
}

// convert to wxULongLong_t using wxStrtoull()
bool wxString::ToULongLong(wxULongLong_t *pVal, int base) const
{
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoull, wxULongLong_t);
}
1611
1612 bool wxString::ToDouble(double *pVal) const
1613 {
1614 wxCHECK_MSG( pVal, false, _T("NULL output pointer") );
1615
1616 DO_IF_NOT_WINCE( errno = 0; )
1617
1618 const wxChar *start = c_str();
1619 wxChar *end;
1620 double val = wxStrtod(start, &end);
1621
1622 // return true only if scan was stopped by the terminating NUL and if the
1623 // string was not empty to start with and no under/overflow occurred
1624 if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )
1625 return false;
1626
1627 *pVal = val;
1628
1629 return true;
1630 }
1631
1632 // ---------------------------------------------------------------------------
1633 // formatted output
1634 // ---------------------------------------------------------------------------
1635
1636 #if !wxUSE_UTF8_LOCALE_ONLY
1637 /* static */
1638 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1639 wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
1640 #else
1641 wxString wxString::DoFormatWchar(const wxChar *format, ...)
1642 #endif
1643 {
1644 va_list argptr;
1645 va_start(argptr, format);
1646
1647 wxString s;
1648 s.PrintfV(format, argptr);
1649
1650 va_end(argptr);
1651
1652 return s;
1653 }
1654 #endif // !wxUSE_UTF8_LOCALE_ONLY
1655
#if wxUSE_UNICODE_UTF8
// Returns a new string with the result of formatting the variable arguments
// according to the char format string; the work is done by PrintfV().
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
1671
1672 /* static */
1673 wxString wxString::FormatV(const wxString& format, va_list argptr)
1674 {
1675 wxString s;
1676 s.PrintfV(format, argptr);
1677 return s;
1678 }
1679
1680 #if !wxUSE_UTF8_LOCALE_ONLY
1681 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1682 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
1683 #else
1684 int wxString::DoPrintfWchar(const wxChar *format, ...)
1685 #endif
1686 {
1687 va_list argptr;
1688 va_start(argptr, format);
1689
1690 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1691 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1692 // because it's the only cast that works safely for downcasting when
1693 // multiple inheritance is used:
1694 wxString *str = static_cast<wxString*>(this);
1695 #else
1696 wxString *str = this;
1697 #endif
1698
1699 int iLen = str->PrintfV(format, argptr);
1700
1701 va_end(argptr);
1702
1703 return iLen;
1704 }
1705 #endif // !wxUSE_UTF8_LOCALE_ONLY
1706
#if wxUSE_UNICODE_UTF8
// Formats the variable arguments according to the char format string into
// this string using PrintfV() and returns its return value.
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);

    const int iLen = PrintfV(format, args);

    va_end(args);

    return iLen;
}
#endif // wxUSE_UNICODE_UTF8
1720
1721 /*
    Uses wxVsnprintf and places the result into this string.
1723
1724 In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1725 it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in
1726 the ISO C99 (and thus SUSv3) standard the return value for the case of
1727 an undersized buffer is inconsistent. For conforming vsnprintf
1728 implementations the function must return the number of characters that
1729 would have been printed had the buffer been large enough. For conforming
1730 vswprintf implementations the function must return a negative number
1731 and set errno.
1732
1733 What vswprintf sets errno to is undefined but Darwin seems to set it to
1734 EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of
1735 those are defined in the standard and backed up by several conformance
1736 statements. Note that ENOMEM mentioned in the manual page does not
1737 apply to swprintf, only wprintf and fwprintf.
1738
1739 Official manual page:
1740 http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1741
1742 Some conformance statements (AIX, Solaris):
1743 http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1744 http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1745
1746 Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1747 EILSEQ and EINVAL are specifically defined to mean the error is other than
1748 an undersized buffer and no other errno are defined we treat those two
1749 as meaning hard errors and everything else gets the old behavior which
1750 is to keep looping and increasing buffer size until the function succeeds.
1751
1752 In practice it's impossible to determine before compilation which behavior
1753 may be used. The vswprintf function may have vsnprintf-like behavior or
1754 vice-versa. Behavior detected on one release can theoretically change
1755 with an updated release. Not to mention that configure testing for it
1756 would require the test to be run on the host system, not the build system
1757 which makes cross compilation difficult. Therefore, we make no assumptions
1758 about behavior and try our best to handle every known case, including the
1759 case where wxVsnprintf returns a negative number and fails to set errno.
1760
1761 There is yet one more non-standard implementation and that is our own.
1762 Fortunately, that can be detected at compile-time.
1763
    On top of all that, ISO C99 explicitly defines snprintf to write a null
    character to the last position of the specified buffer. That would be
    at the given buffer size minus 1. It is supposed to do this even if it
    turns out that the buffer is sized too small.
1768
1769 Darwin (tested on 10.5) follows the C99 behavior exactly.
1770
1771 Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1772 errno even when it fails. However, it only seems to ever fail due
1773 to an undersized buffer.
1774 */
1775 #if wxUSE_UNICODE_UTF8
1776 template<typename BufferType>
1777 #else
1778 // we only need one version in non-UTF8 builds and at least two Windows
1779 // compilers have problems with this function template, so use just one
1780 // normal function here
1781 #endif
1782 static int DoStringPrintfV(wxString& str,
1783 const wxString& format, va_list argptr)
1784 {
1785 int size = 1024;
1786
1787 for ( ;; )
1788 {
1789 #if wxUSE_UNICODE_UTF8
1790 BufferType tmp(str, size + 1);
1791 typename BufferType::CharType *buf = tmp;
1792 #else
1793 wxStringBuffer tmp(str, size + 1);
1794 wxChar *buf = tmp;
1795 #endif
1796
1797 if ( !buf )
1798 {
1799 // out of memory
1800
1801 // in UTF-8 build, leaving uninitialized junk in the buffer
1802 // could result in invalid non-empty UTF-8 string, so just
1803 // reset the string to empty on failure:
1804 buf[0] = '\0';
1805 return -1;
1806 }
1807
1808 // wxVsnprintf() may modify the original arg pointer, so pass it
1809 // only a copy
1810 va_list argptrcopy;
1811 wxVaCopy(argptrcopy, argptr);
1812
1813 #ifndef __WXWINCE__
1814 // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1815 errno = 0;
1816 #endif
1817 int len = wxVsnprintf(buf, size, format, argptrcopy);
1818 va_end(argptrcopy);
1819
1820 // some implementations of vsnprintf() don't NUL terminate
1821 // the string if there is not enough space for it so
1822 // always do it manually
1823 // FIXME: This really seems to be the wrong and would be an off-by-one
1824 // bug except the code above allocates an extra character.
1825 buf[size] = _T('\0');
1826
1827 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1828 // total number of characters which would have been written if the
1829 // buffer were large enough (newer standards such as Unix98)
1830 if ( len < 0 )
1831 {
1832 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1833 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1834 // is true if *both* of them use our own implementation,
1835 // otherwise we can't be sure
1836 #if wxUSE_WXVSNPRINTF
1837 // we know that our own implementation of wxVsnprintf() returns -1
1838 // only for a format error - thus there's something wrong with
1839 // the user's format string
1840 buf[0] = '\0';
1841 return -1;
1842 #else // possibly using system version
1843 // assume it only returns error if there is not enough space, but
1844 // as we don't know how much we need, double the current size of
1845 // the buffer
1846 #ifndef __WXWINCE__
1847 if( (errno == EILSEQ) || (errno == EINVAL) )
1848 // If errno was set to one of the two well-known hard errors
1849 // then fail immediately to avoid an infinite loop.
1850 return -1;
1851 else
1852 #endif // __WXWINCE__
1853 // still not enough, as we don't know how much we need, double the
1854 // current size of the buffer
1855 size *= 2;
1856 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1857 }
1858 else if ( len >= size )
1859 {
1860 #if wxUSE_WXVSNPRINTF
1861 // we know that our own implementation of wxVsnprintf() returns
1862 // size+1 when there's not enough space but that's not the size
1863 // of the required buffer!
1864 size *= 2; // so we just double the current size of the buffer
1865 #else
1866 // some vsnprintf() implementations NUL-terminate the buffer and
1867 // some don't in len == size case, to be safe always add 1
1868 // FIXME: I don't quite understand this comment. The vsnprintf
1869 // function is specifically defined to return the number of
1870 // characters printed not including the null terminator.
1871 // So OF COURSE you need to add 1 to get the right buffer size.
1872 // The following line is definitely correct, no question.
1873 size = len + 1;
1874 #endif
1875 }
1876 else // ok, there was enough space
1877 {
1878 break;
1879 }
1880 }
1881
1882 // we could have overshot
1883 str.Shrink();
1884
1885 return str.length();
1886 }
1887
// Formats the arguments in argptr according to format into this string and
// returns the value returned by DoStringPrintfV().
//
// This function only selects the DoStringPrintfV() version to call:
//  - if wxUSE_UTF8_LOCALE_ONLY, the one using the UTF-8 buffer type
//  - otherwise, if wxUSE_UNICODE_UTF8, either the one using the UTF-8 buffer
//    type or the one using wxStringBuffer depending on the value of
//    wxLocaleIsUtf8 at run-time
//  - otherwise the only, non-template, version
int wxString::PrintfV(const wxString& format, va_list argptr)
{
#if wxUSE_UNICODE_UTF8
    // the buffer type used for writing char data into the string
    #if wxUSE_STL_BASED_WXSTRING
        typedef wxStringTypeBuffer<char> Utf8Buffer;
    #else
        typedef wxStringInternalBuffer Utf8Buffer;
    #endif
#endif

#if wxUSE_UTF8_LOCALE_ONLY
    return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
#else
    #if wxUSE_UNICODE_UTF8
    if ( wxLocaleIsUtf8 )
        return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
    else
        // wxChar* version
        return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
    #else
    return DoStringPrintfV(*this, format, argptr);
    #endif // UTF8/WCHAR
#endif
}
1912
1913 // ----------------------------------------------------------------------------
1914 // misc other operations
1915 // ----------------------------------------------------------------------------
1916
// returns true if the string matches the pattern which may contain '*' and
// '?' metacharacters (as usual, '?' matches any character and '*' any number
// of them)
//
// The mask and the string are scanned in parallel. For a '*', the literal
// part of the mask following it (up to the next metacharacter or the end of
// the mask) is searched for in the remaining text with wxStrstr() and the
// scan continues after its first occurrence. If the mask is exhausted while
// some text is still left, the scan is restarted from the last '*' seen, one
// character further in the text.
bool wxString::Matches(const wxString& mask) const
{
    // I disable this code as it doesn't seem to be faster (in fact, it seems
    // to be much slower) than the old, hand-written code below and using it
    // here requires always linking with libregex even if the user code doesn't
    // use it
    //
    // NB: this disabled branch uses pszMask, which is only declared in the
    //     #else branch below, so it would need updating before being enabled
#if 0 // wxUSE_REGEX
    // first translate the shell-like mask into a regex
    wxString pattern;
    pattern.reserve(wxStrlen(pszMask));

    pattern += _T('^');
    while ( *pszMask )
    {
        switch ( *pszMask )
        {
            case _T('?'):
                pattern += _T('.');
                break;

            case _T('*'):
                pattern += _T(".*");
                break;

            case _T('^'):
            case _T('.'):
            case _T('$'):
            case _T('('):
            case _T(')'):
            case _T('|'):
            case _T('+'):
            case _T('\\'):
                // these characters are special in a RE, quote them
                // (however note that we don't quote '[' and ']' to allow
                // using them for Unix shell like matching)
                pattern += _T('\\');
                // fall through

            default:
                pattern += *pszMask;
        }

        pszMask++;
    }
    pattern += _T('$');

    // and now use it
    return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
#else // !wxUSE_REGEX
  // TODO: this is, of course, awfully inefficient...

  // FIXME-UTF8: implement using iterators, remove #if
#if wxUSE_UNICODE_UTF8
  // work on the wchar_t copies of both strings in this build
  wxWCharBuffer maskBuf = mask.wc_str();
  wxWCharBuffer txtBuf = wc_str();
  const wxChar *pszMask = maskBuf.data();
  const wxChar *pszTxt = txtBuf.data();
#else
  const wxChar *pszMask = mask.wx_str();
  // the char currently being checked
  const wxChar *pszTxt = wx_str();
#endif

  // the last location where '*' matched
  const wxChar *pszLastStarInText = NULL;
  const wxChar *pszLastStarInMask = NULL;

match:
  for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
    switch ( *pszMask ) {
      case wxT('?'):
        // '?' needs a character to match, so fail at the end of the text
        if ( *pszTxt == wxT('\0') )
          return false;

        // pszTxt and pszMask will be incremented in the loop statement

        break;

      case wxT('*'):
        {
          // remember where we started to be able to backtrack later
          pszLastStarInText = pszTxt;
          pszLastStarInMask = pszMask;

          // ignore special chars immediately following this one
          // (should this be an error?)
          //
          // NB: any '?' skipped here doesn't consume a character of the text
          while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
            pszMask++;

          // if there is nothing more, match
          if ( *pszMask == wxT('\0') )
            return true;

          // are there any other metacharacters in the mask?
          size_t uiLenMask;
          const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));

          if ( pEndMask != NULL ) {
            // we have to match the string between two metachars
            uiLenMask = pEndMask - pszMask;
          }
          else {
            // we have to match the remainder of the string
            uiLenMask = wxStrlen(pszMask);
          }

          // look for the first occurrence of the literal part of the mask
          // in the remaining text
          wxString strToMatch(pszMask, uiLenMask);
          const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
          if ( pMatch == NULL )
            return false;

          // -1 to compensate "++" in the loop
          pszTxt = pMatch + uiLenMask - 1;
          pszMask += uiLenMask - 1;
        }
        break;

      default:
        // a literal character must match exactly: notice that a mismatch
        // here fails immediately, the backtracking code below is only used
        // when the mask is exhausted while some text is left
        if ( *pszMask != *pszTxt )
          return false;
        break;
    }
  }

  // match only if nothing left
  if ( *pszTxt == wxT('\0') )
    return true;

  // if we failed to match, backtrack if we can
  if ( pszLastStarInText ) {
    // restart from the last '*', but one character further in the text:
    // processing this '*' again will set pszLastStarInText anew
    pszTxt = pszLastStarInText + 1;
    pszMask = pszLastStarInMask;

    pszLastStarInText = NULL;

    // don't bother resetting pszLastStarInMask, it's unnecessary

    goto match;
  }

  return false;
#endif // wxUSE_REGEX/!wxUSE_REGEX
}
2063
2064 // Count the number of chars
2065 int wxString::Freq(wxUniChar ch) const
2066 {
2067 int count = 0;
2068 for ( const_iterator i = begin(); i != end(); ++i )
2069 {
2070 if ( *i == ch )
2071 count ++;
2072 }
2073 return count;
2074 }
2075
2076 // ----------------------------------------------------------------------------
2077 // wxUTF8StringBuffer
2078 // ----------------------------------------------------------------------------
2079
2080 #if wxUSE_UNICODE_WCHAR
2081 wxUTF8StringBuffer::~wxUTF8StringBuffer()
2082 {
2083 wxMBConvStrictUTF8 conv;
2084 size_t wlen = conv.ToWChar(NULL, 0, m_buf);
2085 wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
2086
2087 wxStringInternalBuffer wbuf(m_str, wlen);
2088 conv.ToWChar(wbuf, wlen, m_buf);
2089 }
2090
2091 wxUTF8StringBufferLength::~wxUTF8StringBufferLength()
2092 {
2093 wxCHECK_RET(m_lenSet, "length not set");
2094
2095 wxMBConvStrictUTF8 conv;
2096 size_t wlen = conv.ToWChar(NULL, 0, m_buf, m_len);
2097 wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
2098
2099 wxStringInternalBufferLength wbuf(m_str, wlen);
2100 conv.ToWChar(wbuf, wlen, m_buf, m_len);
2101 wbuf.SetLength(wlen);
2102 }
2103 #endif // wxUSE_UNICODE_WCHAR
2104
// ----------------------------------------------------------------------------
// wxCharTypeBuffer<T>
// ----------------------------------------------------------------------------

// definitions of the static NullData member of wxCharTypeBuffer<T> for the
// char and wchar_t specializations; both are constructed from a NULL pointer

template<>
wxCharTypeBuffer<char>::Data
wxCharTypeBuffer<char>::NullData(NULL);

template<>
wxCharTypeBuffer<wchar_t>::Data
wxCharTypeBuffer<wchar_t>::NullData(NULL);