]> git.saurik.com Git - wxWidgets.git/blob - src/common/string.cpp
85b30905335d5583528b65729888a2060b7f6829
[wxWidgets.git] / src / common / string.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
5 // Modified by:
6 // Created: 29/01/98
7 // RCS-ID: $Id$
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
12
13 // ===========================================================================
14 // headers, declarations, constants
15 // ===========================================================================
16
17 // For compilers that support precompilation, includes "wx.h".
18 #include "wx/wxprec.h"
19
20 #ifdef __BORLANDC__
21 #pragma hdrstop
22 #endif
23
24 #ifndef WX_PRECOMP
25 #include "wx/string.h"
26 #include "wx/wxcrtvararg.h"
27 #endif
28
29 #include <ctype.h>
30
31 #ifndef __WXWINCE__
32 #include <errno.h>
33 #endif
34
35 #include <string.h>
36 #include <stdlib.h>
37
38 #include "wx/hashmap.h"
39
40 // string handling functions used by wxString:
41 #if wxUSE_UNICODE_UTF8
42 #define wxStringMemcpy memcpy
43 #define wxStringMemcmp memcmp
44 #define wxStringMemchr memchr
45 #define wxStringStrlen strlen
46 #else
47 #define wxStringMemcpy wxTmemcpy
48 #define wxStringMemcmp wxTmemcmp
49 #define wxStringMemchr wxTmemchr
50 #define wxStringStrlen wxStrlen
51 #endif
52
53
54 // ---------------------------------------------------------------------------
55 // static class variables definition
56 // ---------------------------------------------------------------------------
57
58 //According to STL _must_ be a -1 size_t
59 const size_t wxString::npos = (size_t) -1;
60
61 #if wxUSE_STRING_POS_CACHE
62
63 // gdb seems to be unable to display thread-local variables correctly, at least
64 // not my 6.4.98 version under amd64, so provide this debugging helper to do it
65 #ifdef __WXDEBUG__
66
67 struct wxStrCacheDumper
68 {
69 static void ShowAll()
70 {
71 puts("*** wxString cache dump:");
72 for ( unsigned n = 0; n < wxString::Cache::SIZE; n++ )
73 {
74 const wxString::Cache::Element&
75 c = wxString::GetCacheBegin()[n];
76
77 printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
78 n,
79 n == wxString::LastUsedCacheElement() ? " [*]" : "",
80 c.str,
81 (unsigned long)c.pos,
82 (unsigned long)c.impl,
83 (long)c.len);
84 }
85 }
86 };
87
88 void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
89
90 #endif // __WXDEBUG__
91
92 #ifdef wxPROFILE_STRING_CACHE
93
94 wxString::CacheStats wxString::ms_cacheStats;
95
96 namespace
97 {
98
99 struct ShowCacheStats
100 {
101 ~ShowCacheStats()
102 {
103 const wxString::CacheStats& stats = wxString::ms_cacheStats;
104
105 if ( stats.postot )
106 {
107 puts("*** wxString cache statistics:");
108 printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
109 stats.postot);
110 printf("\tHits %u (of which %u not used) or %.2f%%\n",
111 stats.poshits,
112 stats.mishits,
113 100.*float(stats.poshits - stats.mishits)/stats.postot);
114 printf("\tAverage position requested: %.2f\n",
115 float(stats.sumpos) / stats.postot);
116 printf("\tAverage offset after cached hint: %.2f\n",
117 float(stats.sumofs) / stats.postot);
118 }
119
120 if ( stats.lentot )
121 {
122 printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
123 stats.lentot, 100.*float(stats.lenhits)/stats.lentot);
124 }
125 }
126 } s_showCacheStats;
127
128 } // anonymous namespace
129
130 #endif // wxPROFILE_STRING_CACHE
131
132 #endif // wxUSE_STRING_POS_CACHE
133
134 // ----------------------------------------------------------------------------
135 // global functions
136 // ----------------------------------------------------------------------------
137
138 #if wxUSE_STD_IOSTREAM
139
140 #include <iostream>
141
142 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
143 {
144 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
145 return os << (const char *)str.AsCharBuf();
146 #else
147 return os << str.AsInternal();
148 #endif
149 }
150
151 wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
152 {
153 return os << str.c_str();
154 }
155
156 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str)
157 {
158 return os << str.data();
159 }
160
161 #ifndef __BORLANDC__
162 wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)
163 {
164 return os << str.data();
165 }
166 #endif
167
168 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
169
170 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxString& str)
171 {
172 return wos << str.wc_str();
173 }
174
175 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxCStrData& str)
176 {
177 return wos << str.AsWChar();
178 }
179
180 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxWCharBuffer& str)
181 {
182 return wos << str.data();
183 }
184
185 #endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
186
187 #endif // wxUSE_STD_IOSTREAM
188
189 // ===========================================================================
190 // wxString class core
191 // ===========================================================================
192
193 #if wxUSE_UNICODE_UTF8
194
195 void wxString::PosLenToImpl(size_t pos, size_t len,
196 size_t *implPos, size_t *implLen) const
197 {
198 if ( pos == npos )
199 {
200 *implPos = npos;
201 }
202 else // have valid start position
203 {
204 const const_iterator b = GetIterForNthChar(pos);
205 *implPos = wxStringImpl::const_iterator(b.impl()) - m_impl.begin();
206 if ( len == npos )
207 {
208 *implLen = npos;
209 }
210 else // have valid length too
211 {
212 // we need to handle the case of length specifying a substring
213 // going beyond the end of the string, just as std::string does
214 const const_iterator e(end());
215 const_iterator i(b);
216 while ( len && i <= e )
217 {
218 ++i;
219 --len;
220 }
221
222 *implLen = i.impl() - b.impl();
223 }
224 }
225 }
226
227 #endif // wxUSE_UNICODE_UTF8
228
229 // ----------------------------------------------------------------------------
230 // wxCStrData converted strings caching
231 // ----------------------------------------------------------------------------
232
233 // FIXME-UTF8: temporarily disabled because it doesn't work with global
234 // string objects; re-enable after fixing this bug and benchmarking
235 // performance to see if using a hash is a good idea at all
236 #if 0
237
238 // For backward compatibility reasons, it must be possible to assign the value
239 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
240 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
241 // because the memory would be freed immediately, but it has to be valid as long
242 // as the string is not modified, so that code like this still works:
243 //
244 // const wxChar *s = str.c_str();
245 // while ( s ) { ... }
246
247 // FIXME-UTF8: not thread safe!
248 // FIXME-UTF8: we currently clear the cached conversion only when the string is
249 // destroyed, but we should do it when the string is modified, to
250 // keep memory usage down
251 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
252 // invalidated the cache on every change, we could keep the previous
253 // conversion
254 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
255 // to use mb_str() or wc_str() instead of (const [w]char*)c_str()
256
257 template<typename T>
258 static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
259 {
260 typename T::iterator i = hash.find(wxConstCast(s, wxString));
261 if ( i != hash.end() )
262 {
263 free(i->second);
264 hash.erase(i);
265 }
266 }
267
268 #if wxUSE_UNICODE
269 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
270 // so we have to use wxString* here and const-cast when used
271 WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
272 wxStringCharConversionCache);
273 static wxStringCharConversionCache gs_stringsCharCache;
274
275 const char* wxCStrData::AsChar() const
276 {
277 // remove previously cache value, if any (see FIXMEs above):
278 DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
279
280 // convert the string and keep it:
281 const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
282 m_str->mb_str().release();
283
284 return s + m_offset;
285 }
286 #endif // wxUSE_UNICODE
287
288 #if !wxUSE_UNICODE_WCHAR
289 WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
290 wxStringWCharConversionCache);
291 static wxStringWCharConversionCache gs_stringsWCharCache;
292
293 const wchar_t* wxCStrData::AsWChar() const
294 {
295 // remove previously cache value, if any (see FIXMEs above):
296 DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
297
298 // convert the string and keep it:
299 const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
300 m_str->wc_str().release();
301
302 return s + m_offset;
303 }
304 #endif // !wxUSE_UNICODE_WCHAR
305
306 wxString::~wxString()
307 {
308 #if wxUSE_UNICODE
309 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
310 DeleteStringFromConversionCache(gs_stringsCharCache, this);
311 #endif
312 #if !wxUSE_UNICODE_WCHAR
313 DeleteStringFromConversionCache(gs_stringsWCharCache, this);
314 #endif
315 }
316 #endif
317
318 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
319 const char* wxCStrData::AsChar() const
320 {
321 #if wxUSE_UNICODE_UTF8
322 if ( wxLocaleIsUtf8 )
323 return AsInternal();
324 #endif
325 // under non-UTF8 locales, we have to convert the internal UTF-8
326 // representation using wxConvLibc and cache the result
327
328 wxString *str = wxConstCast(m_str, wxString);
329
330 // convert the string:
331 //
332 // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
333 // have it) but it's unfortunately not obvious to implement
334 // because we don't know how big buffer do we need for the
335 // given string length (in case of multibyte encodings, e.g.
336 // ISO-2022-JP or UTF-8 when internal representation is wchar_t)
337 //
338 // One idea would be to store more than just m_convertedToChar
339 // in wxString: then we could record the length of the string
340 // which was converted the last time and try to reuse the same
341 // buffer if the current length is not greater than it (this
342 // could still fail because string could have been modified in
343 // place but it would work most of the time, so we'd do it and
344 // only allocate the new buffer if in-place conversion returned
345 // an error). We could also store a bit saying if the string
346 // was modified since the last conversion (and update it in all
347 // operation modifying the string, of course) to avoid unneeded
348 // consequential conversions. But both of these ideas require
349 // adding more fields to wxString and require profiling results
350 // to be sure that we really gain enough from them to justify
351 // doing it.
352 wxCharBuffer buf(str->mb_str());
353
354 // if it failed, return empty string and not NULL to avoid crashes in code
355 // written with either wxWidgets 2 wxString or std::string behaviour in
356 // mind: neither of them ever returns NULL and so we shouldn't neither
357 if ( !buf )
358 return "";
359
360 if ( str->m_convertedToChar &&
361 strlen(buf) == strlen(str->m_convertedToChar) )
362 {
363 // keep the same buffer for as long as possible, so that several calls
364 // to c_str() in a row still work:
365 strcpy(str->m_convertedToChar, buf);
366 }
367 else
368 {
369 str->m_convertedToChar = buf.release();
370 }
371
372 // and keep it:
373 return str->m_convertedToChar + m_offset;
374 }
375 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
376
377 #if !wxUSE_UNICODE_WCHAR
378 const wchar_t* wxCStrData::AsWChar() const
379 {
380 wxString *str = wxConstCast(m_str, wxString);
381
382 // convert the string:
383 wxWCharBuffer buf(str->wc_str());
384
385 // notice that here, unlike above in AsChar(), conversion can't fail as our
386 // internal UTF-8 is always well-formed -- or the string was corrupted and
387 // all bets are off anyhow
388
389 // FIXME-UTF8: do the conversion in-place in the existing buffer
390 if ( str->m_convertedToWChar &&
391 wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
392 {
393 // keep the same buffer for as long as possible, so that several calls
394 // to c_str() in a row still work:
395 memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
396 }
397 else
398 {
399 str->m_convertedToWChar = buf.release();
400 }
401
402 // and keep it:
403 return str->m_convertedToWChar + m_offset;
404 }
405 #endif // !wxUSE_UNICODE_WCHAR
406
407 // ===========================================================================
408 // wxString class core
409 // ===========================================================================
410
411 // ---------------------------------------------------------------------------
412 // construction and conversion
413 // ---------------------------------------------------------------------------
414
415 #if wxUSE_UNICODE_WCHAR
416 /* static */
417 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
418 const wxMBConv& conv)
419 {
420 // anything to do?
421 if ( !psz || nLength == 0 )
422 return SubstrBufFromMB(L"", 0);
423
424 if ( nLength == npos )
425 nLength = wxNO_LEN;
426
427 size_t wcLen;
428 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
429 if ( !wcLen )
430 return SubstrBufFromMB(_T(""), 0);
431 else
432 return SubstrBufFromMB(wcBuf, wcLen);
433 }
434 #endif // wxUSE_UNICODE_WCHAR
435
436 #if wxUSE_UNICODE_UTF8
437 /* static */
438 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
439 const wxMBConv& conv)
440 {
441 // anything to do?
442 if ( !psz || nLength == 0 )
443 return SubstrBufFromMB("", 0);
444
445 // if psz is already in UTF-8, we don't have to do the roundtrip to
446 // wchar_t* and back:
447 if ( conv.IsUTF8() )
448 {
449 // we need to validate the input because UTF8 iterators assume valid
450 // UTF-8 sequence and psz may be invalid:
451 if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
452 {
453 // we must pass the real string length to SubstrBufFromMB ctor
454 if ( nLength == npos )
455 nLength = psz ? strlen(psz) : 0;
456 return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz), nLength);
457 }
458 // else: do the roundtrip through wchar_t*
459 }
460
461 if ( nLength == npos )
462 nLength = wxNO_LEN;
463
464 // first convert to wide string:
465 size_t wcLen;
466 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
467 if ( !wcLen )
468 return SubstrBufFromMB("", 0);
469
470 // and then to UTF-8:
471 SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
472 // widechar -> UTF-8 conversion isn't supposed to ever fail:
473 wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
474
475 return buf;
476 }
477 #endif // wxUSE_UNICODE_UTF8
478
479 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
480 /* static */
481 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
482 const wxMBConv& conv)
483 {
484 // anything to do?
485 if ( !pwz || nLength == 0 )
486 return SubstrBufFromWC("", 0);
487
488 if ( nLength == npos )
489 nLength = wxNO_LEN;
490
491 size_t mbLen;
492 wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
493 if ( !mbLen )
494 return SubstrBufFromWC("", 0);
495 else
496 return SubstrBufFromWC(mbBuf, mbLen);
497 }
498 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
499
500
501 #if wxUSE_UNICODE_WCHAR
502
503 //Convert wxString in Unicode mode to a multi-byte string
504 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
505 {
506 return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL);
507 }
508
509 #elif wxUSE_UNICODE_UTF8
510
511 const wxWCharBuffer wxString::wc_str() const
512 {
513 return wxMBConvStrictUTF8().cMB2WC
514 (
515 m_impl.c_str(),
516 m_impl.length() + 1, // size, not length
517 NULL
518 );
519 }
520
521 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
522 {
523 if ( conv.IsUTF8() )
524 return wxCharBuffer::CreateNonOwned(m_impl.c_str());
525
526 // FIXME-UTF8: use wc_str() here once we have buffers with length
527
528 size_t wcLen;
529 wxWCharBuffer wcBuf(wxMBConvStrictUTF8().cMB2WC
530 (
531 m_impl.c_str(),
532 m_impl.length() + 1, // size
533 &wcLen
534 ));
535 if ( !wcLen )
536 return wxCharBuffer("");
537
538 return conv.cWC2MB(wcBuf, wcLen+1, NULL);
539 }
540
541 #else // ANSI
542
543 //Converts this string to a wide character string if unicode
544 //mode is not enabled and wxUSE_WCHAR_T is enabled
545 const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
546 {
547 return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL);
548 }
549
550 #endif // Unicode/ANSI
551
552 // shrink to minimal size (releasing extra memory)
553 bool wxString::Shrink()
554 {
555 wxString tmp(begin(), end());
556 swap(tmp);
557 return tmp.length() == length();
558 }
559
560 // deprecated compatibility code:
561 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
562 wxStringCharType *wxString::GetWriteBuf(size_t nLen)
563 {
564 return DoGetWriteBuf(nLen);
565 }
566
567 void wxString::UngetWriteBuf()
568 {
569 DoUngetWriteBuf();
570 }
571
572 void wxString::UngetWriteBuf(size_t nLen)
573 {
574 DoUngetWriteBuf(nLen);
575 }
576 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
577
578
579 // ---------------------------------------------------------------------------
580 // data access
581 // ---------------------------------------------------------------------------
582
583 // all functions are inline in string.h
584
585 // ---------------------------------------------------------------------------
586 // concatenation operators
587 // ---------------------------------------------------------------------------
588
589 /*
590 * concatenation functions come in 5 flavours:
591 * string + string
592 * char + string and string + char
593 * C str + string and string + C str
594 */
595
596 wxString operator+(const wxString& str1, const wxString& str2)
597 {
598 #if !wxUSE_STL_BASED_WXSTRING
599 wxASSERT( str1.IsValid() );
600 wxASSERT( str2.IsValid() );
601 #endif
602
603 wxString s = str1;
604 s += str2;
605
606 return s;
607 }
608
609 wxString operator+(const wxString& str, wxUniChar ch)
610 {
611 #if !wxUSE_STL_BASED_WXSTRING
612 wxASSERT( str.IsValid() );
613 #endif
614
615 wxString s = str;
616 s += ch;
617
618 return s;
619 }
620
621 wxString operator+(wxUniChar ch, const wxString& str)
622 {
623 #if !wxUSE_STL_BASED_WXSTRING
624 wxASSERT( str.IsValid() );
625 #endif
626
627 wxString s = ch;
628 s += str;
629
630 return s;
631 }
632
633 wxString operator+(const wxString& str, const char *psz)
634 {
635 #if !wxUSE_STL_BASED_WXSTRING
636 wxASSERT( str.IsValid() );
637 #endif
638
639 wxString s;
640 if ( !s.Alloc(strlen(psz) + str.length()) ) {
641 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
642 }
643 s += str;
644 s += psz;
645
646 return s;
647 }
648
649 wxString operator+(const wxString& str, const wchar_t *pwz)
650 {
651 #if !wxUSE_STL_BASED_WXSTRING
652 wxASSERT( str.IsValid() );
653 #endif
654
655 wxString s;
656 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
657 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
658 }
659 s += str;
660 s += pwz;
661
662 return s;
663 }
664
665 wxString operator+(const char *psz, const wxString& str)
666 {
667 #if !wxUSE_STL_BASED_WXSTRING
668 wxASSERT( str.IsValid() );
669 #endif
670
671 wxString s;
672 if ( !s.Alloc(strlen(psz) + str.length()) ) {
673 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
674 }
675 s = psz;
676 s += str;
677
678 return s;
679 }
680
681 wxString operator+(const wchar_t *pwz, const wxString& str)
682 {
683 #if !wxUSE_STL_BASED_WXSTRING
684 wxASSERT( str.IsValid() );
685 #endif
686
687 wxString s;
688 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
689 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
690 }
691 s = pwz;
692 s += str;
693
694 return s;
695 }
696
697 // ---------------------------------------------------------------------------
698 // string comparison
699 // ---------------------------------------------------------------------------
700
701 bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
702 {
703 return (length() == 1) && (compareWithCase ? GetChar(0u) == c
704 : wxToupper(GetChar(0u)) == wxToupper(c));
705 }
706
707 #ifdef HAVE_STD_STRING_COMPARE
708
709 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
710 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
711 // sort strings in characters code point order by sorting the byte sequence
712 // in byte values order (i.e. what strcmp() and memcmp() do).
713
714 int wxString::compare(const wxString& str) const
715 {
716 return m_impl.compare(str.m_impl);
717 }
718
719 int wxString::compare(size_t nStart, size_t nLen,
720 const wxString& str) const
721 {
722 size_t pos, len;
723 PosLenToImpl(nStart, nLen, &pos, &len);
724 return m_impl.compare(pos, len, str.m_impl);
725 }
726
727 int wxString::compare(size_t nStart, size_t nLen,
728 const wxString& str,
729 size_t nStart2, size_t nLen2) const
730 {
731 size_t pos, len;
732 PosLenToImpl(nStart, nLen, &pos, &len);
733
734 size_t pos2, len2;
735 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
736
737 return m_impl.compare(pos, len, str.m_impl, pos2, len2);
738 }
739
740 int wxString::compare(const char* sz) const
741 {
742 return m_impl.compare(ImplStr(sz));
743 }
744
745 int wxString::compare(const wchar_t* sz) const
746 {
747 return m_impl.compare(ImplStr(sz));
748 }
749
750 int wxString::compare(size_t nStart, size_t nLen,
751 const char* sz, size_t nCount) const
752 {
753 size_t pos, len;
754 PosLenToImpl(nStart, nLen, &pos, &len);
755
756 SubstrBufFromMB str(ImplStr(sz, nCount));
757
758 return m_impl.compare(pos, len, str.data, str.len);
759 }
760
761 int wxString::compare(size_t nStart, size_t nLen,
762 const wchar_t* sz, size_t nCount) const
763 {
764 size_t pos, len;
765 PosLenToImpl(nStart, nLen, &pos, &len);
766
767 SubstrBufFromWC str(ImplStr(sz, nCount));
768
769 return m_impl.compare(pos, len, str.data, str.len);
770 }
771
772 #else // !HAVE_STD_STRING_COMPARE
773
774 static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
775 const wxStringCharType* s2, size_t l2)
776 {
777 if( l1 == l2 )
778 return wxStringMemcmp(s1, s2, l1);
779 else if( l1 < l2 )
780 {
781 int ret = wxStringMemcmp(s1, s2, l1);
782 return ret == 0 ? -1 : ret;
783 }
784 else
785 {
786 int ret = wxStringMemcmp(s1, s2, l2);
787 return ret == 0 ? +1 : ret;
788 }
789 }
790
791 int wxString::compare(const wxString& str) const
792 {
793 return ::wxDoCmp(m_impl.data(), m_impl.length(),
794 str.m_impl.data(), str.m_impl.length());
795 }
796
797 int wxString::compare(size_t nStart, size_t nLen,
798 const wxString& str) const
799 {
800 wxASSERT(nStart <= length());
801 size_type strLen = length() - nStart;
802 nLen = strLen < nLen ? strLen : nLen;
803
804 size_t pos, len;
805 PosLenToImpl(nStart, nLen, &pos, &len);
806
807 return ::wxDoCmp(m_impl.data() + pos, len,
808 str.m_impl.data(), str.m_impl.length());
809 }
810
811 int wxString::compare(size_t nStart, size_t nLen,
812 const wxString& str,
813 size_t nStart2, size_t nLen2) const
814 {
815 wxASSERT(nStart <= length());
816 wxASSERT(nStart2 <= str.length());
817 size_type strLen = length() - nStart,
818 strLen2 = str.length() - nStart2;
819 nLen = strLen < nLen ? strLen : nLen;
820 nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
821
822 size_t pos, len;
823 PosLenToImpl(nStart, nLen, &pos, &len);
824 size_t pos2, len2;
825 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
826
827 return ::wxDoCmp(m_impl.data() + pos, len,
828 str.m_impl.data() + pos2, len2);
829 }
830
831 int wxString::compare(const char* sz) const
832 {
833 SubstrBufFromMB str(ImplStr(sz, npos));
834 if ( str.len == npos )
835 str.len = wxStringStrlen(str.data);
836 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
837 }
838
839 int wxString::compare(const wchar_t* sz) const
840 {
841 SubstrBufFromWC str(ImplStr(sz, npos));
842 if ( str.len == npos )
843 str.len = wxStringStrlen(str.data);
844 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
845 }
846
847 int wxString::compare(size_t nStart, size_t nLen,
848 const char* sz, size_t nCount) const
849 {
850 wxASSERT(nStart <= length());
851 size_type strLen = length() - nStart;
852 nLen = strLen < nLen ? strLen : nLen;
853
854 size_t pos, len;
855 PosLenToImpl(nStart, nLen, &pos, &len);
856
857 SubstrBufFromMB str(ImplStr(sz, nCount));
858 if ( str.len == npos )
859 str.len = wxStringStrlen(str.data);
860
861 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
862 }
863
864 int wxString::compare(size_t nStart, size_t nLen,
865 const wchar_t* sz, size_t nCount) const
866 {
867 wxASSERT(nStart <= length());
868 size_type strLen = length() - nStart;
869 nLen = strLen < nLen ? strLen : nLen;
870
871 size_t pos, len;
872 PosLenToImpl(nStart, nLen, &pos, &len);
873
874 SubstrBufFromWC str(ImplStr(sz, nCount));
875 if ( str.len == npos )
876 str.len = wxStringStrlen(str.data);
877
878 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
879 }
880
881 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
882
883
884 // ---------------------------------------------------------------------------
885 // find_{first,last}_[not]_of functions
886 // ---------------------------------------------------------------------------
887
888 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
889
890 // NB: All these functions are implemented with the argument being wxChar*,
891 // i.e. widechar string in any Unicode build, even though native string
892 // representation is char* in the UTF-8 build. This is because we couldn't
893 // use memchr() to determine if a character is in a set encoded as UTF-8.
894
895 size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
896 {
897 return find_first_of(sz, nStart, wxStrlen(sz));
898 }
899
900 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
901 {
902 return find_first_not_of(sz, nStart, wxStrlen(sz));
903 }
904
905 size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
906 {
907 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
908
909 size_t idx = nStart;
910 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
911 {
912 if ( wxTmemchr(sz, *i, n) )
913 return idx;
914 }
915
916 return npos;
917 }
918
919 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
920 {
921 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
922
923 size_t idx = nStart;
924 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
925 {
926 if ( !wxTmemchr(sz, *i, n) )
927 return idx;
928 }
929
930 return npos;
931 }
932
933
934 size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
935 {
936 return find_last_of(sz, nStart, wxStrlen(sz));
937 }
938
939 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
940 {
941 return find_last_not_of(sz, nStart, wxStrlen(sz));
942 }
943
944 size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
945 {
946 size_t len = length();
947
948 if ( nStart == npos )
949 {
950 nStart = len - 1;
951 }
952 else
953 {
954 wxASSERT_MSG( nStart <= len, _T("invalid index") );
955 }
956
957 size_t idx = nStart;
958 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
959 i != rend(); --idx, ++i )
960 {
961 if ( wxTmemchr(sz, *i, n) )
962 return idx;
963 }
964
965 return npos;
966 }
967
968 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
969 {
970 size_t len = length();
971
972 if ( nStart == npos )
973 {
974 nStart = len - 1;
975 }
976 else
977 {
978 wxASSERT_MSG( nStart <= len, _T("invalid index") );
979 }
980
981 size_t idx = nStart;
982 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
983 i != rend(); --idx, ++i )
984 {
985 if ( !wxTmemchr(sz, *i, n) )
986 return idx;
987 }
988
989 return npos;
990 }
991
992 size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
993 {
994 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
995
996 size_t idx = nStart;
997 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
998 {
999 if ( *i != ch )
1000 return idx;
1001 }
1002
1003 return npos;
1004 }
1005
1006 size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
1007 {
1008 size_t len = length();
1009
1010 if ( nStart == npos )
1011 {
1012 nStart = len - 1;
1013 }
1014 else
1015 {
1016 wxASSERT_MSG( nStart <= len, _T("invalid index") );
1017 }
1018
1019 size_t idx = nStart;
1020 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1021 i != rend(); --idx, ++i )
1022 {
1023 if ( *i != ch )
1024 return idx;
1025 }
1026
1027 return npos;
1028 }
1029
1030 // the functions above were implemented for wchar_t* arguments in Unicode
1031 // build and char* in ANSI build; below are implementations for the other
1032 // version:
1033 #if wxUSE_UNICODE
1034 #define wxOtherCharType char
1035 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
1036 #else
1037 #define wxOtherCharType wchar_t
1038 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
1039 #endif
1040
1041 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
1042 { return find_first_of(STRCONV(sz), nStart); }
1043
1044 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
1045 size_t n) const
1046 { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
1047 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
1048 { return find_last_of(STRCONV(sz), nStart); }
1049 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
1050 size_t n) const
1051 { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
1052 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
1053 { return find_first_not_of(STRCONV(sz), nStart); }
1054 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
1055 size_t n) const
1056 { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
1057 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
1058 { return find_last_not_of(STRCONV(sz), nStart); }
1059 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
1060 size_t n) const
1061 { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
1062
1063 #undef wxOtherCharType
1064 #undef STRCONV
1065
1066 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1067
1068 // ===========================================================================
1069 // other common string functions
1070 // ===========================================================================
1071
1072 int wxString::CmpNoCase(const wxString& s) const
1073 {
1074 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
1075
1076 const_iterator i1 = begin();
1077 const_iterator end1 = end();
1078 const_iterator i2 = s.begin();
1079 const_iterator end2 = s.end();
1080
1081 for ( ; i1 != end1 && i2 != end2; ++i1, ++i2 )
1082 {
1083 wxUniChar lower1 = (wxChar)wxTolower(*i1);
1084 wxUniChar lower2 = (wxChar)wxTolower(*i2);
1085 if ( lower1 != lower2 )
1086 return lower1 < lower2 ? -1 : 1;
1087 }
1088
1089 size_t len1 = length();
1090 size_t len2 = s.length();
1091
1092 if ( len1 < len2 )
1093 return -1;
1094 else if ( len1 > len2 )
1095 return 1;
1096 return 0;
1097 }
1098
1099
1100 #if wxUSE_UNICODE
1101
1102 #ifdef __MWERKS__
1103 #ifndef __SCHAR_MAX__
1104 #define __SCHAR_MAX__ 127
1105 #endif
1106 #endif
1107
1108 wxString wxString::FromAscii(const char *ascii, size_t len)
1109 {
1110 if (!ascii || len == 0)
1111 return wxEmptyString;
1112
1113 wxString res;
1114
1115 {
1116 wxStringInternalBuffer buf(res, len);
1117 wxStringCharType *dest = buf;
1118
1119 for ( ; len > 0; --len )
1120 {
1121 unsigned char c = (unsigned char)*ascii++;
1122 wxASSERT_MSG( c < 0x80,
1123 _T("Non-ASCII value passed to FromAscii().") );
1124
1125 *dest++ = (wchar_t)c;
1126 }
1127 }
1128
1129 return res;
1130 }
1131
1132 wxString wxString::FromAscii(const char *ascii)
1133 {
1134 return FromAscii(ascii, wxStrlen(ascii));
1135 }
1136
1137 wxString wxString::FromAscii(char ascii)
1138 {
1139 // What do we do with '\0' ?
1140
1141 unsigned char c = (unsigned char)ascii;
1142
1143 wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") );
1144
1145 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1146 return wxString(wxUniChar((wchar_t)c));
1147 }
1148
1149 const wxCharBuffer wxString::ToAscii() const
1150 {
1151 // this will allocate enough space for the terminating NUL too
1152 wxCharBuffer buffer(length());
1153 char *dest = buffer.data();
1154
1155 for ( const_iterator i = begin(); i != end(); ++i )
1156 {
1157 wxUniChar c(*i);
1158 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1159 *dest++ = c.IsAscii() ? (char)c : '_';
1160
1161 // the output string can't have embedded NULs anyhow, so we can safely
1162 // stop at first of them even if we do have any
1163 if ( !c )
1164 break;
1165 }
1166
1167 return buffer;
1168 }
1169
1170 #endif // wxUSE_UNICODE
1171
1172 // extract string of length nCount starting at nFirst
1173 wxString wxString::Mid(size_t nFirst, size_t nCount) const
1174 {
1175 size_t nLen = length();
1176
1177 // default value of nCount is npos and means "till the end"
1178 if ( nCount == npos )
1179 {
1180 nCount = nLen - nFirst;
1181 }
1182
1183 // out-of-bounds requests return sensible things
1184 if ( nFirst + nCount > nLen )
1185 {
1186 nCount = nLen - nFirst;
1187 }
1188
1189 if ( nFirst > nLen )
1190 {
1191 // AllocCopy() will return empty string
1192 return wxEmptyString;
1193 }
1194
1195 wxString dest(*this, nFirst, nCount);
1196 if ( dest.length() != nCount )
1197 {
1198 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1199 }
1200
1201 return dest;
1202 }
1203
1204 // check that the string starts with prefix and return the rest of the string
1205 // in the provided pointer if it is not NULL, otherwise return false
1206 bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
1207 {
1208 if ( compare(0, prefix.length(), prefix) != 0 )
1209 return false;
1210
1211 if ( rest )
1212 {
1213 // put the rest of the string into provided pointer
1214 rest->assign(*this, prefix.length(), npos);
1215 }
1216
1217 return true;
1218 }
1219
1220
1221 // check that the string ends with suffix and return the rest of it in the
1222 // provided pointer if it is not NULL, otherwise return false
1223 bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
1224 {
1225 int start = length() - suffix.length();
1226
1227 if ( start < 0 || compare(start, npos, suffix) != 0 )
1228 return false;
1229
1230 if ( rest )
1231 {
1232 // put the rest of the string into provided pointer
1233 rest->assign(*this, 0, start);
1234 }
1235
1236 return true;
1237 }
1238
1239
1240 // extract nCount last (rightmost) characters
1241 wxString wxString::Right(size_t nCount) const
1242 {
1243 if ( nCount > length() )
1244 nCount = length();
1245
1246 wxString dest(*this, length() - nCount, nCount);
1247 if ( dest.length() != nCount ) {
1248 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1249 }
1250 return dest;
1251 }
1252
1253 // get all characters after the last occurence of ch
1254 // (returns the whole string if ch not found)
1255 wxString wxString::AfterLast(wxUniChar ch) const
1256 {
1257 wxString str;
1258 int iPos = Find(ch, true);
1259 if ( iPos == wxNOT_FOUND )
1260 str = *this;
1261 else
1262 str = wx_str() + iPos + 1;
1263
1264 return str;
1265 }
1266
1267 // extract nCount first (leftmost) characters
1268 wxString wxString::Left(size_t nCount) const
1269 {
1270 if ( nCount > length() )
1271 nCount = length();
1272
1273 wxString dest(*this, 0, nCount);
1274 if ( dest.length() != nCount ) {
1275 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1276 }
1277 return dest;
1278 }
1279
1280 // get all characters before the first occurence of ch
1281 // (returns the whole string if ch not found)
1282 wxString wxString::BeforeFirst(wxUniChar ch) const
1283 {
1284 int iPos = Find(ch);
1285 if ( iPos == wxNOT_FOUND ) iPos = length();
1286 return wxString(*this, 0, iPos);
1287 }
1288
1289 /// get all characters before the last occurence of ch
1290 /// (returns empty string if ch not found)
1291 wxString wxString::BeforeLast(wxUniChar ch) const
1292 {
1293 wxString str;
1294 int iPos = Find(ch, true);
1295 if ( iPos != wxNOT_FOUND && iPos != 0 )
1296 str = wxString(c_str(), iPos);
1297
1298 return str;
1299 }
1300
1301 /// get all characters after the first occurence of ch
1302 /// (returns empty string if ch not found)
1303 wxString wxString::AfterFirst(wxUniChar ch) const
1304 {
1305 wxString str;
1306 int iPos = Find(ch);
1307 if ( iPos != wxNOT_FOUND )
1308 str = wx_str() + iPos + 1;
1309
1310 return str;
1311 }
1312
1313 // replace first (or all) occurences of some substring with another one
1314 size_t wxString::Replace(const wxString& strOld,
1315 const wxString& strNew, bool bReplaceAll)
1316 {
1317 // if we tried to replace an empty string we'd enter an infinite loop below
1318 wxCHECK_MSG( !strOld.empty(), 0,
1319 _T("wxString::Replace(): invalid parameter") );
1320
1321 wxSTRING_INVALIDATE_CACHE();
1322
1323 size_t uiCount = 0; // count of replacements made
1324
1325 // optimize the special common case: replacement of one character by
1326 // another one (in UTF-8 case we can only do this for ASCII characters)
1327 //
1328 // benchmarks show that this special version is around 3 times faster
1329 // (depending on the proportion of matching characters and UTF-8/wchar_t
1330 // build)
1331 if ( strOld.m_impl.length() == 1 && strNew.m_impl.length() == 1 )
1332 {
1333 const wxStringCharType chOld = strOld.m_impl[0],
1334 chNew = strNew.m_impl[0];
1335
1336 // this loop is the simplified version of the one below
1337 for ( size_t pos = 0; ; )
1338 {
1339 pos = m_impl.find(chOld, pos);
1340 if ( pos == npos )
1341 break;
1342
1343 m_impl[pos++] = chNew;
1344
1345 uiCount++;
1346
1347 if ( !bReplaceAll )
1348 break;
1349 }
1350 }
1351 else // general case
1352 {
1353 const size_t uiOldLen = strOld.m_impl.length();
1354 const size_t uiNewLen = strNew.m_impl.length();
1355
1356 for ( size_t pos = 0; ; )
1357 {
1358 pos = m_impl.find(strOld.m_impl, pos);
1359 if ( pos == npos )
1360 break;
1361
1362 // replace this occurrence of the old string with the new one
1363 m_impl.replace(pos, uiOldLen, strNew.m_impl);
1364
1365 // move up pos past the string that was replaced
1366 pos += uiNewLen;
1367
1368 // increase replace count
1369 uiCount++;
1370
1371 // stop after the first one?
1372 if ( !bReplaceAll )
1373 break;
1374 }
1375 }
1376
1377 return uiCount;
1378 }
1379
1380 bool wxString::IsAscii() const
1381 {
1382 for ( const_iterator i = begin(); i != end(); ++i )
1383 {
1384 if ( !(*i).IsAscii() )
1385 return false;
1386 }
1387
1388 return true;
1389 }
1390
1391 bool wxString::IsWord() const
1392 {
1393 for ( const_iterator i = begin(); i != end(); ++i )
1394 {
1395 if ( !wxIsalpha(*i) )
1396 return false;
1397 }
1398
1399 return true;
1400 }
1401
1402 bool wxString::IsNumber() const
1403 {
1404 if ( empty() )
1405 return true;
1406
1407 const_iterator i = begin();
1408
1409 if ( *i == _T('-') || *i == _T('+') )
1410 ++i;
1411
1412 for ( ; i != end(); ++i )
1413 {
1414 if ( !wxIsdigit(*i) )
1415 return false;
1416 }
1417
1418 return true;
1419 }
1420
1421 wxString wxString::Strip(stripType w) const
1422 {
1423 wxString s = *this;
1424 if ( w & leading ) s.Trim(false);
1425 if ( w & trailing ) s.Trim(true);
1426 return s;
1427 }
1428
1429 // ---------------------------------------------------------------------------
1430 // case conversion
1431 // ---------------------------------------------------------------------------
1432
1433 wxString& wxString::MakeUpper()
1434 {
1435 for ( iterator it = begin(), en = end(); it != en; ++it )
1436 *it = (wxChar)wxToupper(*it);
1437
1438 return *this;
1439 }
1440
1441 wxString& wxString::MakeLower()
1442 {
1443 for ( iterator it = begin(), en = end(); it != en; ++it )
1444 *it = (wxChar)wxTolower(*it);
1445
1446 return *this;
1447 }
1448
1449 wxString& wxString::MakeCapitalized()
1450 {
1451 const iterator en = end();
1452 iterator it = begin();
1453 if ( it != en )
1454 {
1455 *it = (wxChar)wxToupper(*it);
1456 for ( ++it; it != en; ++it )
1457 *it = (wxChar)wxTolower(*it);
1458 }
1459
1460 return *this;
1461 }
1462
1463 // ---------------------------------------------------------------------------
1464 // trimming and padding
1465 // ---------------------------------------------------------------------------
1466
1467 // some compilers (VC++ 6.0 not to name them) return true for a call to
1468 // isspace('\xEA') in the C locale which seems to be broken to me, but we have
1469 // to live with this by checking that the character is a 7 bit one - even if
1470 // this may fail to detect some spaces (I don't know if Unicode doesn't have
1471 // space-like symbols somewhere except in the first 128 chars), it is arguably
1472 // still better than trimming away accented letters
1473 inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1474
1475 // trims spaces (in the sense of isspace) from left or right side
1476 wxString& wxString::Trim(bool bFromRight)
1477 {
1478 // first check if we're going to modify the string at all
1479 if ( !empty() &&
1480 (
1481 (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1482 (!bFromRight && wxSafeIsspace(GetChar(0u)))
1483 )
1484 )
1485 {
1486 if ( bFromRight )
1487 {
1488 // find last non-space character
1489 reverse_iterator psz = rbegin();
1490 while ( (psz != rend()) && wxSafeIsspace(*psz) )
1491 ++psz;
1492
1493 // truncate at trailing space start
1494 erase(psz.base(), end());
1495 }
1496 else
1497 {
1498 // find first non-space character
1499 iterator psz = begin();
1500 while ( (psz != end()) && wxSafeIsspace(*psz) )
1501 ++psz;
1502
1503 // fix up data and length
1504 erase(begin(), psz);
1505 }
1506 }
1507
1508 return *this;
1509 }
1510
1511 // adds nCount characters chPad to the string from either side
1512 wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1513 {
1514 wxString s(chPad, nCount);
1515
1516 if ( bFromRight )
1517 *this += s;
1518 else
1519 {
1520 s += *this;
1521 swap(s);
1522 }
1523
1524 return *this;
1525 }
1526
1527 // truncate the string
1528 wxString& wxString::Truncate(size_t uiLen)
1529 {
1530 if ( uiLen < length() )
1531 {
1532 erase(begin() + uiLen, end());
1533 }
1534 //else: nothing to do, string is already short enough
1535
1536 return *this;
1537 }
1538
1539 // ---------------------------------------------------------------------------
1540 // finding (return wxNOT_FOUND if not found and index otherwise)
1541 // ---------------------------------------------------------------------------
1542
1543 // find a character
1544 int wxString::Find(wxUniChar ch, bool bFromEnd) const
1545 {
1546 size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1547
1548 return (idx == npos) ? wxNOT_FOUND : (int)idx;
1549 }
1550
1551 // ----------------------------------------------------------------------------
1552 // conversion to numbers
1553 // ----------------------------------------------------------------------------
1554
1555 // The implementation of all the functions below is exactly the same so factor
1556 // it out. Note that number extraction works correctly on UTF-8 strings, so
1557 // we can use wxStringCharType and wx_str() for maximum efficiency.
1558
// DO_IF_NOT_WINCE(x) expands to x, except when __WXWINCE__ is defined, in
// which case it expands to nothing; it is used to wrap the code using errno
#ifndef __WXWINCE__
    #define DO_IF_NOT_WINCE(x) x
#else
    #define DO_IF_NOT_WINCE(x)
#endif

// Body of a wxString member function converting the string to the integer
// type T using the strtol()-like function func:
//  - out is the pointer receiving the result, it must not be NULL
//  - base is the numeric base, either 0 or in 2..36 range
// The generated code returns false, without modifying *out, if the string is
// empty, is not entirely consumed by func or if func sets errno to ERANGE.
#define WX_STRING_TO_INT_TYPE(out, base, func, T)                           \
    wxCHECK_MSG( out, false, _T("NULL output pointer") );                   \
    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );  \
                                                                            \
    DO_IF_NOT_WINCE( errno = 0; )                                           \
                                                                            \
    const wxStringCharType *pszStart = wx_str();                            \
    wxStringCharType *pszEnd;                                               \
    T value = func(pszStart, &pszEnd, base);                                \
                                                                            \
    /* return true only if scan was stopped by the terminating NUL and */   \
    /* if the string was not empty to start with and no under/overflow */   \
    /* occurred: */                                                         \
    if ( *pszEnd || pszEnd == pszStart                                      \
            DO_IF_NOT_WINCE(|| errno == ERANGE) )                           \
        return false;                                                       \
    *out = value;                                                           \
    return true
1582
1583 bool wxString::ToLong(long *pVal, int base) const
1584 {
1585 WX_STRING_TO_INT_TYPE(pVal, base, wxStrtol, long);
1586 }
1587
1588 bool wxString::ToULong(unsigned long *pVal, int base) const
1589 {
1590 WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoul, unsigned long);
1591 }
1592
1593 bool wxString::ToLongLong(wxLongLong_t *pVal, int base) const
1594 {
1595 WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoll, wxLongLong_t);
1596 }
1597
1598 bool wxString::ToULongLong(wxULongLong_t *pVal, int base) const
1599 {
1600 WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoull, wxULongLong_t);
1601 }
1602
1603 bool wxString::ToDouble(double *pVal) const
1604 {
1605 wxCHECK_MSG( pVal, false, _T("NULL output pointer") );
1606
1607 DO_IF_NOT_WINCE( errno = 0; )
1608
1609 const wxChar *start = c_str();
1610 wxChar *end;
1611 double val = wxStrtod(start, &end);
1612
1613 // return true only if scan was stopped by the terminating NUL and if the
1614 // string was not empty to start with and no under/overflow occurred
1615 if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )
1616 return false;
1617
1618 *pVal = val;
1619
1620 return true;
1621 }
1622
1623 // ---------------------------------------------------------------------------
1624 // formatted output
1625 // ---------------------------------------------------------------------------
1626
1627 #if !wxUSE_UTF8_LOCALE_ONLY
1628 /* static */
1629 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1630 wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
1631 #else
1632 wxString wxString::DoFormatWchar(const wxChar *format, ...)
1633 #endif
1634 {
1635 va_list argptr;
1636 va_start(argptr, format);
1637
1638 wxString s;
1639 s.PrintfV(format, argptr);
1640
1641 va_end(argptr);
1642
1643 return s;
1644 }
1645 #endif // !wxUSE_UTF8_LOCALE_ONLY
1646
#if wxUSE_UNICODE_UTF8
// Returns a new string built from the char format string and the variable
// arguments following it, by forwarding them to PrintfV().
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
1662
1663 /* static */
1664 wxString wxString::FormatV(const wxString& format, va_list argptr)
1665 {
1666 wxString s;
1667 s.PrintfV(format, argptr);
1668 return s;
1669 }
1670
1671 #if !wxUSE_UTF8_LOCALE_ONLY
1672 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1673 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
1674 #else
1675 int wxString::DoPrintfWchar(const wxChar *format, ...)
1676 #endif
1677 {
1678 va_list argptr;
1679 va_start(argptr, format);
1680
1681 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1682 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1683 // because it's the only cast that works safely for downcasting when
1684 // multiple inheritance is used:
1685 wxString *str = static_cast<wxString*>(this);
1686 #else
1687 wxString *str = this;
1688 #endif
1689
1690 int iLen = str->PrintfV(format, argptr);
1691
1692 va_end(argptr);
1693
1694 return iLen;
1695 }
1696 #endif // !wxUSE_UTF8_LOCALE_ONLY
1697
#if wxUSE_UNICODE_UTF8
// Replaces the contents of the string with the result of formatting the
// variable arguments according to the char format string.
//
// Returns whatever PrintfV() returns.
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);
    const int iLen = PrintfV(format, args);
    va_end(args);

    return iLen;
}
#endif // wxUSE_UNICODE_UTF8
1711
1712 /*
1713 Uses wxVsnprintf and places the result into this string.
1714
1715 In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1716 it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in
1717 the ISO C99 (and thus SUSv3) standard the return value for the case of
1718 an undersized buffer is inconsistent. For conforming vsnprintf
1719 implementations the function must return the number of characters that
1720 would have been printed had the buffer been large enough. For conforming
1721 vswprintf implementations the function must return a negative number
1722 and set errno.
1723
1724 What vswprintf sets errno to is undefined but Darwin seems to set it to
1725 EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of
1726 those are defined in the standard and backed up by several conformance
1727 statements. Note that ENOMEM mentioned in the manual page does not
1728 apply to swprintf, only wprintf and fwprintf.
1729
1730 Official manual page:
1731 http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1732
1733 Some conformance statements (AIX, Solaris):
1734 http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1735 http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1736
1737 Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1738 EILSEQ and EINVAL are specifically defined to mean the error is other than
1739 an undersized buffer and no other errno are defined we treat those two
1740 as meaning hard errors and everything else gets the old behavior which
1741 is to keep looping and increasing buffer size until the function succeeds.
1742
1743 In practice it's impossible to determine before compilation which behavior
1744 may be used. The vswprintf function may have vsnprintf-like behavior or
1745 vice-versa. Behavior detected on one release can theoretically change
1746 with an updated release. Not to mention that configure testing for it
1747 would require the test to be run on the host system, not the build system
1748 which makes cross compilation difficult. Therefore, we make no assumptions
1749 about behavior and try our best to handle every known case, including the
1750 case where wxVsnprintf returns a negative number and fails to set errno.
1751
1752 There is yet one more non-standard implementation and that is our own.
1753 Fortunately, that can be detected at compile-time.
1754
1755 On top of all that, ISO C99 explicitly defines snprintf to write a null
1756 character to the last position of the specified buffer. That would be
1757 at the given buffer size minus 1. It is supposed to do this even if it
1758 turns out that the buffer is sized too small.
1759
1760 Darwin (tested on 10.5) follows the C99 behavior exactly.
1761
1762 Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1763 errno even when it fails. However, it only seems to ever fail due
1764 to an undersized buffer.
1765 */
1766 #if wxUSE_UNICODE_UTF8
1767 template<typename BufferType>
1768 #else
1769 // we only need one version in non-UTF8 builds and at least two Windows
1770 // compilers have problems with this function template, so use just one
1771 // normal function here
1772 #endif
1773 static int DoStringPrintfV(wxString& str,
1774 const wxString& format, va_list argptr)
1775 {
1776 int size = 1024;
1777
1778 for ( ;; )
1779 {
1780 #if wxUSE_UNICODE_UTF8
1781 BufferType tmp(str, size + 1);
1782 typename BufferType::CharType *buf = tmp;
1783 #else
1784 wxStringBuffer tmp(str, size + 1);
1785 wxChar *buf = tmp;
1786 #endif
1787
1788 if ( !buf )
1789 {
1790 // out of memory
1791
1792 // in UTF-8 build, leaving uninitialized junk in the buffer
1793 // could result in invalid non-empty UTF-8 string, so just
1794 // reset the string to empty on failure:
1795 buf[0] = '\0';
1796 return -1;
1797 }
1798
1799 // wxVsnprintf() may modify the original arg pointer, so pass it
1800 // only a copy
1801 va_list argptrcopy;
1802 wxVaCopy(argptrcopy, argptr);
1803
1804 #ifndef __WXWINCE__
1805 // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1806 errno = 0;
1807 #endif
1808 int len = wxVsnprintf(buf, size, format, argptrcopy);
1809 va_end(argptrcopy);
1810
1811 // some implementations of vsnprintf() don't NUL terminate
1812 // the string if there is not enough space for it so
1813 // always do it manually
1814 // FIXME: This really seems to be the wrong and would be an off-by-one
1815 // bug except the code above allocates an extra character.
1816 buf[size] = _T('\0');
1817
1818 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1819 // total number of characters which would have been written if the
1820 // buffer were large enough (newer standards such as Unix98)
1821 if ( len < 0 )
1822 {
1823 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1824 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1825 // is true if *both* of them use our own implementation,
1826 // otherwise we can't be sure
1827 #if wxUSE_WXVSNPRINTF
1828 // we know that our own implementation of wxVsnprintf() returns -1
1829 // only for a format error - thus there's something wrong with
1830 // the user's format string
1831 buf[0] = '\0';
1832 return -1;
1833 #else // possibly using system version
1834 // assume it only returns error if there is not enough space, but
1835 // as we don't know how much we need, double the current size of
1836 // the buffer
1837 #ifndef __WXWINCE__
1838 if( (errno == EILSEQ) || (errno == EINVAL) )
1839 // If errno was set to one of the two well-known hard errors
1840 // then fail immediately to avoid an infinite loop.
1841 return -1;
1842 else
1843 #endif // __WXWINCE__
1844 // still not enough, as we don't know how much we need, double the
1845 // current size of the buffer
1846 size *= 2;
1847 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1848 }
1849 else if ( len >= size )
1850 {
1851 #if wxUSE_WXVSNPRINTF
1852 // we know that our own implementation of wxVsnprintf() returns
1853 // size+1 when there's not enough space but that's not the size
1854 // of the required buffer!
1855 size *= 2; // so we just double the current size of the buffer
1856 #else
1857 // some vsnprintf() implementations NUL-terminate the buffer and
1858 // some don't in len == size case, to be safe always add 1
1859 // FIXME: I don't quite understand this comment. The vsnprintf
1860 // function is specifically defined to return the number of
1861 // characters printed not including the null terminator.
1862 // So OF COURSE you need to add 1 to get the right buffer size.
1863 // The following line is definitely correct, no question.
1864 size = len + 1;
1865 #endif
1866 }
1867 else // ok, there was enough space
1868 {
1869 break;
1870 }
1871 }
1872
1873 // we could have overshot
1874 str.Shrink();
1875
1876 return str.length();
1877 }
1878
1879 int wxString::PrintfV(const wxString& format, va_list argptr)
1880 {
1881 #if wxUSE_UNICODE_UTF8
1882 #if wxUSE_STL_BASED_WXSTRING
1883 typedef wxStringTypeBuffer<char> Utf8Buffer;
1884 #else
1885 typedef wxStringInternalBuffer Utf8Buffer;
1886 #endif
1887 #endif
1888
1889 #if wxUSE_UTF8_LOCALE_ONLY
1890 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
1891 #else
1892 #if wxUSE_UNICODE_UTF8
1893 if ( wxLocaleIsUtf8 )
1894 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
1895 else
1896 // wxChar* version
1897 return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
1898 #else
1899 return DoStringPrintfV(*this, format, argptr);
1900 #endif // UTF8/WCHAR
1901 #endif
1902 }
1903
1904 // ----------------------------------------------------------------------------
1905 // misc other operations
1906 // ----------------------------------------------------------------------------
1907
1908 // returns true if the string matches the pattern which may contain '*' and
1909 // '?' metacharacters (as usual, '?' matches any character and '*' any number
1910 // of them)
1911 bool wxString::Matches(const wxString& mask) const
1912 {
1913 // I disable this code as it doesn't seem to be faster (in fact, it seems
1914 // to be much slower) than the old, hand-written code below and using it
1915 // here requires always linking with libregex even if the user code doesn't
1916 // use it
1917 #if 0 // wxUSE_REGEX
1918 // first translate the shell-like mask into a regex
1919 wxString pattern;
1920 pattern.reserve(wxStrlen(pszMask));
1921
1922 pattern += _T('^');
1923 while ( *pszMask )
1924 {
1925 switch ( *pszMask )
1926 {
1927 case _T('?'):
1928 pattern += _T('.');
1929 break;
1930
1931 case _T('*'):
1932 pattern += _T(".*");
1933 break;
1934
1935 case _T('^'):
1936 case _T('.'):
1937 case _T('$'):
1938 case _T('('):
1939 case _T(')'):
1940 case _T('|'):
1941 case _T('+'):
1942 case _T('\\'):
1943 // these characters are special in a RE, quote them
1944 // (however note that we don't quote '[' and ']' to allow
1945 // using them for Unix shell like matching)
1946 pattern += _T('\\');
1947 // fall through
1948
1949 default:
1950 pattern += *pszMask;
1951 }
1952
1953 pszMask++;
1954 }
1955 pattern += _T('$');
1956
1957 // and now use it
1958 return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
1959 #else // !wxUSE_REGEX
1960 // TODO: this is, of course, awfully inefficient...
1961
1962 // FIXME-UTF8: implement using iterators, remove #if
1963 #if wxUSE_UNICODE_UTF8
1964 wxWCharBuffer maskBuf = mask.wc_str();
1965 wxWCharBuffer txtBuf = wc_str();
1966 const wxChar *pszMask = maskBuf.data();
1967 const wxChar *pszTxt = txtBuf.data();
1968 #else
1969 const wxChar *pszMask = mask.wx_str();
1970 // the char currently being checked
1971 const wxChar *pszTxt = wx_str();
1972 #endif
1973
1974 // the last location where '*' matched
1975 const wxChar *pszLastStarInText = NULL;
1976 const wxChar *pszLastStarInMask = NULL;
1977
1978 match:
1979 for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
1980 switch ( *pszMask ) {
1981 case wxT('?'):
1982 if ( *pszTxt == wxT('\0') )
1983 return false;
1984
1985 // pszTxt and pszMask will be incremented in the loop statement
1986
1987 break;
1988
1989 case wxT('*'):
1990 {
1991 // remember where we started to be able to backtrack later
1992 pszLastStarInText = pszTxt;
1993 pszLastStarInMask = pszMask;
1994
1995 // ignore special chars immediately following this one
1996 // (should this be an error?)
1997 while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
1998 pszMask++;
1999
2000 // if there is nothing more, match
2001 if ( *pszMask == wxT('\0') )
2002 return true;
2003
2004 // are there any other metacharacters in the mask?
2005 size_t uiLenMask;
2006 const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));
2007
2008 if ( pEndMask != NULL ) {
2009 // we have to match the string between two metachars
2010 uiLenMask = pEndMask - pszMask;
2011 }
2012 else {
2013 // we have to match the remainder of the string
2014 uiLenMask = wxStrlen(pszMask);
2015 }
2016
2017 wxString strToMatch(pszMask, uiLenMask);
2018 const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
2019 if ( pMatch == NULL )
2020 return false;
2021
2022 // -1 to compensate "++" in the loop
2023 pszTxt = pMatch + uiLenMask - 1;
2024 pszMask += uiLenMask - 1;
2025 }
2026 break;
2027
2028 default:
2029 if ( *pszMask != *pszTxt )
2030 return false;
2031 break;
2032 }
2033 }
2034
2035 // match only if nothing left
2036 if ( *pszTxt == wxT('\0') )
2037 return true;
2038
2039 // if we failed to match, backtrack if we can
2040 if ( pszLastStarInText ) {
2041 pszTxt = pszLastStarInText + 1;
2042 pszMask = pszLastStarInMask;
2043
2044 pszLastStarInText = NULL;
2045
2046 // don't bother resetting pszLastStarInMask, it's unnecessary
2047
2048 goto match;
2049 }
2050
2051 return false;
2052 #endif // wxUSE_REGEX/!wxUSE_REGEX
2053 }
2054
2055 // Count the number of chars
2056 int wxString::Freq(wxUniChar ch) const
2057 {
2058 int count = 0;
2059 for ( const_iterator i = begin(); i != end(); ++i )
2060 {
2061 if ( *i == ch )
2062 count ++;
2063 }
2064 return count;
2065 }
2066
2067 // ----------------------------------------------------------------------------
2068 // wxUTF8StringBuffer
2069 // ----------------------------------------------------------------------------
2070
2071 #if wxUSE_UNICODE_WCHAR
2072 wxUTF8StringBuffer::~wxUTF8StringBuffer()
2073 {
2074 wxMBConvStrictUTF8 conv;
2075 size_t wlen = conv.ToWChar(NULL, 0, m_buf);
2076 wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
2077
2078 wxStringInternalBuffer wbuf(m_str, wlen);
2079 conv.ToWChar(wbuf, wlen, m_buf);
2080 }
2081
2082 wxUTF8StringBufferLength::~wxUTF8StringBufferLength()
2083 {
2084 wxCHECK_RET(m_lenSet, "length not set");
2085
2086 wxMBConvStrictUTF8 conv;
2087 size_t wlen = conv.ToWChar(NULL, 0, m_buf, m_len);
2088 wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
2089
2090 wxStringInternalBufferLength wbuf(m_str, wlen);
2091 conv.ToWChar(wbuf, wlen, m_buf, m_len);
2092 wbuf.SetLength(wlen);
2093 }
2094 #endif // wxUSE_UNICODE_WCHAR
2095
2096 // ----------------------------------------------------------------------------
2097 // wxCharTypeBuffer<T>
2098 // ----------------------------------------------------------------------------
2099
2100 template<>
2101 wxCharTypeBuffer<char>::Data
2102 wxCharTypeBuffer<char>::NullData(NULL);
2103
2104 template<>
2105 wxCharTypeBuffer<wchar_t>::Data
2106 wxCharTypeBuffer<wchar_t>::NullData(NULL);