]> git.saurik.com Git - wxWidgets.git/blob - src/common/string.cpp
c97125d2c46b1c6bd73fa3cd270ea54bae912297
[wxWidgets.git] / src / common / string.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
5 // Modified by:
6 // Created: 29/01/98
7 // RCS-ID: $Id$
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
12
13 // ===========================================================================
14 // headers, declarations, constants
15 // ===========================================================================
16
17 // For compilers that support precompilation, includes "wx.h".
18 #include "wx/wxprec.h"
19
20 #ifdef __BORLANDC__
21 #pragma hdrstop
22 #endif
23
24 #ifndef WX_PRECOMP
25 #include "wx/string.h"
26 #include "wx/wxcrtvararg.h"
27 #endif
28
29 #include <ctype.h>
30
31 #ifndef __WXWINCE__
32 #include <errno.h>
33 #endif
34
35 #include <string.h>
36 #include <stdlib.h>
37
38 #include "wx/hashmap.h"
39
40 // string handling functions used by wxString:
41 #if wxUSE_UNICODE_UTF8
42 #define wxStringMemcpy memcpy
43 #define wxStringMemcmp memcmp
44 #define wxStringMemchr memchr
45 #define wxStringStrlen strlen
46 #else
47 #define wxStringMemcpy wxTmemcpy
48 #define wxStringMemcmp wxTmemcmp
49 #define wxStringMemchr wxTmemchr
50 #define wxStringStrlen wxStrlen
51 #endif
52
53
54 // ---------------------------------------------------------------------------
55 // static class variables definition
56 // ---------------------------------------------------------------------------
57
58 //According to STL _must_ be a -1 size_t
59 const size_t wxString::npos = (size_t) -1;
60
61 #if wxUSE_STRING_POS_CACHE
62 wxTLS_TYPE(wxString::Cache) wxString::ms_cache;
63
64 // gdb seems to be unable to display thread-local variables correctly, at least
65 // not my 6.4.98 version under amd64, so provide this debugging helper to do it
66 #ifdef __WXDEBUG__
67
68 struct wxStrCacheDumper
69 {
70 static void ShowAll()
71 {
72 puts("*** wxString cache dump:");
73 for ( unsigned n = 0; n < wxString::Cache::SIZE; n++ )
74 {
75 const wxString::Cache::Element&
76 c = wxString::ms_cache.cached[n];
77
78 printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
79 n,
80 n == wxString::ms_cache.lastUsed ? " [*]" : "",
81 c.str,
82 (unsigned long)c.pos,
83 (unsigned long)c.impl,
84 (long)c.len);
85 }
86 }
87 };
88
89 void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
90
91 #endif // __WXDEBUG__
92
93 #ifdef wxPROFILE_STRING_CACHE
94
95 wxString::CacheStats wxString::ms_cacheStats;
96
97 namespace
98 {
99
100 struct ShowCacheStats
101 {
102 ~ShowCacheStats()
103 {
104 const wxString::CacheStats& stats = wxString::ms_cacheStats;
105
106 if ( stats.postot )
107 {
108 puts("*** wxString cache statistics:");
109 printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
110 stats.postot);
111 printf("\tHits %u (of which %u not used) or %.2f%%\n",
112 stats.poshits,
113 stats.mishits,
114 100.*float(stats.poshits - stats.mishits)/stats.postot);
115 printf("\tAverage position requested: %.2f\n",
116 float(stats.sumpos) / stats.postot);
117 printf("\tAverage offset after cached hint: %.2f\n",
118 float(stats.sumofs) / stats.postot);
119 }
120
121 if ( stats.lentot )
122 {
123 printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
124 stats.lentot, 100.*float(stats.lenhits)/stats.lentot);
125 }
126 }
127 } s_showCacheStats;
128
129 } // anonymous namespace
130
131 #endif // wxPROFILE_STRING_CACHE
132
133 #endif // wxUSE_STRING_POS_CACHE
134
135 // ----------------------------------------------------------------------------
136 // global functions
137 // ----------------------------------------------------------------------------
138
139 #if wxUSE_STD_IOSTREAM
140
141 #include <iostream>
142
143 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
144 {
145 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
146 return os << (const char *)str.AsCharBuf();
147 #else
148 return os << str.AsInternal();
149 #endif
150 }
151
152 wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
153 {
154 return os << str.c_str();
155 }
156
157 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str)
158 {
159 return os << str.data();
160 }
161
162 #ifndef __BORLANDC__
163 wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)
164 {
165 return os << str.data();
166 }
167 #endif
168
169 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
170
171 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxString& str)
172 {
173 return wos << str.wc_str();
174 }
175
176 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxCStrData& str)
177 {
178 return wos << str.AsWChar();
179 }
180
181 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxWCharBuffer& str)
182 {
183 return wos << str.data();
184 }
185
186 #endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
187
188 #endif // wxUSE_STD_IOSTREAM
189
190 // ===========================================================================
191 // wxString class core
192 // ===========================================================================
193
194 #if wxUSE_UNICODE_UTF8
195
196 void wxString::PosLenToImpl(size_t pos, size_t len,
197 size_t *implPos, size_t *implLen) const
198 {
199 if ( pos == npos )
200 {
201 *implPos = npos;
202 }
203 else // have valid start position
204 {
205 const const_iterator b = GetIterForNthChar(pos);
206 *implPos = wxStringImpl::const_iterator(b.impl()) - m_impl.begin();
207 if ( len == npos )
208 {
209 *implLen = npos;
210 }
211 else // have valid length too
212 {
213 // we need to handle the case of length specifying a substring
214 // going beyond the end of the string, just as std::string does
215 const const_iterator e(end());
216 const_iterator i(b);
217 while ( len && i <= e )
218 {
219 ++i;
220 --len;
221 }
222
223 *implLen = i.impl() - b.impl();
224 }
225 }
226 }
227
228 #endif // wxUSE_UNICODE_UTF8
229
230 // ----------------------------------------------------------------------------
231 // wxCStrData converted strings caching
232 // ----------------------------------------------------------------------------
233
234 // FIXME-UTF8: temporarily disabled because it doesn't work with global
235 // string objects; re-enable after fixing this bug and benchmarking
236 // performance to see if using a hash is a good idea at all
237 #if 0
238
239 // For backward compatibility reasons, it must be possible to assign the value
240 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
241 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
242 // because the memory would be freed immediately, but it has to be valid as long
243 // as the string is not modified, so that code like this still works:
244 //
245 // const wxChar *s = str.c_str();
246 // while ( s ) { ... }
247
248 // FIXME-UTF8: not thread safe!
249 // FIXME-UTF8: we currently clear the cached conversion only when the string is
250 // destroyed, but we should do it when the string is modified, to
251 // keep memory usage down
252 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
253 // invalidated the cache on every change, we could keep the previous
254 // conversion
255 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
256 // to use mb_str() or wc_str() instead of (const [w]char*)c_str()
257
258 template<typename T>
259 static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
260 {
261 typename T::iterator i = hash.find(wxConstCast(s, wxString));
262 if ( i != hash.end() )
263 {
264 free(i->second);
265 hash.erase(i);
266 }
267 }
268
269 #if wxUSE_UNICODE
270 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
271 // so we have to use wxString* here and const-cast when used
272 WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
273 wxStringCharConversionCache);
274 static wxStringCharConversionCache gs_stringsCharCache;
275
276 const char* wxCStrData::AsChar() const
277 {
278 // remove previously cache value, if any (see FIXMEs above):
279 DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
280
281 // convert the string and keep it:
282 const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
283 m_str->mb_str().release();
284
285 return s + m_offset;
286 }
287 #endif // wxUSE_UNICODE
288
289 #if !wxUSE_UNICODE_WCHAR
290 WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
291 wxStringWCharConversionCache);
292 static wxStringWCharConversionCache gs_stringsWCharCache;
293
294 const wchar_t* wxCStrData::AsWChar() const
295 {
296 // remove previously cache value, if any (see FIXMEs above):
297 DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
298
299 // convert the string and keep it:
300 const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
301 m_str->wc_str().release();
302
303 return s + m_offset;
304 }
305 #endif // !wxUSE_UNICODE_WCHAR
306
307 wxString::~wxString()
308 {
309 #if wxUSE_UNICODE
310 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
311 DeleteStringFromConversionCache(gs_stringsCharCache, this);
312 #endif
313 #if !wxUSE_UNICODE_WCHAR
314 DeleteStringFromConversionCache(gs_stringsWCharCache, this);
315 #endif
316 }
317 #endif
318
319 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
320 const char* wxCStrData::AsChar() const
321 {
322 #if wxUSE_UNICODE_UTF8
323 if ( wxLocaleIsUtf8 )
324 return AsInternal();
325 #endif
326 // under non-UTF8 locales, we have to convert the internal UTF-8
327 // representation using wxConvLibc and cache the result
328
329 wxString *str = wxConstCast(m_str, wxString);
330
331 // convert the string:
332 //
333 // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
334 // have it) but it's unfortunately not obvious to implement
335 // because we don't know how big buffer do we need for the
336 // given string length (in case of multibyte encodings, e.g.
337 // ISO-2022-JP or UTF-8 when internal representation is wchar_t)
338 //
339 // One idea would be to store more than just m_convertedToChar
340 // in wxString: then we could record the length of the string
341 // which was converted the last time and try to reuse the same
342 // buffer if the current length is not greater than it (this
343 // could still fail because string could have been modified in
344 // place but it would work most of the time, so we'd do it and
345 // only allocate the new buffer if in-place conversion returned
346 // an error). We could also store a bit saying if the string
347 // was modified since the last conversion (and update it in all
348 // operation modifying the string, of course) to avoid unneeded
349 // consequential conversions. But both of these ideas require
350 // adding more fields to wxString and require profiling results
351 // to be sure that we really gain enough from them to justify
352 // doing it.
353 wxCharBuffer buf(str->mb_str());
354
355 // if it failed, return empty string and not NULL to avoid crashes in code
356 // written with either wxWidgets 2 wxString or std::string behaviour in
357 // mind: neither of them ever returns NULL and so we shouldn't neither
358 if ( !buf )
359 return "";
360
361 if ( str->m_convertedToChar &&
362 strlen(buf) == strlen(str->m_convertedToChar) )
363 {
364 // keep the same buffer for as long as possible, so that several calls
365 // to c_str() in a row still work:
366 strcpy(str->m_convertedToChar, buf);
367 }
368 else
369 {
370 str->m_convertedToChar = buf.release();
371 }
372
373 // and keep it:
374 return str->m_convertedToChar + m_offset;
375 }
376 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
377
378 #if !wxUSE_UNICODE_WCHAR
379 const wchar_t* wxCStrData::AsWChar() const
380 {
381 wxString *str = wxConstCast(m_str, wxString);
382
383 // convert the string:
384 wxWCharBuffer buf(str->wc_str());
385
386 // notice that here, unlike above in AsChar(), conversion can't fail as our
387 // internal UTF-8 is always well-formed -- or the string was corrupted and
388 // all bets are off anyhow
389
390 // FIXME-UTF8: do the conversion in-place in the existing buffer
391 if ( str->m_convertedToWChar &&
392 wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
393 {
394 // keep the same buffer for as long as possible, so that several calls
395 // to c_str() in a row still work:
396 memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
397 }
398 else
399 {
400 str->m_convertedToWChar = buf.release();
401 }
402
403 // and keep it:
404 return str->m_convertedToWChar + m_offset;
405 }
406 #endif // !wxUSE_UNICODE_WCHAR
407
408 // ===========================================================================
409 // wxString class core
410 // ===========================================================================
411
412 // ---------------------------------------------------------------------------
413 // construction and conversion
414 // ---------------------------------------------------------------------------
415
416 #if wxUSE_UNICODE_WCHAR
417 /* static */
418 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
419 const wxMBConv& conv)
420 {
421 // anything to do?
422 if ( !psz || nLength == 0 )
423 return SubstrBufFromMB(L"", 0);
424
425 if ( nLength == npos )
426 nLength = wxNO_LEN;
427
428 size_t wcLen;
429 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
430 if ( !wcLen )
431 return SubstrBufFromMB(_T(""), 0);
432 else
433 return SubstrBufFromMB(wcBuf, wcLen);
434 }
435 #endif // wxUSE_UNICODE_WCHAR
436
437 #if wxUSE_UNICODE_UTF8
438 /* static */
439 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
440 const wxMBConv& conv)
441 {
442 // anything to do?
443 if ( !psz || nLength == 0 )
444 return SubstrBufFromMB("", 0);
445
446 // if psz is already in UTF-8, we don't have to do the roundtrip to
447 // wchar_t* and back:
448 if ( conv.IsUTF8() )
449 {
450 // we need to validate the input because UTF8 iterators assume valid
451 // UTF-8 sequence and psz may be invalid:
452 if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
453 {
454 // we must pass the real string length to SubstrBufFromMB ctor
455 if ( nLength == npos )
456 nLength = psz ? strlen(psz) : 0;
457 return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz), nLength);
458 }
459 // else: do the roundtrip through wchar_t*
460 }
461
462 if ( nLength == npos )
463 nLength = wxNO_LEN;
464
465 // first convert to wide string:
466 size_t wcLen;
467 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
468 if ( !wcLen )
469 return SubstrBufFromMB("", 0);
470
471 // and then to UTF-8:
472 SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
473 // widechar -> UTF-8 conversion isn't supposed to ever fail:
474 wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
475
476 return buf;
477 }
478 #endif // wxUSE_UNICODE_UTF8
479
480 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
481 /* static */
482 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
483 const wxMBConv& conv)
484 {
485 // anything to do?
486 if ( !pwz || nLength == 0 )
487 return SubstrBufFromWC("", 0);
488
489 if ( nLength == npos )
490 nLength = wxNO_LEN;
491
492 size_t mbLen;
493 wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
494 if ( !mbLen )
495 return SubstrBufFromWC("", 0);
496 else
497 return SubstrBufFromWC(mbBuf, mbLen);
498 }
499 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
500
501
502 #if wxUSE_UNICODE_WCHAR
503
504 //Convert wxString in Unicode mode to a multi-byte string
505 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
506 {
507 return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL);
508 }
509
510 #elif wxUSE_UNICODE_UTF8
511
512 const wxWCharBuffer wxString::wc_str() const
513 {
514 return wxMBConvStrictUTF8().cMB2WC
515 (
516 m_impl.c_str(),
517 m_impl.length() + 1, // size, not length
518 NULL
519 );
520 }
521
522 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
523 {
524 if ( conv.IsUTF8() )
525 return wxCharBuffer::CreateNonOwned(m_impl.c_str());
526
527 // FIXME-UTF8: use wc_str() here once we have buffers with length
528
529 size_t wcLen;
530 wxWCharBuffer wcBuf(wxMBConvStrictUTF8().cMB2WC
531 (
532 m_impl.c_str(),
533 m_impl.length() + 1, // size
534 &wcLen
535 ));
536 if ( !wcLen )
537 return wxCharBuffer("");
538
539 return conv.cWC2MB(wcBuf, wcLen+1, NULL);
540 }
541
542 #else // ANSI
543
544 //Converts this string to a wide character string if unicode
545 //mode is not enabled and wxUSE_WCHAR_T is enabled
546 const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
547 {
548 return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL);
549 }
550
551 #endif // Unicode/ANSI
552
553 // shrink to minimal size (releasing extra memory)
554 bool wxString::Shrink()
555 {
556 wxString tmp(begin(), end());
557 swap(tmp);
558 return tmp.length() == length();
559 }
560
561 // deprecated compatibility code:
562 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
563 wxStringCharType *wxString::GetWriteBuf(size_t nLen)
564 {
565 return DoGetWriteBuf(nLen);
566 }
567
568 void wxString::UngetWriteBuf()
569 {
570 DoUngetWriteBuf();
571 }
572
573 void wxString::UngetWriteBuf(size_t nLen)
574 {
575 DoUngetWriteBuf(nLen);
576 }
577 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
578
579
580 // ---------------------------------------------------------------------------
581 // data access
582 // ---------------------------------------------------------------------------
583
584 // all functions are inline in string.h
585
586 // ---------------------------------------------------------------------------
587 // concatenation operators
588 // ---------------------------------------------------------------------------
589
590 /*
591 * concatenation functions come in 5 flavours:
592 * string + string
593 * char + string and string + char
594 * C str + string and string + C str
595 */
596
597 wxString operator+(const wxString& str1, const wxString& str2)
598 {
599 #if !wxUSE_STL_BASED_WXSTRING
600 wxASSERT( str1.IsValid() );
601 wxASSERT( str2.IsValid() );
602 #endif
603
604 wxString s = str1;
605 s += str2;
606
607 return s;
608 }
609
610 wxString operator+(const wxString& str, wxUniChar ch)
611 {
612 #if !wxUSE_STL_BASED_WXSTRING
613 wxASSERT( str.IsValid() );
614 #endif
615
616 wxString s = str;
617 s += ch;
618
619 return s;
620 }
621
622 wxString operator+(wxUniChar ch, const wxString& str)
623 {
624 #if !wxUSE_STL_BASED_WXSTRING
625 wxASSERT( str.IsValid() );
626 #endif
627
628 wxString s = ch;
629 s += str;
630
631 return s;
632 }
633
634 wxString operator+(const wxString& str, const char *psz)
635 {
636 #if !wxUSE_STL_BASED_WXSTRING
637 wxASSERT( str.IsValid() );
638 #endif
639
640 wxString s;
641 if ( !s.Alloc(strlen(psz) + str.length()) ) {
642 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
643 }
644 s += str;
645 s += psz;
646
647 return s;
648 }
649
650 wxString operator+(const wxString& str, const wchar_t *pwz)
651 {
652 #if !wxUSE_STL_BASED_WXSTRING
653 wxASSERT( str.IsValid() );
654 #endif
655
656 wxString s;
657 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
658 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
659 }
660 s += str;
661 s += pwz;
662
663 return s;
664 }
665
666 wxString operator+(const char *psz, const wxString& str)
667 {
668 #if !wxUSE_STL_BASED_WXSTRING
669 wxASSERT( str.IsValid() );
670 #endif
671
672 wxString s;
673 if ( !s.Alloc(strlen(psz) + str.length()) ) {
674 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
675 }
676 s = psz;
677 s += str;
678
679 return s;
680 }
681
682 wxString operator+(const wchar_t *pwz, const wxString& str)
683 {
684 #if !wxUSE_STL_BASED_WXSTRING
685 wxASSERT( str.IsValid() );
686 #endif
687
688 wxString s;
689 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
690 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
691 }
692 s = pwz;
693 s += str;
694
695 return s;
696 }
697
698 // ---------------------------------------------------------------------------
699 // string comparison
700 // ---------------------------------------------------------------------------
701
702 bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
703 {
704 return (length() == 1) && (compareWithCase ? GetChar(0u) == c
705 : wxToupper(GetChar(0u)) == wxToupper(c));
706 }
707
708 #ifdef HAVE_STD_STRING_COMPARE
709
710 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
711 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
712 // sort strings in characters code point order by sorting the byte sequence
713 // in byte values order (i.e. what strcmp() and memcmp() do).
714
715 int wxString::compare(const wxString& str) const
716 {
717 return m_impl.compare(str.m_impl);
718 }
719
720 int wxString::compare(size_t nStart, size_t nLen,
721 const wxString& str) const
722 {
723 size_t pos, len;
724 PosLenToImpl(nStart, nLen, &pos, &len);
725 return m_impl.compare(pos, len, str.m_impl);
726 }
727
728 int wxString::compare(size_t nStart, size_t nLen,
729 const wxString& str,
730 size_t nStart2, size_t nLen2) const
731 {
732 size_t pos, len;
733 PosLenToImpl(nStart, nLen, &pos, &len);
734
735 size_t pos2, len2;
736 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
737
738 return m_impl.compare(pos, len, str.m_impl, pos2, len2);
739 }
740
741 int wxString::compare(const char* sz) const
742 {
743 return m_impl.compare(ImplStr(sz));
744 }
745
746 int wxString::compare(const wchar_t* sz) const
747 {
748 return m_impl.compare(ImplStr(sz));
749 }
750
751 int wxString::compare(size_t nStart, size_t nLen,
752 const char* sz, size_t nCount) const
753 {
754 size_t pos, len;
755 PosLenToImpl(nStart, nLen, &pos, &len);
756
757 SubstrBufFromMB str(ImplStr(sz, nCount));
758
759 return m_impl.compare(pos, len, str.data, str.len);
760 }
761
762 int wxString::compare(size_t nStart, size_t nLen,
763 const wchar_t* sz, size_t nCount) const
764 {
765 size_t pos, len;
766 PosLenToImpl(nStart, nLen, &pos, &len);
767
768 SubstrBufFromWC str(ImplStr(sz, nCount));
769
770 return m_impl.compare(pos, len, str.data, str.len);
771 }
772
773 #else // !HAVE_STD_STRING_COMPARE
774
775 static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
776 const wxStringCharType* s2, size_t l2)
777 {
778 if( l1 == l2 )
779 return wxStringMemcmp(s1, s2, l1);
780 else if( l1 < l2 )
781 {
782 int ret = wxStringMemcmp(s1, s2, l1);
783 return ret == 0 ? -1 : ret;
784 }
785 else
786 {
787 int ret = wxStringMemcmp(s1, s2, l2);
788 return ret == 0 ? +1 : ret;
789 }
790 }
791
792 int wxString::compare(const wxString& str) const
793 {
794 return ::wxDoCmp(m_impl.data(), m_impl.length(),
795 str.m_impl.data(), str.m_impl.length());
796 }
797
798 int wxString::compare(size_t nStart, size_t nLen,
799 const wxString& str) const
800 {
801 wxASSERT(nStart <= length());
802 size_type strLen = length() - nStart;
803 nLen = strLen < nLen ? strLen : nLen;
804
805 size_t pos, len;
806 PosLenToImpl(nStart, nLen, &pos, &len);
807
808 return ::wxDoCmp(m_impl.data() + pos, len,
809 str.m_impl.data(), str.m_impl.length());
810 }
811
812 int wxString::compare(size_t nStart, size_t nLen,
813 const wxString& str,
814 size_t nStart2, size_t nLen2) const
815 {
816 wxASSERT(nStart <= length());
817 wxASSERT(nStart2 <= str.length());
818 size_type strLen = length() - nStart,
819 strLen2 = str.length() - nStart2;
820 nLen = strLen < nLen ? strLen : nLen;
821 nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
822
823 size_t pos, len;
824 PosLenToImpl(nStart, nLen, &pos, &len);
825 size_t pos2, len2;
826 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
827
828 return ::wxDoCmp(m_impl.data() + pos, len,
829 str.m_impl.data() + pos2, len2);
830 }
831
832 int wxString::compare(const char* sz) const
833 {
834 SubstrBufFromMB str(ImplStr(sz, npos));
835 if ( str.len == npos )
836 str.len = wxStringStrlen(str.data);
837 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
838 }
839
840 int wxString::compare(const wchar_t* sz) const
841 {
842 SubstrBufFromWC str(ImplStr(sz, npos));
843 if ( str.len == npos )
844 str.len = wxStringStrlen(str.data);
845 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
846 }
847
848 int wxString::compare(size_t nStart, size_t nLen,
849 const char* sz, size_t nCount) const
850 {
851 wxASSERT(nStart <= length());
852 size_type strLen = length() - nStart;
853 nLen = strLen < nLen ? strLen : nLen;
854
855 size_t pos, len;
856 PosLenToImpl(nStart, nLen, &pos, &len);
857
858 SubstrBufFromMB str(ImplStr(sz, nCount));
859 if ( str.len == npos )
860 str.len = wxStringStrlen(str.data);
861
862 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
863 }
864
865 int wxString::compare(size_t nStart, size_t nLen,
866 const wchar_t* sz, size_t nCount) const
867 {
868 wxASSERT(nStart <= length());
869 size_type strLen = length() - nStart;
870 nLen = strLen < nLen ? strLen : nLen;
871
872 size_t pos, len;
873 PosLenToImpl(nStart, nLen, &pos, &len);
874
875 SubstrBufFromWC str(ImplStr(sz, nCount));
876 if ( str.len == npos )
877 str.len = wxStringStrlen(str.data);
878
879 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
880 }
881
882 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
883
884
885 // ---------------------------------------------------------------------------
886 // find_{first,last}_[not]_of functions
887 // ---------------------------------------------------------------------------
888
889 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
890
891 // NB: All these functions are implemented with the argument being wxChar*,
892 // i.e. widechar string in any Unicode build, even though native string
893 // representation is char* in the UTF-8 build. This is because we couldn't
894 // use memchr() to determine if a character is in a set encoded as UTF-8.
895
896 size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
897 {
898 return find_first_of(sz, nStart, wxStrlen(sz));
899 }
900
901 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
902 {
903 return find_first_not_of(sz, nStart, wxStrlen(sz));
904 }
905
906 size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
907 {
908 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
909
910 size_t idx = nStart;
911 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
912 {
913 if ( wxTmemchr(sz, *i, n) )
914 return idx;
915 }
916
917 return npos;
918 }
919
920 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
921 {
922 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
923
924 size_t idx = nStart;
925 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
926 {
927 if ( !wxTmemchr(sz, *i, n) )
928 return idx;
929 }
930
931 return npos;
932 }
933
934
935 size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
936 {
937 return find_last_of(sz, nStart, wxStrlen(sz));
938 }
939
940 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
941 {
942 return find_last_not_of(sz, nStart, wxStrlen(sz));
943 }
944
945 size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
946 {
947 size_t len = length();
948
949 if ( nStart == npos )
950 {
951 nStart = len - 1;
952 }
953 else
954 {
955 wxASSERT_MSG( nStart <= len, _T("invalid index") );
956 }
957
958 size_t idx = nStart;
959 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
960 i != rend(); --idx, ++i )
961 {
962 if ( wxTmemchr(sz, *i, n) )
963 return idx;
964 }
965
966 return npos;
967 }
968
969 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
970 {
971 size_t len = length();
972
973 if ( nStart == npos )
974 {
975 nStart = len - 1;
976 }
977 else
978 {
979 wxASSERT_MSG( nStart <= len, _T("invalid index") );
980 }
981
982 size_t idx = nStart;
983 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
984 i != rend(); --idx, ++i )
985 {
986 if ( !wxTmemchr(sz, *i, n) )
987 return idx;
988 }
989
990 return npos;
991 }
992
993 size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
994 {
995 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
996
997 size_t idx = nStart;
998 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
999 {
1000 if ( *i != ch )
1001 return idx;
1002 }
1003
1004 return npos;
1005 }
1006
1007 size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
1008 {
1009 size_t len = length();
1010
1011 if ( nStart == npos )
1012 {
1013 nStart = len - 1;
1014 }
1015 else
1016 {
1017 wxASSERT_MSG( nStart <= len, _T("invalid index") );
1018 }
1019
1020 size_t idx = nStart;
1021 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1022 i != rend(); --idx, ++i )
1023 {
1024 if ( *i != ch )
1025 return idx;
1026 }
1027
1028 return npos;
1029 }
1030
1031 // the functions above were implemented for wchar_t* arguments in Unicode
1032 // build and char* in ANSI build; below are implementations for the other
1033 // version:
1034 #if wxUSE_UNICODE
1035 #define wxOtherCharType char
1036 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
1037 #else
1038 #define wxOtherCharType wchar_t
1039 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
1040 #endif
1041
1042 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
1043 { return find_first_of(STRCONV(sz), nStart); }
1044
1045 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
1046 size_t n) const
1047 { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
1048 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
1049 { return find_last_of(STRCONV(sz), nStart); }
1050 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
1051 size_t n) const
1052 { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
1053 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
1054 { return find_first_not_of(STRCONV(sz), nStart); }
1055 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
1056 size_t n) const
1057 { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
1058 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
1059 { return find_last_not_of(STRCONV(sz), nStart); }
1060 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
1061 size_t n) const
1062 { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
1063
1064 #undef wxOtherCharType
1065 #undef STRCONV
1066
1067 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1068
1069 // ===========================================================================
1070 // other common string functions
1071 // ===========================================================================
1072
1073 int wxString::CmpNoCase(const wxString& s) const
1074 {
1075 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
1076
1077 const_iterator i1 = begin();
1078 const_iterator end1 = end();
1079 const_iterator i2 = s.begin();
1080 const_iterator end2 = s.end();
1081
1082 for ( ; i1 != end1 && i2 != end2; ++i1, ++i2 )
1083 {
1084 wxUniChar lower1 = (wxChar)wxTolower(*i1);
1085 wxUniChar lower2 = (wxChar)wxTolower(*i2);
1086 if ( lower1 != lower2 )
1087 return lower1 < lower2 ? -1 : 1;
1088 }
1089
1090 size_t len1 = length();
1091 size_t len2 = s.length();
1092
1093 if ( len1 < len2 )
1094 return -1;
1095 else if ( len1 > len2 )
1096 return 1;
1097 return 0;
1098 }
1099
1100
1101 #if wxUSE_UNICODE
1102
1103 #ifdef __MWERKS__
1104 #ifndef __SCHAR_MAX__
1105 #define __SCHAR_MAX__ 127
1106 #endif
1107 #endif
1108
1109 wxString wxString::FromAscii(const char *ascii, size_t len)
1110 {
1111 if (!ascii || len == 0)
1112 return wxEmptyString;
1113
1114 wxString res;
1115
1116 {
1117 wxStringInternalBuffer buf(res, len);
1118 wxStringCharType *dest = buf;
1119
1120 for ( ; len > 0; --len )
1121 {
1122 unsigned char c = (unsigned char)*ascii++;
1123 wxASSERT_MSG( c < 0x80,
1124 _T("Non-ASCII value passed to FromAscii().") );
1125
1126 *dest++ = (wchar_t)c;
1127 }
1128 }
1129
1130 return res;
1131 }
1132
1133 wxString wxString::FromAscii(const char *ascii)
1134 {
1135 return FromAscii(ascii, wxStrlen(ascii));
1136 }
1137
1138 wxString wxString::FromAscii(char ascii)
1139 {
1140 // What do we do with '\0' ?
1141
1142 unsigned char c = (unsigned char)ascii;
1143
1144 wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") );
1145
1146 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1147 return wxString(wxUniChar((wchar_t)c));
1148 }
1149
1150 const wxCharBuffer wxString::ToAscii() const
1151 {
1152 // this will allocate enough space for the terminating NUL too
1153 wxCharBuffer buffer(length());
1154 char *dest = buffer.data();
1155
1156 for ( const_iterator i = begin(); i != end(); ++i )
1157 {
1158 wxUniChar c(*i);
1159 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1160 *dest++ = c.IsAscii() ? (char)c : '_';
1161
1162 // the output string can't have embedded NULs anyhow, so we can safely
1163 // stop at first of them even if we do have any
1164 if ( !c )
1165 break;
1166 }
1167
1168 return buffer;
1169 }
1170
1171 #endif // wxUSE_UNICODE
1172
1173 // extract string of length nCount starting at nFirst
1174 wxString wxString::Mid(size_t nFirst, size_t nCount) const
1175 {
1176 size_t nLen = length();
1177
1178 // default value of nCount is npos and means "till the end"
1179 if ( nCount == npos )
1180 {
1181 nCount = nLen - nFirst;
1182 }
1183
1184 // out-of-bounds requests return sensible things
1185 if ( nFirst + nCount > nLen )
1186 {
1187 nCount = nLen - nFirst;
1188 }
1189
1190 if ( nFirst > nLen )
1191 {
1192 // AllocCopy() will return empty string
1193 return wxEmptyString;
1194 }
1195
1196 wxString dest(*this, nFirst, nCount);
1197 if ( dest.length() != nCount )
1198 {
1199 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1200 }
1201
1202 return dest;
1203 }
1204
1205 // check that the string starts with prefix and return the rest of the string
1206 // in the provided pointer if it is not NULL, otherwise return false
1207 bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
1208 {
1209 if ( compare(0, prefix.length(), prefix) != 0 )
1210 return false;
1211
1212 if ( rest )
1213 {
1214 // put the rest of the string into provided pointer
1215 rest->assign(*this, prefix.length(), npos);
1216 }
1217
1218 return true;
1219 }
1220
1221
1222 // check that the string ends with suffix and return the rest of it in the
1223 // provided pointer if it is not NULL, otherwise return false
1224 bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
1225 {
1226 int start = length() - suffix.length();
1227
1228 if ( start < 0 || compare(start, npos, suffix) != 0 )
1229 return false;
1230
1231 if ( rest )
1232 {
1233 // put the rest of the string into provided pointer
1234 rest->assign(*this, 0, start);
1235 }
1236
1237 return true;
1238 }
1239
1240
1241 // extract nCount last (rightmost) characters
1242 wxString wxString::Right(size_t nCount) const
1243 {
1244 if ( nCount > length() )
1245 nCount = length();
1246
1247 wxString dest(*this, length() - nCount, nCount);
1248 if ( dest.length() != nCount ) {
1249 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1250 }
1251 return dest;
1252 }
1253
1254 // get all characters after the last occurence of ch
1255 // (returns the whole string if ch not found)
1256 wxString wxString::AfterLast(wxUniChar ch) const
1257 {
1258 wxString str;
1259 int iPos = Find(ch, true);
1260 if ( iPos == wxNOT_FOUND )
1261 str = *this;
1262 else
1263 str = wx_str() + iPos + 1;
1264
1265 return str;
1266 }
1267
1268 // extract nCount first (leftmost) characters
1269 wxString wxString::Left(size_t nCount) const
1270 {
1271 if ( nCount > length() )
1272 nCount = length();
1273
1274 wxString dest(*this, 0, nCount);
1275 if ( dest.length() != nCount ) {
1276 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1277 }
1278 return dest;
1279 }
1280
1281 // get all characters before the first occurence of ch
1282 // (returns the whole string if ch not found)
1283 wxString wxString::BeforeFirst(wxUniChar ch) const
1284 {
1285 int iPos = Find(ch);
1286 if ( iPos == wxNOT_FOUND ) iPos = length();
1287 return wxString(*this, 0, iPos);
1288 }
1289
1290 /// get all characters before the last occurence of ch
1291 /// (returns empty string if ch not found)
1292 wxString wxString::BeforeLast(wxUniChar ch) const
1293 {
1294 wxString str;
1295 int iPos = Find(ch, true);
1296 if ( iPos != wxNOT_FOUND && iPos != 0 )
1297 str = wxString(c_str(), iPos);
1298
1299 return str;
1300 }
1301
1302 /// get all characters after the first occurence of ch
1303 /// (returns empty string if ch not found)
1304 wxString wxString::AfterFirst(wxUniChar ch) const
1305 {
1306 wxString str;
1307 int iPos = Find(ch);
1308 if ( iPos != wxNOT_FOUND )
1309 str = wx_str() + iPos + 1;
1310
1311 return str;
1312 }
1313
1314 // replace first (or all) occurences of some substring with another one
1315 size_t wxString::Replace(const wxString& strOld,
1316 const wxString& strNew, bool bReplaceAll)
1317 {
1318 // if we tried to replace an empty string we'd enter an infinite loop below
1319 wxCHECK_MSG( !strOld.empty(), 0,
1320 _T("wxString::Replace(): invalid parameter") );
1321
1322 wxSTRING_INVALIDATE_CACHE();
1323
1324 size_t uiCount = 0; // count of replacements made
1325
1326 // optimize the special common case: replacement of one character by
1327 // another one (in UTF-8 case we can only do this for ASCII characters)
1328 //
1329 // benchmarks show that this special version is around 3 times faster
1330 // (depending on the proportion of matching characters and UTF-8/wchar_t
1331 // build)
1332 if ( strOld.m_impl.length() == 1 && strNew.m_impl.length() == 1 )
1333 {
1334 const wxStringCharType chOld = strOld.m_impl[0],
1335 chNew = strNew.m_impl[0];
1336
1337 // this loop is the simplified version of the one below
1338 for ( size_t pos = 0; ; )
1339 {
1340 pos = m_impl.find(chOld, pos);
1341 if ( pos == npos )
1342 break;
1343
1344 m_impl[pos++] = chNew;
1345
1346 uiCount++;
1347
1348 if ( !bReplaceAll )
1349 break;
1350 }
1351 }
1352 else // general case
1353 {
1354 const size_t uiOldLen = strOld.m_impl.length();
1355 const size_t uiNewLen = strNew.m_impl.length();
1356
1357 for ( size_t pos = 0; ; )
1358 {
1359 pos = m_impl.find(strOld.m_impl, pos);
1360 if ( pos == npos )
1361 break;
1362
1363 // replace this occurrence of the old string with the new one
1364 m_impl.replace(pos, uiOldLen, strNew.m_impl);
1365
1366 // move up pos past the string that was replaced
1367 pos += uiNewLen;
1368
1369 // increase replace count
1370 uiCount++;
1371
1372 // stop after the first one?
1373 if ( !bReplaceAll )
1374 break;
1375 }
1376 }
1377
1378 return uiCount;
1379 }
1380
1381 bool wxString::IsAscii() const
1382 {
1383 for ( const_iterator i = begin(); i != end(); ++i )
1384 {
1385 if ( !(*i).IsAscii() )
1386 return false;
1387 }
1388
1389 return true;
1390 }
1391
1392 bool wxString::IsWord() const
1393 {
1394 for ( const_iterator i = begin(); i != end(); ++i )
1395 {
1396 if ( !wxIsalpha(*i) )
1397 return false;
1398 }
1399
1400 return true;
1401 }
1402
1403 bool wxString::IsNumber() const
1404 {
1405 if ( empty() )
1406 return true;
1407
1408 const_iterator i = begin();
1409
1410 if ( *i == _T('-') || *i == _T('+') )
1411 ++i;
1412
1413 for ( ; i != end(); ++i )
1414 {
1415 if ( !wxIsdigit(*i) )
1416 return false;
1417 }
1418
1419 return true;
1420 }
1421
1422 wxString wxString::Strip(stripType w) const
1423 {
1424 wxString s = *this;
1425 if ( w & leading ) s.Trim(false);
1426 if ( w & trailing ) s.Trim(true);
1427 return s;
1428 }
1429
1430 // ---------------------------------------------------------------------------
1431 // case conversion
1432 // ---------------------------------------------------------------------------
1433
1434 wxString& wxString::MakeUpper()
1435 {
1436 for ( iterator it = begin(), en = end(); it != en; ++it )
1437 *it = (wxChar)wxToupper(*it);
1438
1439 return *this;
1440 }
1441
1442 wxString& wxString::MakeLower()
1443 {
1444 for ( iterator it = begin(), en = end(); it != en; ++it )
1445 *it = (wxChar)wxTolower(*it);
1446
1447 return *this;
1448 }
1449
1450 wxString& wxString::MakeCapitalized()
1451 {
1452 const iterator en = end();
1453 iterator it = begin();
1454 if ( it != en )
1455 {
1456 *it = (wxChar)wxToupper(*it);
1457 for ( ++it; it != en; ++it )
1458 *it = (wxChar)wxTolower(*it);
1459 }
1460
1461 return *this;
1462 }
1463
1464 // ---------------------------------------------------------------------------
1465 // trimming and padding
1466 // ---------------------------------------------------------------------------
1467
1468 // some compilers (VC++ 6.0 not to name them) return true for a call to
1469 // isspace('\xEA') in the C locale which seems to be broken to me, but we have
1470 // to live with this by checking that the character is a 7 bit one - even if
1471 // this may fail to detect some spaces (I don't know if Unicode doesn't have
1472 // space-like symbols somewhere except in the first 128 chars), it is arguably
1473 // still better than trimming away accented letters
1474 inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1475
1476 // trims spaces (in the sense of isspace) from left or right side
1477 wxString& wxString::Trim(bool bFromRight)
1478 {
1479 // first check if we're going to modify the string at all
1480 if ( !empty() &&
1481 (
1482 (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1483 (!bFromRight && wxSafeIsspace(GetChar(0u)))
1484 )
1485 )
1486 {
1487 if ( bFromRight )
1488 {
1489 // find last non-space character
1490 reverse_iterator psz = rbegin();
1491 while ( (psz != rend()) && wxSafeIsspace(*psz) )
1492 ++psz;
1493
1494 // truncate at trailing space start
1495 erase(psz.base(), end());
1496 }
1497 else
1498 {
1499 // find first non-space character
1500 iterator psz = begin();
1501 while ( (psz != end()) && wxSafeIsspace(*psz) )
1502 ++psz;
1503
1504 // fix up data and length
1505 erase(begin(), psz);
1506 }
1507 }
1508
1509 return *this;
1510 }
1511
1512 // adds nCount characters chPad to the string from either side
1513 wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1514 {
1515 wxString s(chPad, nCount);
1516
1517 if ( bFromRight )
1518 *this += s;
1519 else
1520 {
1521 s += *this;
1522 swap(s);
1523 }
1524
1525 return *this;
1526 }
1527
1528 // truncate the string
1529 wxString& wxString::Truncate(size_t uiLen)
1530 {
1531 if ( uiLen < length() )
1532 {
1533 erase(begin() + uiLen, end());
1534 }
1535 //else: nothing to do, string is already short enough
1536
1537 return *this;
1538 }
1539
1540 // ---------------------------------------------------------------------------
1541 // finding (return wxNOT_FOUND if not found and index otherwise)
1542 // ---------------------------------------------------------------------------
1543
1544 // find a character
1545 int wxString::Find(wxUniChar ch, bool bFromEnd) const
1546 {
1547 size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1548
1549 return (idx == npos) ? wxNOT_FOUND : (int)idx;
1550 }
1551
1552 // ----------------------------------------------------------------------------
1553 // conversion to numbers
1554 // ----------------------------------------------------------------------------
1555
1556 // The implementation of all the functions below is exactly the same so factor
1557 // it out. Note that number extraction works correctly on UTF-8 strings, so
1558 // we can use wxStringCharType and wx_str() for maximum efficiency.
1559
1560 #ifndef __WXWINCE__
1561 #define DO_IF_NOT_WINCE(x) x
1562 #else
1563 #define DO_IF_NOT_WINCE(x)
1564 #endif
1565
// Common body of the integer conversion functions: parses the string with
// func in the given base and stores the result, of type T, in *out.
//
// NB: the expansion deliberately ends without a semicolon, which is supplied
//     at the point of use.
#define WX_STRING_TO_INT_TYPE(out, base, func, T)                           \
    wxCHECK_MSG( out, false, _T("NULL output pointer") );                   \
    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );  \
                                                                            \
    DO_IF_NOT_WINCE( errno = 0; )                                           \
                                                                            \
    const wxStringCharType *pszStart = wx_str();                            \
    wxStringCharType *pszEnd;                                               \
    T result = func(pszStart, &pszEnd, base);                               \
                                                                            \
    /* fail if nothing at all was parsed, if the scan stopped before the */ \
    /* terminating NUL or if an under/overflow occurred */                  \
    if ( pszEnd == pszStart || *pszEnd                                      \
            DO_IF_NOT_WINCE(|| errno == ERANGE) )                           \
        return false;                                                       \
                                                                            \
    *out = result;                                                          \
    return true
1583
1584 bool wxString::ToLong(long *pVal, int base) const
1585 {
1586 WX_STRING_TO_INT_TYPE(pVal, base, wxStrtol, long);
1587 }
1588
1589 bool wxString::ToULong(unsigned long *pVal, int base) const
1590 {
1591 WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoul, unsigned long);
1592 }
1593
1594 bool wxString::ToLongLong(wxLongLong_t *pVal, int base) const
1595 {
1596 WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoll, wxLongLong_t);
1597 }
1598
1599 bool wxString::ToULongLong(wxULongLong_t *pVal, int base) const
1600 {
1601 WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoull, wxULongLong_t);
1602 }
1603
1604 bool wxString::ToDouble(double *pVal) const
1605 {
1606 wxCHECK_MSG( pVal, false, _T("NULL output pointer") );
1607
1608 DO_IF_NOT_WINCE( errno = 0; )
1609
1610 const wxChar *start = c_str();
1611 wxChar *end;
1612 double val = wxStrtod(start, &end);
1613
1614 // return true only if scan was stopped by the terminating NUL and if the
1615 // string was not empty to start with and no under/overflow occurred
1616 if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )
1617 return false;
1618
1619 *pVal = val;
1620
1621 return true;
1622 }
1623
1624 // ---------------------------------------------------------------------------
1625 // formatted output
1626 // ---------------------------------------------------------------------------
1627
1628 #if !wxUSE_UTF8_LOCALE_ONLY
1629 /* static */
1630 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1631 wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
1632 #else
1633 wxString wxString::DoFormatWchar(const wxChar *format, ...)
1634 #endif
1635 {
1636 va_list argptr;
1637 va_start(argptr, format);
1638
1639 wxString s;
1640 s.PrintfV(format, argptr);
1641
1642 va_end(argptr);
1643
1644 return s;
1645 }
1646 #endif // !wxUSE_UTF8_LOCALE_ONLY
1647
#if wxUSE_UNICODE_UTF8
// Return a new string containing the variable arguments formatted according
// to the narrow format string; the formatting itself is done by PrintfV()
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
1663
1664 /* static */
1665 wxString wxString::FormatV(const wxString& format, va_list argptr)
1666 {
1667 wxString s;
1668 s.PrintfV(format, argptr);
1669 return s;
1670 }
1671
1672 #if !wxUSE_UTF8_LOCALE_ONLY
1673 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1674 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
1675 #else
1676 int wxString::DoPrintfWchar(const wxChar *format, ...)
1677 #endif
1678 {
1679 va_list argptr;
1680 va_start(argptr, format);
1681
1682 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1683 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1684 // because it's the only cast that works safely for downcasting when
1685 // multiple inheritance is used:
1686 wxString *str = static_cast<wxString*>(this);
1687 #else
1688 wxString *str = this;
1689 #endif
1690
1691 int iLen = str->PrintfV(format, argptr);
1692
1693 va_end(argptr);
1694
1695 return iLen;
1696 }
1697 #endif // !wxUSE_UTF8_LOCALE_ONLY
1698
#if wxUSE_UNICODE_UTF8
// Format the variable arguments according to the narrow format string into
// this string and return the value returned by PrintfV()
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);
    const int iLen = PrintfV(format, args);
    va_end(args);

    return iLen;
}
#endif // wxUSE_UNICODE_UTF8
1712
1713 /*
1714 Uses wxVsnprintf and places the result into the this string.
1715
1716 In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1717 it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in
1718 the ISO C99 (and thus SUSv3) standard the return value for the case of
1719 an undersized buffer is inconsistent. For conforming vsnprintf
1720 implementations the function must return the number of characters that
1721 would have been printed had the buffer been large enough. For conforming
1722 vswprintf implementations the function must return a negative number
1723 and set errno.
1724
1725 What vswprintf sets errno to is undefined but Darwin seems to set it to
1726 EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of
1727 those are defined in the standard and backed up by several conformance
1728 statements. Note that ENOMEM mentioned in the manual page does not
1729 apply to swprintf, only wprintf and fwprintf.
1730
1731 Official manual page:
1732 http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1733
1734 Some conformance statements (AIX, Solaris):
1735 http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1736 http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1737
1738 Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1739 EILSEQ and EINVAL are specifically defined to mean the error is other than
1740 an undersized buffer and no other errno are defined we treat those two
1741 as meaning hard errors and everything else gets the old behavior which
1742 is to keep looping and increasing buffer size until the function succeeds.
1743
1744 In practice it's impossible to determine before compilation which behavior
1745 may be used. The vswprintf function may have vsnprintf-like behavior or
1746 vice-versa. Behavior detected on one release can theoretically change
1747 with an updated release. Not to mention that configure testing for it
1748 would require the test to be run on the host system, not the build system
1749 which makes cross compilation difficult. Therefore, we make no assumptions
1750 about behavior and try our best to handle every known case, including the
1751 case where wxVsnprintf returns a negative number and fails to set errno.
1752
1753 There is yet one more non-standard implementation and that is our own.
1754 Fortunately, that can be detected at compile-time.
1755
1756 On top of all that, ISO C99 explicitly defines snprintf to write a null
1757 character to the last position of the specified buffer. That would be
1758 at the given buffer size minus 1. It is supposed to do this even if it
1759 turns out that the buffer is sized too small.
1760
1761 Darwin (tested on 10.5) follows the C99 behavior exactly.
1762
1763 Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1764 errno even when it fails. However, it only seems to ever fail due
1765 to an undersized buffer.
1766 */
1767 #if wxUSE_UNICODE_UTF8
1768 template<typename BufferType>
1769 #else
1770 // we only need one version in non-UTF8 builds and at least two Windows
1771 // compilers have problems with this function template, so use just one
1772 // normal function here
1773 #endif
1774 static int DoStringPrintfV(wxString& str,
1775 const wxString& format, va_list argptr)
1776 {
1777 int size = 1024;
1778
1779 for ( ;; )
1780 {
1781 #if wxUSE_UNICODE_UTF8
1782 BufferType tmp(str, size + 1);
1783 typename BufferType::CharType *buf = tmp;
1784 #else
1785 wxStringBuffer tmp(str, size + 1);
1786 wxChar *buf = tmp;
1787 #endif
1788
1789 if ( !buf )
1790 {
1791 // out of memory
1792
1793 // in UTF-8 build, leaving uninitialized junk in the buffer
1794 // could result in invalid non-empty UTF-8 string, so just
1795 // reset the string to empty on failure:
1796 buf[0] = '\0';
1797 return -1;
1798 }
1799
1800 // wxVsnprintf() may modify the original arg pointer, so pass it
1801 // only a copy
1802 va_list argptrcopy;
1803 wxVaCopy(argptrcopy, argptr);
1804
1805 #ifndef __WXWINCE__
1806 // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1807 errno = 0;
1808 #endif
1809 int len = wxVsnprintf(buf, size, format, argptrcopy);
1810 va_end(argptrcopy);
1811
1812 // some implementations of vsnprintf() don't NUL terminate
1813 // the string if there is not enough space for it so
1814 // always do it manually
1815 // FIXME: This really seems to be the wrong and would be an off-by-one
1816 // bug except the code above allocates an extra character.
1817 buf[size] = _T('\0');
1818
1819 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1820 // total number of characters which would have been written if the
1821 // buffer were large enough (newer standards such as Unix98)
1822 if ( len < 0 )
1823 {
1824 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1825 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1826 // is true if *both* of them use our own implementation,
1827 // otherwise we can't be sure
1828 #if wxUSE_WXVSNPRINTF
1829 // we know that our own implementation of wxVsnprintf() returns -1
1830 // only for a format error - thus there's something wrong with
1831 // the user's format string
1832 buf[0] = '\0';
1833 return -1;
1834 #else // possibly using system version
1835 // assume it only returns error if there is not enough space, but
1836 // as we don't know how much we need, double the current size of
1837 // the buffer
1838 #ifndef __WXWINCE__
1839 if( (errno == EILSEQ) || (errno == EINVAL) )
1840 // If errno was set to one of the two well-known hard errors
1841 // then fail immediately to avoid an infinite loop.
1842 return -1;
1843 else
1844 #endif // __WXWINCE__
1845 // still not enough, as we don't know how much we need, double the
1846 // current size of the buffer
1847 size *= 2;
1848 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1849 }
1850 else if ( len >= size )
1851 {
1852 #if wxUSE_WXVSNPRINTF
1853 // we know that our own implementation of wxVsnprintf() returns
1854 // size+1 when there's not enough space but that's not the size
1855 // of the required buffer!
1856 size *= 2; // so we just double the current size of the buffer
1857 #else
1858 // some vsnprintf() implementations NUL-terminate the buffer and
1859 // some don't in len == size case, to be safe always add 1
1860 // FIXME: I don't quite understand this comment. The vsnprintf
1861 // function is specifically defined to return the number of
1862 // characters printed not including the null terminator.
1863 // So OF COURSE you need to add 1 to get the right buffer size.
1864 // The following line is definitely correct, no question.
1865 size = len + 1;
1866 #endif
1867 }
1868 else // ok, there was enough space
1869 {
1870 break;
1871 }
1872 }
1873
1874 // we could have overshot
1875 str.Shrink();
1876
1877 return str.length();
1878 }
1879
1880 int wxString::PrintfV(const wxString& format, va_list argptr)
1881 {
1882 #if wxUSE_UNICODE_UTF8
1883 #if wxUSE_STL_BASED_WXSTRING
1884 typedef wxStringTypeBuffer<char> Utf8Buffer;
1885 #else
1886 typedef wxStringInternalBuffer Utf8Buffer;
1887 #endif
1888 #endif
1889
1890 #if wxUSE_UTF8_LOCALE_ONLY
1891 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
1892 #else
1893 #if wxUSE_UNICODE_UTF8
1894 if ( wxLocaleIsUtf8 )
1895 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
1896 else
1897 // wxChar* version
1898 return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
1899 #else
1900 return DoStringPrintfV(*this, format, argptr);
1901 #endif // UTF8/WCHAR
1902 #endif
1903 }
1904
1905 // ----------------------------------------------------------------------------
1906 // misc other operations
1907 // ----------------------------------------------------------------------------
1908
1909 // returns true if the string matches the pattern which may contain '*' and
1910 // '?' metacharacters (as usual, '?' matches any character and '*' any number
1911 // of them)
1912 bool wxString::Matches(const wxString& mask) const
1913 {
1914 // I disable this code as it doesn't seem to be faster (in fact, it seems
1915 // to be much slower) than the old, hand-written code below and using it
1916 // here requires always linking with libregex even if the user code doesn't
1917 // use it
1918 #if 0 // wxUSE_REGEX
1919 // first translate the shell-like mask into a regex
1920 wxString pattern;
1921 pattern.reserve(wxStrlen(pszMask));
1922
1923 pattern += _T('^');
1924 while ( *pszMask )
1925 {
1926 switch ( *pszMask )
1927 {
1928 case _T('?'):
1929 pattern += _T('.');
1930 break;
1931
1932 case _T('*'):
1933 pattern += _T(".*");
1934 break;
1935
1936 case _T('^'):
1937 case _T('.'):
1938 case _T('$'):
1939 case _T('('):
1940 case _T(')'):
1941 case _T('|'):
1942 case _T('+'):
1943 case _T('\\'):
1944 // these characters are special in a RE, quote them
1945 // (however note that we don't quote '[' and ']' to allow
1946 // using them for Unix shell like matching)
1947 pattern += _T('\\');
1948 // fall through
1949
1950 default:
1951 pattern += *pszMask;
1952 }
1953
1954 pszMask++;
1955 }
1956 pattern += _T('$');
1957
1958 // and now use it
1959 return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
1960 #else // !wxUSE_REGEX
1961 // TODO: this is, of course, awfully inefficient...
1962
1963 // FIXME-UTF8: implement using iterators, remove #if
1964 #if wxUSE_UNICODE_UTF8
1965 wxWCharBuffer maskBuf = mask.wc_str();
1966 wxWCharBuffer txtBuf = wc_str();
1967 const wxChar *pszMask = maskBuf.data();
1968 const wxChar *pszTxt = txtBuf.data();
1969 #else
1970 const wxChar *pszMask = mask.wx_str();
1971 // the char currently being checked
1972 const wxChar *pszTxt = wx_str();
1973 #endif
1974
1975 // the last location where '*' matched
1976 const wxChar *pszLastStarInText = NULL;
1977 const wxChar *pszLastStarInMask = NULL;
1978
1979 match:
1980 for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
1981 switch ( *pszMask ) {
1982 case wxT('?'):
1983 if ( *pszTxt == wxT('\0') )
1984 return false;
1985
1986 // pszTxt and pszMask will be incremented in the loop statement
1987
1988 break;
1989
1990 case wxT('*'):
1991 {
1992 // remember where we started to be able to backtrack later
1993 pszLastStarInText = pszTxt;
1994 pszLastStarInMask = pszMask;
1995
1996 // ignore special chars immediately following this one
1997 // (should this be an error?)
1998 while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
1999 pszMask++;
2000
2001 // if there is nothing more, match
2002 if ( *pszMask == wxT('\0') )
2003 return true;
2004
2005 // are there any other metacharacters in the mask?
2006 size_t uiLenMask;
2007 const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));
2008
2009 if ( pEndMask != NULL ) {
2010 // we have to match the string between two metachars
2011 uiLenMask = pEndMask - pszMask;
2012 }
2013 else {
2014 // we have to match the remainder of the string
2015 uiLenMask = wxStrlen(pszMask);
2016 }
2017
2018 wxString strToMatch(pszMask, uiLenMask);
2019 const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
2020 if ( pMatch == NULL )
2021 return false;
2022
2023 // -1 to compensate "++" in the loop
2024 pszTxt = pMatch + uiLenMask - 1;
2025 pszMask += uiLenMask - 1;
2026 }
2027 break;
2028
2029 default:
2030 if ( *pszMask != *pszTxt )
2031 return false;
2032 break;
2033 }
2034 }
2035
2036 // match only if nothing left
2037 if ( *pszTxt == wxT('\0') )
2038 return true;
2039
2040 // if we failed to match, backtrack if we can
2041 if ( pszLastStarInText ) {
2042 pszTxt = pszLastStarInText + 1;
2043 pszMask = pszLastStarInMask;
2044
2045 pszLastStarInText = NULL;
2046
2047 // don't bother resetting pszLastStarInMask, it's unnecessary
2048
2049 goto match;
2050 }
2051
2052 return false;
2053 #endif // wxUSE_REGEX/!wxUSE_REGEX
2054 }
2055
2056 // Count the number of chars
2057 int wxString::Freq(wxUniChar ch) const
2058 {
2059 int count = 0;
2060 for ( const_iterator i = begin(); i != end(); ++i )
2061 {
2062 if ( *i == ch )
2063 count ++;
2064 }
2065 return count;
2066 }
2067
2068 // ----------------------------------------------------------------------------
2069 // wxUTF8StringBuffer
2070 // ----------------------------------------------------------------------------
2071
2072 #if wxUSE_UNICODE_WCHAR
2073 wxUTF8StringBuffer::~wxUTF8StringBuffer()
2074 {
2075 wxMBConvStrictUTF8 conv;
2076 size_t wlen = conv.ToWChar(NULL, 0, m_buf);
2077 wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
2078
2079 wxStringInternalBuffer wbuf(m_str, wlen);
2080 conv.ToWChar(wbuf, wlen, m_buf);
2081 }
2082
2083 wxUTF8StringBufferLength::~wxUTF8StringBufferLength()
2084 {
2085 wxCHECK_RET(m_lenSet, "length not set");
2086
2087 wxMBConvStrictUTF8 conv;
2088 size_t wlen = conv.ToWChar(NULL, 0, m_buf, m_len);
2089 wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
2090
2091 wxStringInternalBufferLength wbuf(m_str, wlen);
2092 conv.ToWChar(wbuf, wlen, m_buf, m_len);
2093 wbuf.SetLength(wlen);
2094 }
2095 #endif // wxUSE_UNICODE_WCHAR
2096
2097 // ----------------------------------------------------------------------------
2098 // wxCharTypeBuffer<T>
2099 // ----------------------------------------------------------------------------
2100
2101 template<>
2102 wxCharTypeBuffer<char>::Data
2103 wxCharTypeBuffer<char>::NullData(NULL);
2104
2105 template<>
2106 wxCharTypeBuffer<wchar_t>::Data
2107 wxCharTypeBuffer<wchar_t>::NullData(NULL);