]> git.saurik.com Git - wxWidgets.git/blob - src/common/string.cpp
add very simple (but already exposing many problems) wxIPC benchmark
[wxWidgets.git] / src / common / string.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
5 // Modified by:
6 // Created: 29/01/98
7 // RCS-ID: $Id$
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
12
13 // ===========================================================================
14 // headers, declarations, constants
15 // ===========================================================================
16
17 // For compilers that support precompilation, includes "wx.h".
18 #include "wx/wxprec.h"
19
20 #ifdef __BORLANDC__
21 #pragma hdrstop
22 #endif
23
24 #ifndef WX_PRECOMP
25 #include "wx/string.h"
26 #include "wx/wxcrtvararg.h"
27 #endif
28
29 #include <ctype.h>
30
31 #ifndef __WXWINCE__
32 #include <errno.h>
33 #endif
34
35 #include <string.h>
36 #include <stdlib.h>
37
38 #include "wx/hashmap.h"
39
// Low-level string primitives used by wxString; which set is used depends on
// whether this is the UTF-8 build or not:
#if wxUSE_UNICODE_UTF8
    // UTF-8 build: use the plain C library functions
    #define wxStringMemcpy   memcpy
    #define wxStringMemcmp   memcmp
    #define wxStringMemchr   memchr
    #define wxStringStrlen   strlen
#else
    // all other builds: use the wx equivalents
    #define wxStringMemcpy   wxTmemcpy
    #define wxStringMemcmp   wxTmemcmp
    #define wxStringMemchr   wxTmemchr
    #define wxStringStrlen   wxStrlen
#endif
52
53
54 // ---------------------------------------------------------------------------
55 // static class variables definition
56 // ---------------------------------------------------------------------------
57
58 //According to STL _must_ be a -1 size_t
59 const size_t wxString::npos = (size_t) -1;
60
61 #if wxUSE_STRING_POS_CACHE
62
63 #ifdef wxHAS_COMPILER_TLS
64
65 wxTLS_TYPE(wxString::Cache) wxString::ms_cache;
66
67 #else // !wxHAS_COMPILER_TLS
68
69 struct wxStrCacheInitializer
70 {
71 wxStrCacheInitializer()
72 {
73 // calling this function triggers s_cache initialization in it, and
74 // from now on it becomes safe to call from multiple threads
75 wxString::GetCache();
76 }
77 };
78
79 /*
80 wxString::Cache& wxString::GetCache()
81 {
82 static wxTLS_TYPE(Cache) s_cache;
83
84 return wxTLS_VALUE(s_cache);
85 }
86 */
87
88 static wxStrCacheInitializer gs_stringCacheInit;
89
90 #endif // wxHAS_COMPILER_TLS/!wxHAS_COMPILER_TLS
91
92 // gdb seems to be unable to display thread-local variables correctly, at least
93 // not my 6.4.98 version under amd64, so provide this debugging helper to do it
94 #ifdef __WXDEBUG__
95
96 struct wxStrCacheDumper
97 {
98 static void ShowAll()
99 {
100 puts("*** wxString cache dump:");
101 for ( unsigned n = 0; n < wxString::Cache::SIZE; n++ )
102 {
103 const wxString::Cache::Element&
104 c = wxString::GetCacheBegin()[n];
105
106 printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
107 n,
108 n == wxString::LastUsedCacheElement() ? " [*]" : "",
109 c.str,
110 (unsigned long)c.pos,
111 (unsigned long)c.impl,
112 (long)c.len);
113 }
114 }
115 };
116
117 void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
118
119 #endif // __WXDEBUG__
120
121 #ifdef wxPROFILE_STRING_CACHE
122
123 wxString::CacheStats wxString::ms_cacheStats;
124
125 struct wxStrCacheStatsDumper
126 {
127 ~wxStrCacheStatsDumper()
128 {
129 const wxString::CacheStats& stats = wxString::ms_cacheStats;
130
131 if ( stats.postot )
132 {
133 puts("*** wxString cache statistics:");
134 printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
135 stats.postot);
136 printf("\tHits %u (of which %u not used) or %.2f%%\n",
137 stats.poshits,
138 stats.mishits,
139 100.*float(stats.poshits - stats.mishits)/stats.postot);
140 printf("\tAverage position requested: %.2f\n",
141 float(stats.sumpos) / stats.postot);
142 printf("\tAverage offset after cached hint: %.2f\n",
143 float(stats.sumofs) / stats.postot);
144 }
145
146 if ( stats.lentot )
147 {
148 printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
149 stats.lentot, 100.*float(stats.lenhits)/stats.lentot);
150 }
151 }
152 };
153
154 static wxStrCacheStatsDumper s_showCacheStats;
155
156 #endif // wxPROFILE_STRING_CACHE
157
158 #endif // wxUSE_STRING_POS_CACHE
159
160 // ----------------------------------------------------------------------------
161 // global functions
162 // ----------------------------------------------------------------------------
163
164 #if wxUSE_STD_IOSTREAM
165
166 #include <iostream>
167
168 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
169 {
170 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
171 const wxCharBuffer buf(str.AsCharBuf());
172 if ( !buf )
173 os.clear(wxSTD ios_base::failbit);
174 else
175 os << buf.data();
176
177 return os;
178 #else
179 return os << str.AsInternal();
180 #endif
181 }
182
183 wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
184 {
185 return os << str.c_str();
186 }
187
188 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str)
189 {
190 return os << str.data();
191 }
192
193 #ifndef __BORLANDC__
194 wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)
195 {
196 return os << str.data();
197 }
198 #endif
199
200 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
201
202 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxString& str)
203 {
204 return wos << str.wc_str();
205 }
206
207 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxCStrData& str)
208 {
209 return wos << str.AsWChar();
210 }
211
212 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxWCharBuffer& str)
213 {
214 return wos << str.data();
215 }
216
217 #endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
218
219 #endif // wxUSE_STD_IOSTREAM
220
221 // ===========================================================================
222 // wxString class core
223 // ===========================================================================
224
225 #if wxUSE_UNICODE_UTF8
226
227 void wxString::PosLenToImpl(size_t pos, size_t len,
228 size_t *implPos, size_t *implLen) const
229 {
230 if ( pos == npos )
231 {
232 *implPos = npos;
233 }
234 else // have valid start position
235 {
236 const const_iterator b = GetIterForNthChar(pos);
237 *implPos = wxStringImpl::const_iterator(b.impl()) - m_impl.begin();
238 if ( len == npos )
239 {
240 *implLen = npos;
241 }
242 else // have valid length too
243 {
244 // we need to handle the case of length specifying a substring
245 // going beyond the end of the string, just as std::string does
246 const const_iterator e(end());
247 const_iterator i(b);
248 while ( len && i <= e )
249 {
250 ++i;
251 --len;
252 }
253
254 *implLen = i.impl() - b.impl();
255 }
256 }
257 }
258
259 #endif // wxUSE_UNICODE_UTF8
260
261 // ----------------------------------------------------------------------------
262 // wxCStrData converted strings caching
263 // ----------------------------------------------------------------------------
264
265 // FIXME-UTF8: temporarily disabled because it doesn't work with global
266 // string objects; re-enable after fixing this bug and benchmarking
267 // performance to see if using a hash is a good idea at all
268 #if 0
269
270 // For backward compatibility reasons, it must be possible to assign the value
271 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
272 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
273 // because the memory would be freed immediately, but it has to be valid as long
274 // as the string is not modified, so that code like this still works:
275 //
276 // const wxChar *s = str.c_str();
277 // while ( s ) { ... }
278
279 // FIXME-UTF8: not thread safe!
280 // FIXME-UTF8: we currently clear the cached conversion only when the string is
281 // destroyed, but we should do it when the string is modified, to
282 // keep memory usage down
283 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
284 // invalidated the cache on every change, we could keep the previous
285 // conversion
286 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
287 // to use mb_str() or wc_str() instead of (const [w]char*)c_str()
288
289 template<typename T>
290 static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
291 {
292 typename T::iterator i = hash.find(wxConstCast(s, wxString));
293 if ( i != hash.end() )
294 {
295 free(i->second);
296 hash.erase(i);
297 }
298 }
299
300 #if wxUSE_UNICODE
301 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
302 // so we have to use wxString* here and const-cast when used
303 WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
304 wxStringCharConversionCache);
305 static wxStringCharConversionCache gs_stringsCharCache;
306
307 const char* wxCStrData::AsChar() const
308 {
309 // remove previously cache value, if any (see FIXMEs above):
310 DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
311
312 // convert the string and keep it:
313 const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
314 m_str->mb_str().release();
315
316 return s + m_offset;
317 }
318 #endif // wxUSE_UNICODE
319
320 #if !wxUSE_UNICODE_WCHAR
321 WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
322 wxStringWCharConversionCache);
323 static wxStringWCharConversionCache gs_stringsWCharCache;
324
325 const wchar_t* wxCStrData::AsWChar() const
326 {
327 // remove previously cache value, if any (see FIXMEs above):
328 DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
329
330 // convert the string and keep it:
331 const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
332 m_str->wc_str().release();
333
334 return s + m_offset;
335 }
336 #endif // !wxUSE_UNICODE_WCHAR
337
338 wxString::~wxString()
339 {
340 #if wxUSE_UNICODE
341 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
342 DeleteStringFromConversionCache(gs_stringsCharCache, this);
343 #endif
344 #if !wxUSE_UNICODE_WCHAR
345 DeleteStringFromConversionCache(gs_stringsWCharCache, this);
346 #endif
347 }
348 #endif
349
350 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
351 const char* wxCStrData::AsChar() const
352 {
353 #if wxUSE_UNICODE_UTF8
354 if ( wxLocaleIsUtf8 )
355 return AsInternal();
356 #endif
357 // under non-UTF8 locales, we have to convert the internal UTF-8
358 // representation using wxConvLibc and cache the result
359
360 wxString *str = wxConstCast(m_str, wxString);
361
362 // convert the string:
363 //
364 // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
365 // have it) but it's unfortunately not obvious to implement
366 // because we don't know how big buffer do we need for the
367 // given string length (in case of multibyte encodings, e.g.
368 // ISO-2022-JP or UTF-8 when internal representation is wchar_t)
369 //
370 // One idea would be to store more than just m_convertedToChar
371 // in wxString: then we could record the length of the string
372 // which was converted the last time and try to reuse the same
373 // buffer if the current length is not greater than it (this
374 // could still fail because string could have been modified in
375 // place but it would work most of the time, so we'd do it and
376 // only allocate the new buffer if in-place conversion returned
377 // an error). We could also store a bit saying if the string
378 // was modified since the last conversion (and update it in all
379 // operation modifying the string, of course) to avoid unneeded
380 // consequential conversions. But both of these ideas require
381 // adding more fields to wxString and require profiling results
382 // to be sure that we really gain enough from them to justify
383 // doing it.
384 wxCharBuffer buf(str->mb_str());
385
386 // if it failed, return empty string and not NULL to avoid crashes in code
387 // written with either wxWidgets 2 wxString or std::string behaviour in
388 // mind: neither of them ever returns NULL and so we shouldn't neither
389 if ( !buf )
390 return "";
391
392 if ( str->m_convertedToChar &&
393 strlen(buf) == strlen(str->m_convertedToChar) )
394 {
395 // keep the same buffer for as long as possible, so that several calls
396 // to c_str() in a row still work:
397 strcpy(str->m_convertedToChar, buf);
398 }
399 else
400 {
401 str->m_convertedToChar = buf.release();
402 }
403
404 // and keep it:
405 return str->m_convertedToChar + m_offset;
406 }
407 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
408
409 #if !wxUSE_UNICODE_WCHAR
410 const wchar_t* wxCStrData::AsWChar() const
411 {
412 wxString *str = wxConstCast(m_str, wxString);
413
414 // convert the string:
415 wxWCharBuffer buf(str->wc_str());
416
417 // notice that here, unlike above in AsChar(), conversion can't fail as our
418 // internal UTF-8 is always well-formed -- or the string was corrupted and
419 // all bets are off anyhow
420
421 // FIXME-UTF8: do the conversion in-place in the existing buffer
422 if ( str->m_convertedToWChar &&
423 wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
424 {
425 // keep the same buffer for as long as possible, so that several calls
426 // to c_str() in a row still work:
427 memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
428 }
429 else
430 {
431 str->m_convertedToWChar = buf.release();
432 }
433
434 // and keep it:
435 return str->m_convertedToWChar + m_offset;
436 }
437 #endif // !wxUSE_UNICODE_WCHAR
438
439 // ===========================================================================
440 // wxString class core
441 // ===========================================================================
442
443 // ---------------------------------------------------------------------------
444 // construction and conversion
445 // ---------------------------------------------------------------------------
446
447 #if wxUSE_UNICODE_WCHAR
448 /* static */
449 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
450 const wxMBConv& conv)
451 {
452 // anything to do?
453 if ( !psz || nLength == 0 )
454 return SubstrBufFromMB(L"", 0);
455
456 if ( nLength == npos )
457 nLength = wxNO_LEN;
458
459 size_t wcLen;
460 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
461 if ( !wcLen )
462 return SubstrBufFromMB(_T(""), 0);
463 else
464 return SubstrBufFromMB(wcBuf, wcLen);
465 }
466 #endif // wxUSE_UNICODE_WCHAR
467
468 #if wxUSE_UNICODE_UTF8
469 /* static */
470 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
471 const wxMBConv& conv)
472 {
473 // anything to do?
474 if ( !psz || nLength == 0 )
475 return SubstrBufFromMB("", 0);
476
477 // if psz is already in UTF-8, we don't have to do the roundtrip to
478 // wchar_t* and back:
479 if ( conv.IsUTF8() )
480 {
481 // we need to validate the input because UTF8 iterators assume valid
482 // UTF-8 sequence and psz may be invalid:
483 if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
484 {
485 // we must pass the real string length to SubstrBufFromMB ctor
486 if ( nLength == npos )
487 nLength = psz ? strlen(psz) : 0;
488 return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz), nLength);
489 }
490 // else: do the roundtrip through wchar_t*
491 }
492
493 if ( nLength == npos )
494 nLength = wxNO_LEN;
495
496 // first convert to wide string:
497 size_t wcLen;
498 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
499 if ( !wcLen )
500 return SubstrBufFromMB("", 0);
501
502 // and then to UTF-8:
503 SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
504 // widechar -> UTF-8 conversion isn't supposed to ever fail:
505 wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
506
507 return buf;
508 }
509 #endif // wxUSE_UNICODE_UTF8
510
511 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
512 /* static */
513 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
514 const wxMBConv& conv)
515 {
516 // anything to do?
517 if ( !pwz || nLength == 0 )
518 return SubstrBufFromWC("", 0);
519
520 if ( nLength == npos )
521 nLength = wxNO_LEN;
522
523 size_t mbLen;
524 wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
525 if ( !mbLen )
526 return SubstrBufFromWC("", 0);
527 else
528 return SubstrBufFromWC(mbBuf, mbLen);
529 }
530 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
531
532
533 #if wxUSE_UNICODE_WCHAR
534
535 //Convert wxString in Unicode mode to a multi-byte string
536 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
537 {
538 return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL);
539 }
540
541 #elif wxUSE_UNICODE_UTF8
542
543 const wxWCharBuffer wxString::wc_str() const
544 {
545 return wxMBConvStrictUTF8().cMB2WC
546 (
547 m_impl.c_str(),
548 m_impl.length() + 1, // size, not length
549 NULL
550 );
551 }
552
553 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
554 {
555 if ( conv.IsUTF8() )
556 return wxCharBuffer::CreateNonOwned(m_impl.c_str());
557
558 // FIXME-UTF8: use wc_str() here once we have buffers with length
559
560 size_t wcLen;
561 wxWCharBuffer wcBuf(wxMBConvStrictUTF8().cMB2WC
562 (
563 m_impl.c_str(),
564 m_impl.length() + 1, // size
565 &wcLen
566 ));
567 if ( !wcLen )
568 return wxCharBuffer("");
569
570 return conv.cWC2MB(wcBuf, wcLen+1, NULL);
571 }
572
573 #else // ANSI
574
575 //Converts this string to a wide character string if unicode
576 //mode is not enabled and wxUSE_WCHAR_T is enabled
577 const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
578 {
579 return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL);
580 }
581
582 #endif // Unicode/ANSI
583
584 // shrink to minimal size (releasing extra memory)
585 bool wxString::Shrink()
586 {
587 wxString tmp(begin(), end());
588 swap(tmp);
589 return tmp.length() == length();
590 }
591
592 // deprecated compatibility code:
593 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
594 wxStringCharType *wxString::GetWriteBuf(size_t nLen)
595 {
596 return DoGetWriteBuf(nLen);
597 }
598
599 void wxString::UngetWriteBuf()
600 {
601 DoUngetWriteBuf();
602 }
603
604 void wxString::UngetWriteBuf(size_t nLen)
605 {
606 DoUngetWriteBuf(nLen);
607 }
608 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
609
610
611 // ---------------------------------------------------------------------------
612 // data access
613 // ---------------------------------------------------------------------------
614
615 // all functions are inline in string.h
616
617 // ---------------------------------------------------------------------------
618 // concatenation operators
619 // ---------------------------------------------------------------------------
620
621 /*
622 * concatenation functions come in 5 flavours:
623 * string + string
624 * char + string and string + char
625 * C str + string and string + C str
626 */
627
628 wxString operator+(const wxString& str1, const wxString& str2)
629 {
630 #if !wxUSE_STL_BASED_WXSTRING
631 wxASSERT( str1.IsValid() );
632 wxASSERT( str2.IsValid() );
633 #endif
634
635 wxString s = str1;
636 s += str2;
637
638 return s;
639 }
640
641 wxString operator+(const wxString& str, wxUniChar ch)
642 {
643 #if !wxUSE_STL_BASED_WXSTRING
644 wxASSERT( str.IsValid() );
645 #endif
646
647 wxString s = str;
648 s += ch;
649
650 return s;
651 }
652
653 wxString operator+(wxUniChar ch, const wxString& str)
654 {
655 #if !wxUSE_STL_BASED_WXSTRING
656 wxASSERT( str.IsValid() );
657 #endif
658
659 wxString s = ch;
660 s += str;
661
662 return s;
663 }
664
665 wxString operator+(const wxString& str, const char *psz)
666 {
667 #if !wxUSE_STL_BASED_WXSTRING
668 wxASSERT( str.IsValid() );
669 #endif
670
671 wxString s;
672 if ( !s.Alloc(strlen(psz) + str.length()) ) {
673 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
674 }
675 s += str;
676 s += psz;
677
678 return s;
679 }
680
681 wxString operator+(const wxString& str, const wchar_t *pwz)
682 {
683 #if !wxUSE_STL_BASED_WXSTRING
684 wxASSERT( str.IsValid() );
685 #endif
686
687 wxString s;
688 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
689 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
690 }
691 s += str;
692 s += pwz;
693
694 return s;
695 }
696
697 wxString operator+(const char *psz, const wxString& str)
698 {
699 #if !wxUSE_STL_BASED_WXSTRING
700 wxASSERT( str.IsValid() );
701 #endif
702
703 wxString s;
704 if ( !s.Alloc(strlen(psz) + str.length()) ) {
705 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
706 }
707 s = psz;
708 s += str;
709
710 return s;
711 }
712
713 wxString operator+(const wchar_t *pwz, const wxString& str)
714 {
715 #if !wxUSE_STL_BASED_WXSTRING
716 wxASSERT( str.IsValid() );
717 #endif
718
719 wxString s;
720 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
721 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
722 }
723 s = pwz;
724 s += str;
725
726 return s;
727 }
728
729 // ---------------------------------------------------------------------------
730 // string comparison
731 // ---------------------------------------------------------------------------
732
733 bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
734 {
735 return (length() == 1) && (compareWithCase ? GetChar(0u) == c
736 : wxToupper(GetChar(0u)) == wxToupper(c));
737 }
738
739 #ifdef HAVE_STD_STRING_COMPARE
740
741 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
742 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
743 // sort strings in characters code point order by sorting the byte sequence
744 // in byte values order (i.e. what strcmp() and memcmp() do).
745
746 int wxString::compare(const wxString& str) const
747 {
748 return m_impl.compare(str.m_impl);
749 }
750
751 int wxString::compare(size_t nStart, size_t nLen,
752 const wxString& str) const
753 {
754 size_t pos, len;
755 PosLenToImpl(nStart, nLen, &pos, &len);
756 return m_impl.compare(pos, len, str.m_impl);
757 }
758
759 int wxString::compare(size_t nStart, size_t nLen,
760 const wxString& str,
761 size_t nStart2, size_t nLen2) const
762 {
763 size_t pos, len;
764 PosLenToImpl(nStart, nLen, &pos, &len);
765
766 size_t pos2, len2;
767 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
768
769 return m_impl.compare(pos, len, str.m_impl, pos2, len2);
770 }
771
772 int wxString::compare(const char* sz) const
773 {
774 return m_impl.compare(ImplStr(sz));
775 }
776
777 int wxString::compare(const wchar_t* sz) const
778 {
779 return m_impl.compare(ImplStr(sz));
780 }
781
782 int wxString::compare(size_t nStart, size_t nLen,
783 const char* sz, size_t nCount) const
784 {
785 size_t pos, len;
786 PosLenToImpl(nStart, nLen, &pos, &len);
787
788 SubstrBufFromMB str(ImplStr(sz, nCount));
789
790 return m_impl.compare(pos, len, str.data, str.len);
791 }
792
793 int wxString::compare(size_t nStart, size_t nLen,
794 const wchar_t* sz, size_t nCount) const
795 {
796 size_t pos, len;
797 PosLenToImpl(nStart, nLen, &pos, &len);
798
799 SubstrBufFromWC str(ImplStr(sz, nCount));
800
801 return m_impl.compare(pos, len, str.data, str.len);
802 }
803
804 #else // !HAVE_STD_STRING_COMPARE
805
806 static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
807 const wxStringCharType* s2, size_t l2)
808 {
809 if( l1 == l2 )
810 return wxStringMemcmp(s1, s2, l1);
811 else if( l1 < l2 )
812 {
813 int ret = wxStringMemcmp(s1, s2, l1);
814 return ret == 0 ? -1 : ret;
815 }
816 else
817 {
818 int ret = wxStringMemcmp(s1, s2, l2);
819 return ret == 0 ? +1 : ret;
820 }
821 }
822
823 int wxString::compare(const wxString& str) const
824 {
825 return ::wxDoCmp(m_impl.data(), m_impl.length(),
826 str.m_impl.data(), str.m_impl.length());
827 }
828
829 int wxString::compare(size_t nStart, size_t nLen,
830 const wxString& str) const
831 {
832 wxASSERT(nStart <= length());
833 size_type strLen = length() - nStart;
834 nLen = strLen < nLen ? strLen : nLen;
835
836 size_t pos, len;
837 PosLenToImpl(nStart, nLen, &pos, &len);
838
839 return ::wxDoCmp(m_impl.data() + pos, len,
840 str.m_impl.data(), str.m_impl.length());
841 }
842
843 int wxString::compare(size_t nStart, size_t nLen,
844 const wxString& str,
845 size_t nStart2, size_t nLen2) const
846 {
847 wxASSERT(nStart <= length());
848 wxASSERT(nStart2 <= str.length());
849 size_type strLen = length() - nStart,
850 strLen2 = str.length() - nStart2;
851 nLen = strLen < nLen ? strLen : nLen;
852 nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
853
854 size_t pos, len;
855 PosLenToImpl(nStart, nLen, &pos, &len);
856 size_t pos2, len2;
857 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
858
859 return ::wxDoCmp(m_impl.data() + pos, len,
860 str.m_impl.data() + pos2, len2);
861 }
862
863 int wxString::compare(const char* sz) const
864 {
865 SubstrBufFromMB str(ImplStr(sz, npos));
866 if ( str.len == npos )
867 str.len = wxStringStrlen(str.data);
868 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
869 }
870
871 int wxString::compare(const wchar_t* sz) const
872 {
873 SubstrBufFromWC str(ImplStr(sz, npos));
874 if ( str.len == npos )
875 str.len = wxStringStrlen(str.data);
876 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
877 }
878
879 int wxString::compare(size_t nStart, size_t nLen,
880 const char* sz, size_t nCount) const
881 {
882 wxASSERT(nStart <= length());
883 size_type strLen = length() - nStart;
884 nLen = strLen < nLen ? strLen : nLen;
885
886 size_t pos, len;
887 PosLenToImpl(nStart, nLen, &pos, &len);
888
889 SubstrBufFromMB str(ImplStr(sz, nCount));
890 if ( str.len == npos )
891 str.len = wxStringStrlen(str.data);
892
893 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
894 }
895
896 int wxString::compare(size_t nStart, size_t nLen,
897 const wchar_t* sz, size_t nCount) const
898 {
899 wxASSERT(nStart <= length());
900 size_type strLen = length() - nStart;
901 nLen = strLen < nLen ? strLen : nLen;
902
903 size_t pos, len;
904 PosLenToImpl(nStart, nLen, &pos, &len);
905
906 SubstrBufFromWC str(ImplStr(sz, nCount));
907 if ( str.len == npos )
908 str.len = wxStringStrlen(str.data);
909
910 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
911 }
912
913 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
914
915
916 // ---------------------------------------------------------------------------
917 // find_{first,last}_[not]_of functions
918 // ---------------------------------------------------------------------------
919
920 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
921
922 // NB: All these functions are implemented with the argument being wxChar*,
923 // i.e. widechar string in any Unicode build, even though native string
924 // representation is char* in the UTF-8 build. This is because we couldn't
925 // use memchr() to determine if a character is in a set encoded as UTF-8.
926
927 size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
928 {
929 return find_first_of(sz, nStart, wxStrlen(sz));
930 }
931
932 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
933 {
934 return find_first_not_of(sz, nStart, wxStrlen(sz));
935 }
936
937 size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
938 {
939 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
940
941 size_t idx = nStart;
942 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
943 {
944 if ( wxTmemchr(sz, *i, n) )
945 return idx;
946 }
947
948 return npos;
949 }
950
951 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
952 {
953 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
954
955 size_t idx = nStart;
956 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
957 {
958 if ( !wxTmemchr(sz, *i, n) )
959 return idx;
960 }
961
962 return npos;
963 }
964
965
966 size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
967 {
968 return find_last_of(sz, nStart, wxStrlen(sz));
969 }
970
971 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
972 {
973 return find_last_not_of(sz, nStart, wxStrlen(sz));
974 }
975
976 size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
977 {
978 size_t len = length();
979
980 if ( nStart == npos )
981 {
982 nStart = len - 1;
983 }
984 else
985 {
986 wxASSERT_MSG( nStart <= len, _T("invalid index") );
987 }
988
989 size_t idx = nStart;
990 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
991 i != rend(); --idx, ++i )
992 {
993 if ( wxTmemchr(sz, *i, n) )
994 return idx;
995 }
996
997 return npos;
998 }
999
1000 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
1001 {
1002 size_t len = length();
1003
1004 if ( nStart == npos )
1005 {
1006 nStart = len - 1;
1007 }
1008 else
1009 {
1010 wxASSERT_MSG( nStart <= len, _T("invalid index") );
1011 }
1012
1013 size_t idx = nStart;
1014 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1015 i != rend(); --idx, ++i )
1016 {
1017 if ( !wxTmemchr(sz, *i, n) )
1018 return idx;
1019 }
1020
1021 return npos;
1022 }
1023
1024 size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
1025 {
1026 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
1027
1028 size_t idx = nStart;
1029 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
1030 {
1031 if ( *i != ch )
1032 return idx;
1033 }
1034
1035 return npos;
1036 }
1037
1038 size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
1039 {
1040 size_t len = length();
1041
1042 if ( nStart == npos )
1043 {
1044 nStart = len - 1;
1045 }
1046 else
1047 {
1048 wxASSERT_MSG( nStart <= len, _T("invalid index") );
1049 }
1050
1051 size_t idx = nStart;
1052 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1053 i != rend(); --idx, ++i )
1054 {
1055 if ( *i != ch )
1056 return idx;
1057 }
1058
1059 return npos;
1060 }
1061
1062 // the functions above were implemented for wchar_t* arguments in Unicode
1063 // build and char* in ANSI build; below are implementations for the other
1064 // version:
1065 #if wxUSE_UNICODE
1066 #define wxOtherCharType char
1067 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
1068 #else
1069 #define wxOtherCharType wchar_t
1070 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
1071 #endif
1072
1073 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
1074 { return find_first_of(STRCONV(sz), nStart); }
1075
1076 size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
1077 size_t n) const
1078 { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
1079 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
1080 { return find_last_of(STRCONV(sz), nStart); }
1081 size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
1082 size_t n) const
1083 { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
1084 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
1085 { return find_first_not_of(STRCONV(sz), nStart); }
1086 size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
1087 size_t n) const
1088 { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
1089 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
1090 { return find_last_not_of(STRCONV(sz), nStart); }
1091 size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
1092 size_t n) const
1093 { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
1094
1095 #undef wxOtherCharType
1096 #undef STRCONV
1097
1098 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1099
1100 // ===========================================================================
1101 // other common string functions
1102 // ===========================================================================
1103
1104 int wxString::CmpNoCase(const wxString& s) const
1105 {
1106 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
1107
1108 const_iterator i1 = begin();
1109 const_iterator end1 = end();
1110 const_iterator i2 = s.begin();
1111 const_iterator end2 = s.end();
1112
1113 for ( ; i1 != end1 && i2 != end2; ++i1, ++i2 )
1114 {
1115 wxUniChar lower1 = (wxChar)wxTolower(*i1);
1116 wxUniChar lower2 = (wxChar)wxTolower(*i2);
1117 if ( lower1 != lower2 )
1118 return lower1 < lower2 ? -1 : 1;
1119 }
1120
1121 size_t len1 = length();
1122 size_t len2 = s.length();
1123
1124 if ( len1 < len2 )
1125 return -1;
1126 else if ( len1 > len2 )
1127 return 1;
1128 return 0;
1129 }
1130
1131
1132 #if wxUSE_UNICODE
1133
1134 #ifdef __MWERKS__
1135 #ifndef __SCHAR_MAX__
1136 #define __SCHAR_MAX__ 127
1137 #endif
1138 #endif
1139
1140 wxString wxString::FromAscii(const char *ascii, size_t len)
1141 {
1142 if (!ascii || len == 0)
1143 return wxEmptyString;
1144
1145 wxString res;
1146
1147 {
1148 wxStringInternalBuffer buf(res, len);
1149 wxStringCharType *dest = buf;
1150
1151 for ( ; len > 0; --len )
1152 {
1153 unsigned char c = (unsigned char)*ascii++;
1154 wxASSERT_MSG( c < 0x80,
1155 _T("Non-ASCII value passed to FromAscii().") );
1156
1157 *dest++ = (wchar_t)c;
1158 }
1159 }
1160
1161 return res;
1162 }
1163
1164 wxString wxString::FromAscii(const char *ascii)
1165 {
1166 return FromAscii(ascii, wxStrlen(ascii));
1167 }
1168
1169 wxString wxString::FromAscii(char ascii)
1170 {
1171 // What do we do with '\0' ?
1172
1173 unsigned char c = (unsigned char)ascii;
1174
1175 wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") );
1176
1177 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1178 return wxString(wxUniChar((wchar_t)c));
1179 }
1180
1181 const wxCharBuffer wxString::ToAscii() const
1182 {
1183 // this will allocate enough space for the terminating NUL too
1184 wxCharBuffer buffer(length());
1185 char *dest = buffer.data();
1186
1187 for ( const_iterator i = begin(); i != end(); ++i )
1188 {
1189 wxUniChar c(*i);
1190 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1191 *dest++ = c.IsAscii() ? (char)c : '_';
1192
1193 // the output string can't have embedded NULs anyhow, so we can safely
1194 // stop at first of them even if we do have any
1195 if ( !c )
1196 break;
1197 }
1198
1199 return buffer;
1200 }
1201
1202 #endif // wxUSE_UNICODE
1203
1204 // extract string of length nCount starting at nFirst
1205 wxString wxString::Mid(size_t nFirst, size_t nCount) const
1206 {
1207 size_t nLen = length();
1208
1209 // default value of nCount is npos and means "till the end"
1210 if ( nCount == npos )
1211 {
1212 nCount = nLen - nFirst;
1213 }
1214
1215 // out-of-bounds requests return sensible things
1216 if ( nFirst + nCount > nLen )
1217 {
1218 nCount = nLen - nFirst;
1219 }
1220
1221 if ( nFirst > nLen )
1222 {
1223 // AllocCopy() will return empty string
1224 return wxEmptyString;
1225 }
1226
1227 wxString dest(*this, nFirst, nCount);
1228 if ( dest.length() != nCount )
1229 {
1230 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1231 }
1232
1233 return dest;
1234 }
1235
1236 // check that the string starts with prefix and return the rest of the string
1237 // in the provided pointer if it is not NULL, otherwise return false
1238 bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
1239 {
1240 if ( compare(0, prefix.length(), prefix) != 0 )
1241 return false;
1242
1243 if ( rest )
1244 {
1245 // put the rest of the string into provided pointer
1246 rest->assign(*this, prefix.length(), npos);
1247 }
1248
1249 return true;
1250 }
1251
1252
1253 // check that the string ends with suffix and return the rest of it in the
1254 // provided pointer if it is not NULL, otherwise return false
1255 bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
1256 {
1257 int start = length() - suffix.length();
1258
1259 if ( start < 0 || compare(start, npos, suffix) != 0 )
1260 return false;
1261
1262 if ( rest )
1263 {
1264 // put the rest of the string into provided pointer
1265 rest->assign(*this, 0, start);
1266 }
1267
1268 return true;
1269 }
1270
1271
1272 // extract nCount last (rightmost) characters
1273 wxString wxString::Right(size_t nCount) const
1274 {
1275 if ( nCount > length() )
1276 nCount = length();
1277
1278 wxString dest(*this, length() - nCount, nCount);
1279 if ( dest.length() != nCount ) {
1280 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1281 }
1282 return dest;
1283 }
1284
1285 // get all characters after the last occurrence of ch
1286 // (returns the whole string if ch not found)
1287 wxString wxString::AfterLast(wxUniChar ch) const
1288 {
1289 wxString str;
1290 int iPos = Find(ch, true);
1291 if ( iPos == wxNOT_FOUND )
1292 str = *this;
1293 else
1294 str.assign(*this, iPos + 1, npos);
1295
1296 return str;
1297 }
1298
1299 // extract nCount first (leftmost) characters
1300 wxString wxString::Left(size_t nCount) const
1301 {
1302 if ( nCount > length() )
1303 nCount = length();
1304
1305 wxString dest(*this, 0, nCount);
1306 if ( dest.length() != nCount ) {
1307 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1308 }
1309 return dest;
1310 }
1311
1312 // get all characters before the first occurrence of ch
1313 // (returns the whole string if ch not found)
1314 wxString wxString::BeforeFirst(wxUniChar ch) const
1315 {
1316 int iPos = Find(ch);
1317 if ( iPos == wxNOT_FOUND )
1318 iPos = length();
1319 return wxString(*this, 0, iPos);
1320 }
1321
1322 /// get all characters before the last occurrence of ch
1323 /// (returns empty string if ch not found)
1324 wxString wxString::BeforeLast(wxUniChar ch) const
1325 {
1326 wxString str;
1327 int iPos = Find(ch, true);
1328 if ( iPos != wxNOT_FOUND && iPos != 0 )
1329 str = wxString(c_str(), iPos);
1330
1331 return str;
1332 }
1333
1334 /// get all characters after the first occurrence of ch
1335 /// (returns empty string if ch not found)
1336 wxString wxString::AfterFirst(wxUniChar ch) const
1337 {
1338 wxString str;
1339 int iPos = Find(ch);
1340 if ( iPos != wxNOT_FOUND )
1341 str.assign(*this, iPos + 1, npos);
1342
1343 return str;
1344 }
1345
1346 // replace first (or all) occurrences of some substring with another one
1347 size_t wxString::Replace(const wxString& strOld,
1348 const wxString& strNew, bool bReplaceAll)
1349 {
1350 // if we tried to replace an empty string we'd enter an infinite loop below
1351 wxCHECK_MSG( !strOld.empty(), 0,
1352 _T("wxString::Replace(): invalid parameter") );
1353
1354 wxSTRING_INVALIDATE_CACHE();
1355
1356 size_t uiCount = 0; // count of replacements made
1357
1358 // optimize the special common case: replacement of one character by
1359 // another one (in UTF-8 case we can only do this for ASCII characters)
1360 //
1361 // benchmarks show that this special version is around 3 times faster
1362 // (depending on the proportion of matching characters and UTF-8/wchar_t
1363 // build)
1364 if ( strOld.m_impl.length() == 1 && strNew.m_impl.length() == 1 )
1365 {
1366 const wxStringCharType chOld = strOld.m_impl[0],
1367 chNew = strNew.m_impl[0];
1368
1369 // this loop is the simplified version of the one below
1370 for ( size_t pos = 0; ; )
1371 {
1372 pos = m_impl.find(chOld, pos);
1373 if ( pos == npos )
1374 break;
1375
1376 m_impl[pos++] = chNew;
1377
1378 uiCount++;
1379
1380 if ( !bReplaceAll )
1381 break;
1382 }
1383 }
1384 else // general case
1385 {
1386 const size_t uiOldLen = strOld.m_impl.length();
1387 const size_t uiNewLen = strNew.m_impl.length();
1388
1389 for ( size_t pos = 0; ; )
1390 {
1391 pos = m_impl.find(strOld.m_impl, pos);
1392 if ( pos == npos )
1393 break;
1394
1395 // replace this occurrence of the old string with the new one
1396 m_impl.replace(pos, uiOldLen, strNew.m_impl);
1397
1398 // move up pos past the string that was replaced
1399 pos += uiNewLen;
1400
1401 // increase replace count
1402 uiCount++;
1403
1404 // stop after the first one?
1405 if ( !bReplaceAll )
1406 break;
1407 }
1408 }
1409
1410 return uiCount;
1411 }
1412
1413 bool wxString::IsAscii() const
1414 {
1415 for ( const_iterator i = begin(); i != end(); ++i )
1416 {
1417 if ( !(*i).IsAscii() )
1418 return false;
1419 }
1420
1421 return true;
1422 }
1423
1424 bool wxString::IsWord() const
1425 {
1426 for ( const_iterator i = begin(); i != end(); ++i )
1427 {
1428 if ( !wxIsalpha(*i) )
1429 return false;
1430 }
1431
1432 return true;
1433 }
1434
1435 bool wxString::IsNumber() const
1436 {
1437 if ( empty() )
1438 return true;
1439
1440 const_iterator i = begin();
1441
1442 if ( *i == _T('-') || *i == _T('+') )
1443 ++i;
1444
1445 for ( ; i != end(); ++i )
1446 {
1447 if ( !wxIsdigit(*i) )
1448 return false;
1449 }
1450
1451 return true;
1452 }
1453
1454 wxString wxString::Strip(stripType w) const
1455 {
1456 wxString s = *this;
1457 if ( w & leading ) s.Trim(false);
1458 if ( w & trailing ) s.Trim(true);
1459 return s;
1460 }
1461
1462 // ---------------------------------------------------------------------------
1463 // case conversion
1464 // ---------------------------------------------------------------------------
1465
1466 wxString& wxString::MakeUpper()
1467 {
1468 for ( iterator it = begin(), en = end(); it != en; ++it )
1469 *it = (wxChar)wxToupper(*it);
1470
1471 return *this;
1472 }
1473
1474 wxString& wxString::MakeLower()
1475 {
1476 for ( iterator it = begin(), en = end(); it != en; ++it )
1477 *it = (wxChar)wxTolower(*it);
1478
1479 return *this;
1480 }
1481
1482 wxString& wxString::MakeCapitalized()
1483 {
1484 const iterator en = end();
1485 iterator it = begin();
1486 if ( it != en )
1487 {
1488 *it = (wxChar)wxToupper(*it);
1489 for ( ++it; it != en; ++it )
1490 *it = (wxChar)wxTolower(*it);
1491 }
1492
1493 return *this;
1494 }
1495
1496 // ---------------------------------------------------------------------------
1497 // trimming and padding
1498 // ---------------------------------------------------------------------------
1499
1500 // some compilers (VC++ 6.0 not to name them) return true for a call to
1501 // isspace('\xEA') in the C locale which seems to be broken to me, but we have
1502 // to live with this by checking that the character is a 7 bit one - even if
1503 // this may fail to detect some spaces (I don't know if Unicode doesn't have
1504 // space-like symbols somewhere except in the first 128 chars), it is arguably
1505 // still better than trimming away accented letters
1506 inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1507
1508 // trims spaces (in the sense of isspace) from left or right side
1509 wxString& wxString::Trim(bool bFromRight)
1510 {
1511 // first check if we're going to modify the string at all
1512 if ( !empty() &&
1513 (
1514 (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1515 (!bFromRight && wxSafeIsspace(GetChar(0u)))
1516 )
1517 )
1518 {
1519 if ( bFromRight )
1520 {
1521 // find last non-space character
1522 reverse_iterator psz = rbegin();
1523 while ( (psz != rend()) && wxSafeIsspace(*psz) )
1524 ++psz;
1525
1526 // truncate at trailing space start
1527 erase(psz.base(), end());
1528 }
1529 else
1530 {
1531 // find first non-space character
1532 iterator psz = begin();
1533 while ( (psz != end()) && wxSafeIsspace(*psz) )
1534 ++psz;
1535
1536 // fix up data and length
1537 erase(begin(), psz);
1538 }
1539 }
1540
1541 return *this;
1542 }
1543
1544 // adds nCount characters chPad to the string from either side
1545 wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1546 {
1547 wxString s(chPad, nCount);
1548
1549 if ( bFromRight )
1550 *this += s;
1551 else
1552 {
1553 s += *this;
1554 swap(s);
1555 }
1556
1557 return *this;
1558 }
1559
1560 // truncate the string
1561 wxString& wxString::Truncate(size_t uiLen)
1562 {
1563 if ( uiLen < length() )
1564 {
1565 erase(begin() + uiLen, end());
1566 }
1567 //else: nothing to do, string is already short enough
1568
1569 return *this;
1570 }
1571
1572 // ---------------------------------------------------------------------------
1573 // finding (return wxNOT_FOUND if not found and index otherwise)
1574 // ---------------------------------------------------------------------------
1575
1576 // find a character
1577 int wxString::Find(wxUniChar ch, bool bFromEnd) const
1578 {
1579 size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1580
1581 return (idx == npos) ? wxNOT_FOUND : (int)idx;
1582 }
1583
1584 // ----------------------------------------------------------------------------
1585 // conversion to numbers
1586 // ----------------------------------------------------------------------------
1587
1588 // The implementation of all the functions below is exactly the same so factor
1589 // it out. Note that number extraction works correctly on UTF-8 strings, so
1590 // we can use wxStringCharType and wx_str() for maximum efficiency.
1591
1592 #ifndef __WXWINCE__
1593 #define DO_IF_NOT_WINCE(x) x
1594 #else
1595 #define DO_IF_NOT_WINCE(x)
1596 #endif
1597
1598 #define WX_STRING_TO_INT_TYPE(out, base, func, T) \
1599 wxCHECK_MSG( out, false, _T("NULL output pointer") ); \
1600 wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") ); \
1601 \
1602 DO_IF_NOT_WINCE( errno = 0; ) \
1603 \
1604 const wxStringCharType *start = wx_str(); \
1605 wxStringCharType *end; \
1606 T val = func(start, &end, base); \
1607 \
1608 /* return true only if scan was stopped by the terminating NUL and */ \
1609 /* if the string was not empty to start with and no under/overflow */ \
1610 /* occurred: */ \
1611 if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) ) \
1612 return false; \
1613 *out = val; \
1614 return true
1615
1616 bool wxString::ToLong(long *pVal, int base) const
1617 {
1618 WX_STRING_TO_INT_TYPE(pVal, base, wxStrtol, long);
1619 }
1620
1621 bool wxString::ToULong(unsigned long *pVal, int base) const
1622 {
1623 WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoul, unsigned long);
1624 }
1625
1626 bool wxString::ToLongLong(wxLongLong_t *pVal, int base) const
1627 {
1628 WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoll, wxLongLong_t);
1629 }
1630
1631 bool wxString::ToULongLong(wxULongLong_t *pVal, int base) const
1632 {
1633 WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoull, wxULongLong_t);
1634 }
1635
1636 bool wxString::ToDouble(double *pVal) const
1637 {
1638 wxCHECK_MSG( pVal, false, _T("NULL output pointer") );
1639
1640 DO_IF_NOT_WINCE( errno = 0; )
1641
1642 const wxChar *start = c_str();
1643 wxChar *end;
1644 double val = wxStrtod(start, &end);
1645
1646 // return true only if scan was stopped by the terminating NUL and if the
1647 // string was not empty to start with and no under/overflow occurred
1648 if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )
1649 return false;
1650
1651 *pVal = val;
1652
1653 return true;
1654 }
1655
1656 // ---------------------------------------------------------------------------
1657 // formatted output
1658 // ---------------------------------------------------------------------------
1659
1660 #if !wxUSE_UTF8_LOCALE_ONLY
1661 /* static */
1662 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1663 wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
1664 #else
1665 wxString wxString::DoFormatWchar(const wxChar *format, ...)
1666 #endif
1667 {
1668 va_list argptr;
1669 va_start(argptr, format);
1670
1671 wxString s;
1672 s.PrintfV(format, argptr);
1673
1674 va_end(argptr);
1675
1676 return s;
1677 }
1678 #endif // !wxUSE_UTF8_LOCALE_ONLY
1679
#if wxUSE_UNICODE_UTF8
// Same as DoFormatWchar() but for a char format string: format the variable
// arguments with PrintfV() and return the result as a new string.
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
1695
1696 /* static */
1697 wxString wxString::FormatV(const wxString& format, va_list argptr)
1698 {
1699 wxString s;
1700 s.PrintfV(format, argptr);
1701 return s;
1702 }
1703
1704 #if !wxUSE_UTF8_LOCALE_ONLY
1705 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1706 int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
1707 #else
1708 int wxString::DoPrintfWchar(const wxChar *format, ...)
1709 #endif
1710 {
1711 va_list argptr;
1712 va_start(argptr, format);
1713
1714 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1715 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1716 // because it's the only cast that works safely for downcasting when
1717 // multiple inheritance is used:
1718 wxString *str = static_cast<wxString*>(this);
1719 #else
1720 wxString *str = this;
1721 #endif
1722
1723 int iLen = str->PrintfV(format, argptr);
1724
1725 va_end(argptr);
1726
1727 return iLen;
1728 }
1729 #endif // !wxUSE_UTF8_LOCALE_ONLY
1730
#if wxUSE_UNICODE_UTF8
// Same as DoPrintfWchar() but for a char format string: format the variable
// arguments into this string and return the value returned by PrintfV().
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);

    const int iLen = PrintfV(format, args);

    va_end(args);

    return iLen;
}
#endif // wxUSE_UNICODE_UTF8
1744
1745 /*
    Uses wxVsnprintf and places the result into this string.
1747
1748 In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1749 it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in
1750 the ISO C99 (and thus SUSv3) standard the return value for the case of
1751 an undersized buffer is inconsistent. For conforming vsnprintf
1752 implementations the function must return the number of characters that
1753 would have been printed had the buffer been large enough. For conforming
1754 vswprintf implementations the function must return a negative number
1755 and set errno.
1756
1757 What vswprintf sets errno to is undefined but Darwin seems to set it to
1758 EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of
1759 those are defined in the standard and backed up by several conformance
1760 statements. Note that ENOMEM mentioned in the manual page does not
1761 apply to swprintf, only wprintf and fwprintf.
1762
1763 Official manual page:
1764 http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1765
1766 Some conformance statements (AIX, Solaris):
1767 http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1768 http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1769
1770 Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1771 EILSEQ and EINVAL are specifically defined to mean the error is other than
1772 an undersized buffer and no other errno are defined we treat those two
1773 as meaning hard errors and everything else gets the old behavior which
1774 is to keep looping and increasing buffer size until the function succeeds.
1775
1776 In practice it's impossible to determine before compilation which behavior
1777 may be used. The vswprintf function may have vsnprintf-like behavior or
1778 vice-versa. Behavior detected on one release can theoretically change
1779 with an updated release. Not to mention that configure testing for it
1780 would require the test to be run on the host system, not the build system
1781 which makes cross compilation difficult. Therefore, we make no assumptions
1782 about behavior and try our best to handle every known case, including the
1783 case where wxVsnprintf returns a negative number and fails to set errno.
1784
1785 There is yet one more non-standard implementation and that is our own.
1786 Fortunately, that can be detected at compile-time.
1787
1788 On top of all that, ISO C99 explicitly defines snprintf to write a null
    character to the last position of the specified buffer. That would be at
    the given buffer size minus 1. It is supposed to do this even if it
1791 turns out that the buffer is sized too small.
1792
1793 Darwin (tested on 10.5) follows the C99 behavior exactly.
1794
1795 Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1796 errno even when it fails. However, it only seems to ever fail due
1797 to an undersized buffer.
1798 */
1799 #if wxUSE_UNICODE_UTF8
1800 template<typename BufferType>
1801 #else
1802 // we only need one version in non-UTF8 builds and at least two Windows
1803 // compilers have problems with this function template, so use just one
1804 // normal function here
1805 #endif
1806 static int DoStringPrintfV(wxString& str,
1807 const wxString& format, va_list argptr)
1808 {
1809 int size = 1024;
1810
1811 for ( ;; )
1812 {
1813 #if wxUSE_UNICODE_UTF8
1814 BufferType tmp(str, size + 1);
1815 typename BufferType::CharType *buf = tmp;
1816 #else
1817 wxStringBuffer tmp(str, size + 1);
1818 wxChar *buf = tmp;
1819 #endif
1820
1821 if ( !buf )
1822 {
1823 // out of memory
1824
1825 // in UTF-8 build, leaving uninitialized junk in the buffer
1826 // could result in invalid non-empty UTF-8 string, so just
1827 // reset the string to empty on failure:
1828 buf[0] = '\0';
1829 return -1;
1830 }
1831
1832 // wxVsnprintf() may modify the original arg pointer, so pass it
1833 // only a copy
1834 va_list argptrcopy;
1835 wxVaCopy(argptrcopy, argptr);
1836
1837 #ifndef __WXWINCE__
1838 // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1839 errno = 0;
1840 #endif
1841 int len = wxVsnprintf(buf, size, format, argptrcopy);
1842 va_end(argptrcopy);
1843
1844 // some implementations of vsnprintf() don't NUL terminate
1845 // the string if there is not enough space for it so
1846 // always do it manually
1847 // FIXME: This really seems to be the wrong and would be an off-by-one
1848 // bug except the code above allocates an extra character.
1849 buf[size] = _T('\0');
1850
1851 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1852 // total number of characters which would have been written if the
1853 // buffer were large enough (newer standards such as Unix98)
1854 if ( len < 0 )
1855 {
1856 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1857 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1858 // is true if *both* of them use our own implementation,
1859 // otherwise we can't be sure
1860 #if wxUSE_WXVSNPRINTF
1861 // we know that our own implementation of wxVsnprintf() returns -1
1862 // only for a format error - thus there's something wrong with
1863 // the user's format string
1864 buf[0] = '\0';
1865 return -1;
1866 #else // possibly using system version
1867 // assume it only returns error if there is not enough space, but
1868 // as we don't know how much we need, double the current size of
1869 // the buffer
1870 #ifndef __WXWINCE__
1871 if( (errno == EILSEQ) || (errno == EINVAL) )
1872 // If errno was set to one of the two well-known hard errors
1873 // then fail immediately to avoid an infinite loop.
1874 return -1;
1875 else
1876 #endif // __WXWINCE__
1877 // still not enough, as we don't know how much we need, double the
1878 // current size of the buffer
1879 size *= 2;
1880 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1881 }
1882 else if ( len >= size )
1883 {
1884 #if wxUSE_WXVSNPRINTF
1885 // we know that our own implementation of wxVsnprintf() returns
1886 // size+1 when there's not enough space but that's not the size
1887 // of the required buffer!
1888 size *= 2; // so we just double the current size of the buffer
1889 #else
1890 // some vsnprintf() implementations NUL-terminate the buffer and
1891 // some don't in len == size case, to be safe always add 1
1892 // FIXME: I don't quite understand this comment. The vsnprintf
1893 // function is specifically defined to return the number of
1894 // characters printed not including the null terminator.
1895 // So OF COURSE you need to add 1 to get the right buffer size.
1896 // The following line is definitely correct, no question.
1897 size = len + 1;
1898 #endif
1899 }
1900 else // ok, there was enough space
1901 {
1902 break;
1903 }
1904 }
1905
1906 // we could have overshot
1907 str.Shrink();
1908
1909 return str.length();
1910 }
1911
1912 int wxString::PrintfV(const wxString& format, va_list argptr)
1913 {
1914 #if wxUSE_UNICODE_UTF8
1915 #if wxUSE_STL_BASED_WXSTRING
1916 typedef wxStringTypeBuffer<char> Utf8Buffer;
1917 #else
1918 typedef wxStringInternalBuffer Utf8Buffer;
1919 #endif
1920 #endif
1921
1922 #if wxUSE_UTF8_LOCALE_ONLY
1923 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
1924 #else
1925 #if wxUSE_UNICODE_UTF8
1926 if ( wxLocaleIsUtf8 )
1927 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
1928 else
1929 // wxChar* version
1930 return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
1931 #else
1932 return DoStringPrintfV(*this, format, argptr);
1933 #endif // UTF8/WCHAR
1934 #endif
1935 }
1936
1937 // ----------------------------------------------------------------------------
1938 // misc other operations
1939 // ----------------------------------------------------------------------------
1940
1941 // returns true if the string matches the pattern which may contain '*' and
1942 // '?' metacharacters (as usual, '?' matches any character and '*' any number
1943 // of them)
1944 bool wxString::Matches(const wxString& mask) const
1945 {
1946 // I disable this code as it doesn't seem to be faster (in fact, it seems
1947 // to be much slower) than the old, hand-written code below and using it
1948 // here requires always linking with libregex even if the user code doesn't
1949 // use it
1950 #if 0 // wxUSE_REGEX
1951 // first translate the shell-like mask into a regex
1952 wxString pattern;
1953 pattern.reserve(wxStrlen(pszMask));
1954
1955 pattern += _T('^');
1956 while ( *pszMask )
1957 {
1958 switch ( *pszMask )
1959 {
1960 case _T('?'):
1961 pattern += _T('.');
1962 break;
1963
1964 case _T('*'):
1965 pattern += _T(".*");
1966 break;
1967
1968 case _T('^'):
1969 case _T('.'):
1970 case _T('$'):
1971 case _T('('):
1972 case _T(')'):
1973 case _T('|'):
1974 case _T('+'):
1975 case _T('\\'):
1976 // these characters are special in a RE, quote them
1977 // (however note that we don't quote '[' and ']' to allow
1978 // using them for Unix shell like matching)
1979 pattern += _T('\\');
1980 // fall through
1981
1982 default:
1983 pattern += *pszMask;
1984 }
1985
1986 pszMask++;
1987 }
1988 pattern += _T('$');
1989
1990 // and now use it
1991 return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
1992 #else // !wxUSE_REGEX
1993 // TODO: this is, of course, awfully inefficient...
1994
1995 // FIXME-UTF8: implement using iterators, remove #if
1996 #if wxUSE_UNICODE_UTF8
1997 wxWCharBuffer maskBuf = mask.wc_str();
1998 wxWCharBuffer txtBuf = wc_str();
1999 const wxChar *pszMask = maskBuf.data();
2000 const wxChar *pszTxt = txtBuf.data();
2001 #else
2002 const wxChar *pszMask = mask.wx_str();
2003 // the char currently being checked
2004 const wxChar *pszTxt = wx_str();
2005 #endif
2006
2007 // the last location where '*' matched
2008 const wxChar *pszLastStarInText = NULL;
2009 const wxChar *pszLastStarInMask = NULL;
2010
2011 match:
2012 for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
2013 switch ( *pszMask ) {
2014 case wxT('?'):
2015 if ( *pszTxt == wxT('\0') )
2016 return false;
2017
2018 // pszTxt and pszMask will be incremented in the loop statement
2019
2020 break;
2021
2022 case wxT('*'):
2023 {
2024 // remember where we started to be able to backtrack later
2025 pszLastStarInText = pszTxt;
2026 pszLastStarInMask = pszMask;
2027
2028 // ignore special chars immediately following this one
2029 // (should this be an error?)
2030 while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
2031 pszMask++;
2032
2033 // if there is nothing more, match
2034 if ( *pszMask == wxT('\0') )
2035 return true;
2036
2037 // are there any other metacharacters in the mask?
2038 size_t uiLenMask;
2039 const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));
2040
2041 if ( pEndMask != NULL ) {
2042 // we have to match the string between two metachars
2043 uiLenMask = pEndMask - pszMask;
2044 }
2045 else {
2046 // we have to match the remainder of the string
2047 uiLenMask = wxStrlen(pszMask);
2048 }
2049
2050 wxString strToMatch(pszMask, uiLenMask);
2051 const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
2052 if ( pMatch == NULL )
2053 return false;
2054
2055 // -1 to compensate "++" in the loop
2056 pszTxt = pMatch + uiLenMask - 1;
2057 pszMask += uiLenMask - 1;
2058 }
2059 break;
2060
2061 default:
2062 if ( *pszMask != *pszTxt )
2063 return false;
2064 break;
2065 }
2066 }
2067
2068 // match only if nothing left
2069 if ( *pszTxt == wxT('\0') )
2070 return true;
2071
2072 // if we failed to match, backtrack if we can
2073 if ( pszLastStarInText ) {
2074 pszTxt = pszLastStarInText + 1;
2075 pszMask = pszLastStarInMask;
2076
2077 pszLastStarInText = NULL;
2078
2079 // don't bother resetting pszLastStarInMask, it's unnecessary
2080
2081 goto match;
2082 }
2083
2084 return false;
2085 #endif // wxUSE_REGEX/!wxUSE_REGEX
2086 }
2087
2088 // Count the number of chars
2089 int wxString::Freq(wxUniChar ch) const
2090 {
2091 int count = 0;
2092 for ( const_iterator i = begin(); i != end(); ++i )
2093 {
2094 if ( *i == ch )
2095 count ++;
2096 }
2097 return count;
2098 }
2099
2100 // ----------------------------------------------------------------------------
2101 // wxUTF8StringBuffer
2102 // ----------------------------------------------------------------------------
2103
2104 #if wxUSE_UNICODE_WCHAR
2105 wxUTF8StringBuffer::~wxUTF8StringBuffer()
2106 {
2107 wxMBConvStrictUTF8 conv;
2108 size_t wlen = conv.ToWChar(NULL, 0, m_buf);
2109 wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
2110
2111 wxStringInternalBuffer wbuf(m_str, wlen);
2112 conv.ToWChar(wbuf, wlen, m_buf);
2113 }
2114
2115 wxUTF8StringBufferLength::~wxUTF8StringBufferLength()
2116 {
2117 wxCHECK_RET(m_lenSet, "length not set");
2118
2119 wxMBConvStrictUTF8 conv;
2120 size_t wlen = conv.ToWChar(NULL, 0, m_buf, m_len);
2121 wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
2122
2123 wxStringInternalBufferLength wbuf(m_str, wlen);
2124 conv.ToWChar(wbuf, wlen, m_buf, m_len);
2125 wbuf.SetLength(wlen);
2126 }
2127 #endif // wxUSE_UNICODE_WCHAR
2128
2129 // ----------------------------------------------------------------------------
// wxCharTypeBuffer<T>
2131 // ----------------------------------------------------------------------------
2132
2133 #ifndef __VMS_BROKEN_TEMPLATES
2134 template<>
2135 #endif
2136 wxCharTypeBuffer<char>::Data
2137 wxCharTypeBuffer<char>::NullData(NULL);
2138
2139 #ifndef __VMS_BROKEN_TEMPLATES
2140 template<>
2141 #endif
2142 wxCharTypeBuffer<wchar_t>::Data
2143 wxCharTypeBuffer<wchar_t>::NullData(NULL);