always remember to test IsOk() after using wxFileInputStream
[wxWidgets.git] / src / common / string.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/string.cpp
3 // Purpose: wxString class
4 // Author: Vadim Zeitlin, Ryan Norton
5 // Modified by:
6 // Created: 29/01/98
7 // RCS-ID: $Id$
8 // Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
9 // (c) 2004 Ryan Norton <wxprojects@comcast.net>
10 // Licence: wxWindows licence
11 /////////////////////////////////////////////////////////////////////////////
12
13 // ===========================================================================
14 // headers, declarations, constants
15 // ===========================================================================
16
17 // For compilers that support precompilation, includes "wx.h".
18 #include "wx/wxprec.h"
19
20 #ifdef __BORLANDC__
21 #pragma hdrstop
22 #endif
23
24 #ifndef WX_PRECOMP
25 #include "wx/string.h"
26 #include "wx/wxcrtvararg.h"
27 #endif
28
29 #include <ctype.h>
30
31 #ifndef __WXWINCE__
32 #include <errno.h>
33 #endif
34
35 #include <string.h>
36 #include <stdlib.h>
37
38 #include "wx/hashmap.h"
39
40 // string handling functions used by wxString:
41 #if wxUSE_UNICODE_UTF8
42 #define wxStringMemcpy memcpy
43 #define wxStringMemcmp memcmp
44 #define wxStringMemchr memchr
45 #define wxStringStrlen strlen
46 #else
47 #define wxStringMemcpy wxTmemcpy
48 #define wxStringMemcmp wxTmemcmp
49 #define wxStringMemchr wxTmemchr
50 #define wxStringStrlen wxStrlen
51 #endif
52
53 // ----------------------------------------------------------------------------
54 // global variables
55 // ----------------------------------------------------------------------------
56
57 namespace wxPrivate
58 {
59
60 static UntypedBufferData s_untypedNullData(NULL);
61
62 UntypedBufferData * const untypedNullDataPtr = &s_untypedNullData;
63
64 } // namespace wxPrivate
65
66 // ---------------------------------------------------------------------------
67 // static class variables definition
68 // ---------------------------------------------------------------------------
69
70 //According to STL _must_ be a -1 size_t
71 const size_t wxString::npos = (size_t) -1;
72
73 #if wxUSE_STRING_POS_CACHE
74
75 #ifdef wxHAS_COMPILER_TLS
76
77 wxTLS_TYPE(wxString::Cache) wxString::ms_cache;
78
79 #else // !wxHAS_COMPILER_TLS
80
81 struct wxStrCacheInitializer
82 {
83 wxStrCacheInitializer()
84 {
85 // calling this function triggers s_cache initialization in it, and
86 // from now on it becomes safe to call from multiple threads
87 wxString::GetCache();
88 }
89 };
90
91 /*
92 wxString::Cache& wxString::GetCache()
93 {
94 static wxTLS_TYPE(Cache) s_cache;
95
96 return wxTLS_VALUE(s_cache);
97 }
98 */
99
100 static wxStrCacheInitializer gs_stringCacheInit;
101
102 #endif // wxHAS_COMPILER_TLS/!wxHAS_COMPILER_TLS
103
104 // gdb seems to be unable to display thread-local variables correctly, at least
105 // not my 6.4.98 version under amd64, so provide this debugging helper to do it
106 #ifdef __WXDEBUG__
107
108 struct wxStrCacheDumper
109 {
110 static void ShowAll()
111 {
112 puts("*** wxString cache dump:");
113 for ( unsigned n = 0; n < wxString::Cache::SIZE; n++ )
114 {
115 const wxString::Cache::Element&
116 c = wxString::GetCacheBegin()[n];
117
118 printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
119 n,
120 n == wxString::LastUsedCacheElement() ? " [*]" : "",
121 c.str,
122 (unsigned long)c.pos,
123 (unsigned long)c.impl,
124 (long)c.len);
125 }
126 }
127 };
128
129 void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
130
131 #endif // __WXDEBUG__
132
133 #ifdef wxPROFILE_STRING_CACHE
134
135 wxString::CacheStats wxString::ms_cacheStats;
136
137 struct wxStrCacheStatsDumper
138 {
139 ~wxStrCacheStatsDumper()
140 {
141 const wxString::CacheStats& stats = wxString::ms_cacheStats;
142
143 if ( stats.postot )
144 {
145 puts("*** wxString cache statistics:");
146 printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
147 stats.postot);
148 printf("\tHits %u (of which %u not used) or %.2f%%\n",
149 stats.poshits,
150 stats.mishits,
151 100.*float(stats.poshits - stats.mishits)/stats.postot);
152 printf("\tAverage position requested: %.2f\n",
153 float(stats.sumpos) / stats.postot);
154 printf("\tAverage offset after cached hint: %.2f\n",
155 float(stats.sumofs) / stats.postot);
156 }
157
158 if ( stats.lentot )
159 {
160 printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
161 stats.lentot, 100.*float(stats.lenhits)/stats.lentot);
162 }
163 }
164 };
165
166 static wxStrCacheStatsDumper s_showCacheStats;
167
168 #endif // wxPROFILE_STRING_CACHE
169
170 #endif // wxUSE_STRING_POS_CACHE
171
172 // ----------------------------------------------------------------------------
173 // global functions
174 // ----------------------------------------------------------------------------
175
176 #if wxUSE_STD_IOSTREAM
177
178 #include <iostream>
179
180 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
181 {
182 #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
183 const wxCharBuffer buf(str.AsCharBuf());
184 if ( !buf )
185 os.clear(wxSTD ios_base::failbit);
186 else
187 os << buf.data();
188
189 return os;
190 #else
191 return os << str.AsInternal();
192 #endif
193 }
194
195 wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
196 {
197 return os << str.c_str();
198 }
199
200 wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str)
201 {
202 return os << str.data();
203 }
204
205 #ifndef __BORLANDC__
206 wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)
207 {
208 return os << str.data();
209 }
210 #endif
211
212 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
213
214 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxString& str)
215 {
216 return wos << str.wc_str();
217 }
218
219 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxCStrData& str)
220 {
221 return wos << str.AsWChar();
222 }
223
224 wxSTD wostream& operator<<(wxSTD wostream& wos, const wxWCharBuffer& str)
225 {
226 return wos << str.data();
227 }
228
229 #endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
230
231 #endif // wxUSE_STD_IOSTREAM
232
233 // ===========================================================================
234 // wxString class core
235 // ===========================================================================
236
237 #if wxUSE_UNICODE_UTF8
238
239 void wxString::PosLenToImpl(size_t pos, size_t len,
240 size_t *implPos, size_t *implLen) const
241 {
242 if ( pos == npos )
243 {
244 *implPos = npos;
245 }
246 else // have valid start position
247 {
248 const const_iterator b = GetIterForNthChar(pos);
249 *implPos = wxStringImpl::const_iterator(b.impl()) - m_impl.begin();
250 if ( len == npos )
251 {
252 *implLen = npos;
253 }
254 else // have valid length too
255 {
256 // we need to handle the case of length specifying a substring
257 // going beyond the end of the string, just as std::string does
258 const const_iterator e(end());
259 const_iterator i(b);
260 while ( len && i <= e )
261 {
262 ++i;
263 --len;
264 }
265
266 *implLen = i.impl() - b.impl();
267 }
268 }
269 }
270
271 #endif // wxUSE_UNICODE_UTF8
272
273 // ----------------------------------------------------------------------------
274 // wxCStrData converted strings caching
275 // ----------------------------------------------------------------------------
276
277 // FIXME-UTF8: temporarily disabled because it doesn't work with global
278 // string objects; re-enable after fixing this bug and benchmarking
279 // performance to see if using a hash is a good idea at all
280 #if 0
281
282 // For backward compatibility reasons, it must be possible to assign the value
283 // returned by wxString::c_str() to a char* or wchar_t* variable and work with
284 // it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
285 // because the memory would be freed immediately, but it has to be valid as long
286 // as the string is not modified, so that code like this still works:
287 //
288 // const wxChar *s = str.c_str();
289 // while ( s ) { ... }
290
291 // FIXME-UTF8: not thread safe!
292 // FIXME-UTF8: we currently clear the cached conversion only when the string is
293 // destroyed, but we should do it when the string is modified, to
294 // keep memory usage down
295 // FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
296 // invalidated the cache on every change, we could keep the previous
297 // conversion
298 // FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
299 // to use mb_str() or wc_str() instead of (const [w]char*)c_str()
300
301 template<typename T>
302 static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
303 {
304 typename T::iterator i = hash.find(wxConstCast(s, wxString));
305 if ( i != hash.end() )
306 {
307 free(i->second);
308 hash.erase(i);
309 }
310 }
311
312 #if wxUSE_UNICODE
313 // NB: non-STL implementation doesn't compile with "const wxString*" key type,
314 // so we have to use wxString* here and const-cast when used
315 WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
316 wxStringCharConversionCache);
317 static wxStringCharConversionCache gs_stringsCharCache;
318
319 const char* wxCStrData::AsChar() const
320 {
321 // remove previously cache value, if any (see FIXMEs above):
322 DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
323
324 // convert the string and keep it:
325 const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
326 m_str->mb_str().release();
327
328 return s + m_offset;
329 }
330 #endif // wxUSE_UNICODE
331
332 #if !wxUSE_UNICODE_WCHAR
333 WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
334 wxStringWCharConversionCache);
335 static wxStringWCharConversionCache gs_stringsWCharCache;
336
337 const wchar_t* wxCStrData::AsWChar() const
338 {
339 // remove previously cache value, if any (see FIXMEs above):
340 DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
341
342 // convert the string and keep it:
343 const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
344 m_str->wc_str().release();
345
346 return s + m_offset;
347 }
348 #endif // !wxUSE_UNICODE_WCHAR
349
350 wxString::~wxString()
351 {
352 #if wxUSE_UNICODE
353 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
354 DeleteStringFromConversionCache(gs_stringsCharCache, this);
355 #endif
356 #if !wxUSE_UNICODE_WCHAR
357 DeleteStringFromConversionCache(gs_stringsWCharCache, this);
358 #endif
359 }
360 #endif
361
362 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
363 const char* wxCStrData::AsChar() const
364 {
365 #if wxUSE_UNICODE_UTF8
366 if ( wxLocaleIsUtf8 )
367 return AsInternal();
368 #endif
369 // under non-UTF8 locales, we have to convert the internal UTF-8
370 // representation using wxConvLibc and cache the result
371
372 wxString *str = wxConstCast(m_str, wxString);
373
374 // convert the string:
375 //
376 // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
377 // have it) but it's unfortunately not obvious to implement
378 // because we don't know how big buffer do we need for the
379 // given string length (in case of multibyte encodings, e.g.
380 // ISO-2022-JP or UTF-8 when internal representation is wchar_t)
381 //
382 // One idea would be to store more than just m_convertedToChar
383 // in wxString: then we could record the length of the string
384 // which was converted the last time and try to reuse the same
385 // buffer if the current length is not greater than it (this
386 // could still fail because string could have been modified in
387 // place but it would work most of the time, so we'd do it and
388 // only allocate the new buffer if in-place conversion returned
389 // an error). We could also store a bit saying if the string
390 // was modified since the last conversion (and update it in all
391 // operation modifying the string, of course) to avoid unneeded
392 // consequential conversions. But both of these ideas require
393 // adding more fields to wxString and require profiling results
394 // to be sure that we really gain enough from them to justify
395 // doing it.
396 wxCharBuffer buf(str->mb_str());
397
398 // if it failed, return empty string and not NULL to avoid crashes in code
399 // written with either wxWidgets 2 wxString or std::string behaviour in
400 // mind: neither of them ever returns NULL and so we shouldn't neither
401 if ( !buf )
402 return "";
403
404 if ( str->m_convertedToChar &&
405 strlen(buf) == strlen(str->m_convertedToChar) )
406 {
407 // keep the same buffer for as long as possible, so that several calls
408 // to c_str() in a row still work:
409 strcpy(str->m_convertedToChar, buf);
410 }
411 else
412 {
413 str->m_convertedToChar = buf.release();
414 }
415
416 // and keep it:
417 return str->m_convertedToChar + m_offset;
418 }
419 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
420
421 #if !wxUSE_UNICODE_WCHAR
422 const wchar_t* wxCStrData::AsWChar() const
423 {
424 wxString *str = wxConstCast(m_str, wxString);
425
426 // convert the string:
427 wxWCharBuffer buf(str->wc_str());
428
429 // notice that here, unlike above in AsChar(), conversion can't fail as our
430 // internal UTF-8 is always well-formed -- or the string was corrupted and
431 // all bets are off anyhow
432
433 // FIXME-UTF8: do the conversion in-place in the existing buffer
434 if ( str->m_convertedToWChar &&
435 wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
436 {
437 // keep the same buffer for as long as possible, so that several calls
438 // to c_str() in a row still work:
439 memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
440 }
441 else
442 {
443 str->m_convertedToWChar = buf.release();
444 }
445
446 // and keep it:
447 return str->m_convertedToWChar + m_offset;
448 }
449 #endif // !wxUSE_UNICODE_WCHAR
450
451 // ===========================================================================
452 // wxString class core
453 // ===========================================================================
454
455 // ---------------------------------------------------------------------------
456 // construction and conversion
457 // ---------------------------------------------------------------------------
458
459 #if wxUSE_UNICODE_WCHAR
460 /* static */
461 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
462 const wxMBConv& conv)
463 {
464 // anything to do?
465 if ( !psz || nLength == 0 )
466 return SubstrBufFromMB(L"", 0);
467
468 if ( nLength == npos )
469 nLength = wxNO_LEN;
470
471 size_t wcLen;
472 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
473 if ( !wcLen )
474 return SubstrBufFromMB(_T(""), 0);
475 else
476 return SubstrBufFromMB(wcBuf, wcLen);
477 }
478 #endif // wxUSE_UNICODE_WCHAR
479
480 #if wxUSE_UNICODE_UTF8
481 /* static */
482 wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
483 const wxMBConv& conv)
484 {
485 // anything to do?
486 if ( !psz || nLength == 0 )
487 return SubstrBufFromMB("", 0);
488
489 // if psz is already in UTF-8, we don't have to do the roundtrip to
490 // wchar_t* and back:
491 if ( conv.IsUTF8() )
492 {
493 // we need to validate the input because UTF8 iterators assume valid
494 // UTF-8 sequence and psz may be invalid:
495 if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
496 {
497 // we must pass the real string length to SubstrBufFromMB ctor
498 if ( nLength == npos )
499 nLength = psz ? strlen(psz) : 0;
500 return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz), nLength);
501 }
502 // else: do the roundtrip through wchar_t*
503 }
504
505 if ( nLength == npos )
506 nLength = wxNO_LEN;
507
508 // first convert to wide string:
509 size_t wcLen;
510 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
511 if ( !wcLen )
512 return SubstrBufFromMB("", 0);
513
514 // and then to UTF-8:
515 SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
516 // widechar -> UTF-8 conversion isn't supposed to ever fail:
517 wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
518
519 return buf;
520 }
521 #endif // wxUSE_UNICODE_UTF8
522
523 #if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
524 /* static */
525 wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
526 const wxMBConv& conv)
527 {
528 // anything to do?
529 if ( !pwz || nLength == 0 )
530 return SubstrBufFromWC("", 0);
531
532 if ( nLength == npos )
533 nLength = wxNO_LEN;
534
535 size_t mbLen;
536 wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
537 if ( !mbLen )
538 return SubstrBufFromWC("", 0);
539 else
540 return SubstrBufFromWC(mbBuf, mbLen);
541 }
542 #endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
543
544
545 #if wxUSE_UNICODE_WCHAR
546
547 //Convert wxString in Unicode mode to a multi-byte string
548 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
549 {
550 return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL);
551 }
552
553 #elif wxUSE_UNICODE_UTF8
554
555 const wxWCharBuffer wxString::wc_str() const
556 {
557 return wxMBConvStrictUTF8().cMB2WC
558 (
559 m_impl.c_str(),
560 m_impl.length() + 1, // size, not length
561 NULL
562 );
563 }
564
565 const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
566 {
567 if ( conv.IsUTF8() )
568 return wxCharBuffer::CreateNonOwned(m_impl.c_str());
569
570 // FIXME-UTF8: use wc_str() here once we have buffers with length
571
572 size_t wcLen;
573 wxWCharBuffer wcBuf(wxMBConvStrictUTF8().cMB2WC
574 (
575 m_impl.c_str(),
576 m_impl.length() + 1, // size
577 &wcLen
578 ));
579 if ( !wcLen )
580 return wxCharBuffer("");
581
582 return conv.cWC2MB(wcBuf, wcLen+1, NULL);
583 }
584
585 #else // ANSI
586
587 //Converts this string to a wide character string if unicode
588 //mode is not enabled and wxUSE_WCHAR_T is enabled
589 const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
590 {
591 return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL);
592 }
593
594 #endif // Unicode/ANSI
595
596 // shrink to minimal size (releasing extra memory)
597 bool wxString::Shrink()
598 {
599 wxString tmp(begin(), end());
600 swap(tmp);
601 return tmp.length() == length();
602 }
603
604 // deprecated compatibility code:
605 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
606 wxStringCharType *wxString::GetWriteBuf(size_t nLen)
607 {
608 return DoGetWriteBuf(nLen);
609 }
610
611 void wxString::UngetWriteBuf()
612 {
613 DoUngetWriteBuf();
614 }
615
616 void wxString::UngetWriteBuf(size_t nLen)
617 {
618 DoUngetWriteBuf(nLen);
619 }
620 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
621
622
623 // ---------------------------------------------------------------------------
624 // data access
625 // ---------------------------------------------------------------------------
626
627 // all functions are inline in string.h
628
629 // ---------------------------------------------------------------------------
630 // concatenation operators
631 // ---------------------------------------------------------------------------
632
633 /*
634 * concatenation functions come in 5 flavours:
635 * string + string
636 * char + string and string + char
637 * C str + string and string + C str
638 */
639
640 wxString operator+(const wxString& str1, const wxString& str2)
641 {
642 #if !wxUSE_STL_BASED_WXSTRING
643 wxASSERT( str1.IsValid() );
644 wxASSERT( str2.IsValid() );
645 #endif
646
647 wxString s = str1;
648 s += str2;
649
650 return s;
651 }
652
653 wxString operator+(const wxString& str, wxUniChar ch)
654 {
655 #if !wxUSE_STL_BASED_WXSTRING
656 wxASSERT( str.IsValid() );
657 #endif
658
659 wxString s = str;
660 s += ch;
661
662 return s;
663 }
664
665 wxString operator+(wxUniChar ch, const wxString& str)
666 {
667 #if !wxUSE_STL_BASED_WXSTRING
668 wxASSERT( str.IsValid() );
669 #endif
670
671 wxString s = ch;
672 s += str;
673
674 return s;
675 }
676
677 wxString operator+(const wxString& str, const char *psz)
678 {
679 #if !wxUSE_STL_BASED_WXSTRING
680 wxASSERT( str.IsValid() );
681 #endif
682
683 wxString s;
684 if ( !s.Alloc(strlen(psz) + str.length()) ) {
685 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
686 }
687 s += str;
688 s += psz;
689
690 return s;
691 }
692
693 wxString operator+(const wxString& str, const wchar_t *pwz)
694 {
695 #if !wxUSE_STL_BASED_WXSTRING
696 wxASSERT( str.IsValid() );
697 #endif
698
699 wxString s;
700 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
701 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
702 }
703 s += str;
704 s += pwz;
705
706 return s;
707 }
708
709 wxString operator+(const char *psz, const wxString& str)
710 {
711 #if !wxUSE_STL_BASED_WXSTRING
712 wxASSERT( str.IsValid() );
713 #endif
714
715 wxString s;
716 if ( !s.Alloc(strlen(psz) + str.length()) ) {
717 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
718 }
719 s = psz;
720 s += str;
721
722 return s;
723 }
724
725 wxString operator+(const wchar_t *pwz, const wxString& str)
726 {
727 #if !wxUSE_STL_BASED_WXSTRING
728 wxASSERT( str.IsValid() );
729 #endif
730
731 wxString s;
732 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
733 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
734 }
735 s = pwz;
736 s += str;
737
738 return s;
739 }
740
741 // ---------------------------------------------------------------------------
742 // string comparison
743 // ---------------------------------------------------------------------------
744
745 bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
746 {
747 return (length() == 1) && (compareWithCase ? GetChar(0u) == c
748 : wxToupper(GetChar(0u)) == wxToupper(c));
749 }
750
751 #ifdef HAVE_STD_STRING_COMPARE
752
753 // NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
754 // UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
755 // sort strings in characters code point order by sorting the byte sequence
756 // in byte values order (i.e. what strcmp() and memcmp() do).
757
758 int wxString::compare(const wxString& str) const
759 {
760 return m_impl.compare(str.m_impl);
761 }
762
763 int wxString::compare(size_t nStart, size_t nLen,
764 const wxString& str) const
765 {
766 size_t pos, len;
767 PosLenToImpl(nStart, nLen, &pos, &len);
768 return m_impl.compare(pos, len, str.m_impl);
769 }
770
771 int wxString::compare(size_t nStart, size_t nLen,
772 const wxString& str,
773 size_t nStart2, size_t nLen2) const
774 {
775 size_t pos, len;
776 PosLenToImpl(nStart, nLen, &pos, &len);
777
778 size_t pos2, len2;
779 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
780
781 return m_impl.compare(pos, len, str.m_impl, pos2, len2);
782 }
783
784 int wxString::compare(const char* sz) const
785 {
786 return m_impl.compare(ImplStr(sz));
787 }
788
789 int wxString::compare(const wchar_t* sz) const
790 {
791 return m_impl.compare(ImplStr(sz));
792 }
793
794 int wxString::compare(size_t nStart, size_t nLen,
795 const char* sz, size_t nCount) const
796 {
797 size_t pos, len;
798 PosLenToImpl(nStart, nLen, &pos, &len);
799
800 SubstrBufFromMB str(ImplStr(sz, nCount));
801
802 return m_impl.compare(pos, len, str.data, str.len);
803 }
804
805 int wxString::compare(size_t nStart, size_t nLen,
806 const wchar_t* sz, size_t nCount) const
807 {
808 size_t pos, len;
809 PosLenToImpl(nStart, nLen, &pos, &len);
810
811 SubstrBufFromWC str(ImplStr(sz, nCount));
812
813 return m_impl.compare(pos, len, str.data, str.len);
814 }
815
816 #else // !HAVE_STD_STRING_COMPARE
817
818 static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
819 const wxStringCharType* s2, size_t l2)
820 {
821 if( l1 == l2 )
822 return wxStringMemcmp(s1, s2, l1);
823 else if( l1 < l2 )
824 {
825 int ret = wxStringMemcmp(s1, s2, l1);
826 return ret == 0 ? -1 : ret;
827 }
828 else
829 {
830 int ret = wxStringMemcmp(s1, s2, l2);
831 return ret == 0 ? +1 : ret;
832 }
833 }
834
835 int wxString::compare(const wxString& str) const
836 {
837 return ::wxDoCmp(m_impl.data(), m_impl.length(),
838 str.m_impl.data(), str.m_impl.length());
839 }
840
841 int wxString::compare(size_t nStart, size_t nLen,
842 const wxString& str) const
843 {
844 wxASSERT(nStart <= length());
845 size_type strLen = length() - nStart;
846 nLen = strLen < nLen ? strLen : nLen;
847
848 size_t pos, len;
849 PosLenToImpl(nStart, nLen, &pos, &len);
850
851 return ::wxDoCmp(m_impl.data() + pos, len,
852 str.m_impl.data(), str.m_impl.length());
853 }
854
855 int wxString::compare(size_t nStart, size_t nLen,
856 const wxString& str,
857 size_t nStart2, size_t nLen2) const
858 {
859 wxASSERT(nStart <= length());
860 wxASSERT(nStart2 <= str.length());
861 size_type strLen = length() - nStart,
862 strLen2 = str.length() - nStart2;
863 nLen = strLen < nLen ? strLen : nLen;
864 nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
865
866 size_t pos, len;
867 PosLenToImpl(nStart, nLen, &pos, &len);
868 size_t pos2, len2;
869 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
870
871 return ::wxDoCmp(m_impl.data() + pos, len,
872 str.m_impl.data() + pos2, len2);
873 }
874
875 int wxString::compare(const char* sz) const
876 {
877 SubstrBufFromMB str(ImplStr(sz, npos));
878 if ( str.len == npos )
879 str.len = wxStringStrlen(str.data);
880 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
881 }
882
883 int wxString::compare(const wchar_t* sz) const
884 {
885 SubstrBufFromWC str(ImplStr(sz, npos));
886 if ( str.len == npos )
887 str.len = wxStringStrlen(str.data);
888 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
889 }
890
891 int wxString::compare(size_t nStart, size_t nLen,
892 const char* sz, size_t nCount) const
893 {
894 wxASSERT(nStart <= length());
895 size_type strLen = length() - nStart;
896 nLen = strLen < nLen ? strLen : nLen;
897
898 size_t pos, len;
899 PosLenToImpl(nStart, nLen, &pos, &len);
900
901 SubstrBufFromMB str(ImplStr(sz, nCount));
902 if ( str.len == npos )
903 str.len = wxStringStrlen(str.data);
904
905 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
906 }
907
908 int wxString::compare(size_t nStart, size_t nLen,
909 const wchar_t* sz, size_t nCount) const
910 {
911 wxASSERT(nStart <= length());
912 size_type strLen = length() - nStart;
913 nLen = strLen < nLen ? strLen : nLen;
914
915 size_t pos, len;
916 PosLenToImpl(nStart, nLen, &pos, &len);
917
918 SubstrBufFromWC str(ImplStr(sz, nCount));
919 if ( str.len == npos )
920 str.len = wxStringStrlen(str.data);
921
922 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
923 }
924
925 #endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
926
927
928 // ---------------------------------------------------------------------------
929 // find_{first,last}_[not]_of functions
930 // ---------------------------------------------------------------------------
931
932 #if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
933
934 // NB: All these functions are implemented with the argument being wxChar*,
935 // i.e. widechar string in any Unicode build, even though native string
936 // representation is char* in the UTF-8 build. This is because we couldn't
937 // use memchr() to determine if a character is in a set encoded as UTF-8.
938
939 size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
940 {
941 return find_first_of(sz, nStart, wxStrlen(sz));
942 }
943
944 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
945 {
946 return find_first_not_of(sz, nStart, wxStrlen(sz));
947 }
948
949 size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
950 {
951 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
952
953 size_t idx = nStart;
954 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
955 {
956 if ( wxTmemchr(sz, *i, n) )
957 return idx;
958 }
959
960 return npos;
961 }
962
963 size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
964 {
965 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
966
967 size_t idx = nStart;
968 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
969 {
970 if ( !wxTmemchr(sz, *i, n) )
971 return idx;
972 }
973
974 return npos;
975 }
976
977
978 size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
979 {
980 return find_last_of(sz, nStart, wxStrlen(sz));
981 }
982
983 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
984 {
985 return find_last_not_of(sz, nStart, wxStrlen(sz));
986 }
987
988 size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
989 {
990 size_t len = length();
991
992 if ( nStart == npos )
993 {
994 nStart = len - 1;
995 }
996 else
997 {
998 wxASSERT_MSG( nStart <= len, _T("invalid index") );
999 }
1000
1001 size_t idx = nStart;
1002 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1003 i != rend(); --idx, ++i )
1004 {
1005 if ( wxTmemchr(sz, *i, n) )
1006 return idx;
1007 }
1008
1009 return npos;
1010 }
1011
1012 size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
1013 {
1014 size_t len = length();
1015
1016 if ( nStart == npos )
1017 {
1018 nStart = len - 1;
1019 }
1020 else
1021 {
1022 wxASSERT_MSG( nStart <= len, _T("invalid index") );
1023 }
1024
1025 size_t idx = nStart;
1026 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1027 i != rend(); --idx, ++i )
1028 {
1029 if ( !wxTmemchr(sz, *i, n) )
1030 return idx;
1031 }
1032
1033 return npos;
1034 }
1035
1036 size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
1037 {
1038 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
1039
1040 size_t idx = nStart;
1041 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
1042 {
1043 if ( *i != ch )
1044 return idx;
1045 }
1046
1047 return npos;
1048 }
1049
1050 size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
1051 {
1052 size_t len = length();
1053
1054 if ( nStart == npos )
1055 {
1056 nStart = len - 1;
1057 }
1058 else
1059 {
1060 wxASSERT_MSG( nStart <= len, _T("invalid index") );
1061 }
1062
1063 size_t idx = nStart;
1064 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1065 i != rend(); --idx, ++i )
1066 {
1067 if ( *i != ch )
1068 return idx;
1069 }
1070
1071 return npos;
1072 }
1073
// the functions above were implemented for wchar_t* arguments in Unicode
// build and char* in ANSI build; below are implementations for the other
// version:
//
// Each overload below converts its argument with STRCONV (a wxConvLibc
// conversion cast to const wxChar*) and forwards to the overload of the same
// name taking the native character type.
//
// NOTE(review): the overloads taking a count pass the same n both to the
// conversion and to the forwarded call; this presumably assumes that the
// conversion preserves the number of characters -- TODO confirm.
#if wxUSE_UNICODE
    #define wxOtherCharType char
    #define STRCONV         (const wxChar*)wxConvLibc.cMB2WC
#else
    #define wxOtherCharType wchar_t
    #define STRCONV         (const wxChar*)wxConvLibc.cWC2MB
#endif

size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_first_of(STRCONV(sz), nStart); }

size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
                               size_t n) const
    { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_last_of(STRCONV(sz), nStart); }
size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
                              size_t n) const
    { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_first_not_of(STRCONV(sz), nStart); }
size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
                                   size_t n) const
    { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
    { return find_last_not_of(STRCONV(sz), nStart); }
size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
                                  size_t n) const
    { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }

// the helper macros are only needed for the definitions above
#undef wxOtherCharType
#undef STRCONV
1109
1110 #endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1111
1112 // ===========================================================================
1113 // other common string functions
1114 // ===========================================================================
1115
1116 int wxString::CmpNoCase(const wxString& s) const
1117 {
1118 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
1119
1120 const_iterator i1 = begin();
1121 const_iterator end1 = end();
1122 const_iterator i2 = s.begin();
1123 const_iterator end2 = s.end();
1124
1125 for ( ; i1 != end1 && i2 != end2; ++i1, ++i2 )
1126 {
1127 wxUniChar lower1 = (wxChar)wxTolower(*i1);
1128 wxUniChar lower2 = (wxChar)wxTolower(*i2);
1129 if ( lower1 != lower2 )
1130 return lower1 < lower2 ? -1 : 1;
1131 }
1132
1133 size_t len1 = length();
1134 size_t len2 = s.length();
1135
1136 if ( len1 < len2 )
1137 return -1;
1138 else if ( len1 > len2 )
1139 return 1;
1140 return 0;
1141 }
1142
1143
1144 #if wxUSE_UNICODE
1145
1146 #ifdef __MWERKS__
1147 #ifndef __SCHAR_MAX__
1148 #define __SCHAR_MAX__ 127
1149 #endif
1150 #endif
1151
1152 wxString wxString::FromAscii(const char *ascii, size_t len)
1153 {
1154 if (!ascii || len == 0)
1155 return wxEmptyString;
1156
1157 wxString res;
1158
1159 {
1160 wxStringInternalBuffer buf(res, len);
1161 wxStringCharType *dest = buf;
1162
1163 for ( ; len > 0; --len )
1164 {
1165 unsigned char c = (unsigned char)*ascii++;
1166 wxASSERT_MSG( c < 0x80,
1167 _T("Non-ASCII value passed to FromAscii().") );
1168
1169 *dest++ = (wchar_t)c;
1170 }
1171 }
1172
1173 return res;
1174 }
1175
1176 wxString wxString::FromAscii(const char *ascii)
1177 {
1178 return FromAscii(ascii, wxStrlen(ascii));
1179 }
1180
1181 wxString wxString::FromAscii(char ascii)
1182 {
1183 // What do we do with '\0' ?
1184
1185 unsigned char c = (unsigned char)ascii;
1186
1187 wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") );
1188
1189 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1190 return wxString(wxUniChar((wchar_t)c));
1191 }
1192
1193 const wxCharBuffer wxString::ToAscii() const
1194 {
1195 // this will allocate enough space for the terminating NUL too
1196 wxCharBuffer buffer(length());
1197 char *dest = buffer.data();
1198
1199 for ( const_iterator i = begin(); i != end(); ++i )
1200 {
1201 wxUniChar c(*i);
1202 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1203 *dest++ = c.IsAscii() ? (char)c : '_';
1204
1205 // the output string can't have embedded NULs anyhow, so we can safely
1206 // stop at first of them even if we do have any
1207 if ( !c )
1208 break;
1209 }
1210
1211 return buffer;
1212 }
1213
1214 #endif // wxUSE_UNICODE
1215
1216 // extract string of length nCount starting at nFirst
1217 wxString wxString::Mid(size_t nFirst, size_t nCount) const
1218 {
1219 size_t nLen = length();
1220
1221 // default value of nCount is npos and means "till the end"
1222 if ( nCount == npos )
1223 {
1224 nCount = nLen - nFirst;
1225 }
1226
1227 // out-of-bounds requests return sensible things
1228 if ( nFirst + nCount > nLen )
1229 {
1230 nCount = nLen - nFirst;
1231 }
1232
1233 if ( nFirst > nLen )
1234 {
1235 // AllocCopy() will return empty string
1236 return wxEmptyString;
1237 }
1238
1239 wxString dest(*this, nFirst, nCount);
1240 if ( dest.length() != nCount )
1241 {
1242 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1243 }
1244
1245 return dest;
1246 }
1247
1248 // check that the string starts with prefix and return the rest of the string
1249 // in the provided pointer if it is not NULL, otherwise return false
1250 bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
1251 {
1252 if ( compare(0, prefix.length(), prefix) != 0 )
1253 return false;
1254
1255 if ( rest )
1256 {
1257 // put the rest of the string into provided pointer
1258 rest->assign(*this, prefix.length(), npos);
1259 }
1260
1261 return true;
1262 }
1263
1264
1265 // check that the string ends with suffix and return the rest of it in the
1266 // provided pointer if it is not NULL, otherwise return false
1267 bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
1268 {
1269 int start = length() - suffix.length();
1270
1271 if ( start < 0 || compare(start, npos, suffix) != 0 )
1272 return false;
1273
1274 if ( rest )
1275 {
1276 // put the rest of the string into provided pointer
1277 rest->assign(*this, 0, start);
1278 }
1279
1280 return true;
1281 }
1282
1283
1284 // extract nCount last (rightmost) characters
1285 wxString wxString::Right(size_t nCount) const
1286 {
1287 if ( nCount > length() )
1288 nCount = length();
1289
1290 wxString dest(*this, length() - nCount, nCount);
1291 if ( dest.length() != nCount ) {
1292 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1293 }
1294 return dest;
1295 }
1296
1297 // get all characters after the last occurrence of ch
1298 // (returns the whole string if ch not found)
1299 wxString wxString::AfterLast(wxUniChar ch) const
1300 {
1301 wxString str;
1302 int iPos = Find(ch, true);
1303 if ( iPos == wxNOT_FOUND )
1304 str = *this;
1305 else
1306 str.assign(*this, iPos + 1, npos);
1307
1308 return str;
1309 }
1310
1311 // extract nCount first (leftmost) characters
1312 wxString wxString::Left(size_t nCount) const
1313 {
1314 if ( nCount > length() )
1315 nCount = length();
1316
1317 wxString dest(*this, 0, nCount);
1318 if ( dest.length() != nCount ) {
1319 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1320 }
1321 return dest;
1322 }
1323
1324 // get all characters before the first occurrence of ch
1325 // (returns the whole string if ch not found)
1326 wxString wxString::BeforeFirst(wxUniChar ch) const
1327 {
1328 int iPos = Find(ch);
1329 if ( iPos == wxNOT_FOUND )
1330 iPos = length();
1331 return wxString(*this, 0, iPos);
1332 }
1333
1334 /// get all characters before the last occurrence of ch
1335 /// (returns empty string if ch not found)
1336 wxString wxString::BeforeLast(wxUniChar ch) const
1337 {
1338 wxString str;
1339 int iPos = Find(ch, true);
1340 if ( iPos != wxNOT_FOUND && iPos != 0 )
1341 str = wxString(c_str(), iPos);
1342
1343 return str;
1344 }
1345
1346 /// get all characters after the first occurrence of ch
1347 /// (returns empty string if ch not found)
1348 wxString wxString::AfterFirst(wxUniChar ch) const
1349 {
1350 wxString str;
1351 int iPos = Find(ch);
1352 if ( iPos != wxNOT_FOUND )
1353 str.assign(*this, iPos + 1, npos);
1354
1355 return str;
1356 }
1357
1358 // replace first (or all) occurrences of some substring with another one
1359 size_t wxString::Replace(const wxString& strOld,
1360 const wxString& strNew, bool bReplaceAll)
1361 {
1362 // if we tried to replace an empty string we'd enter an infinite loop below
1363 wxCHECK_MSG( !strOld.empty(), 0,
1364 _T("wxString::Replace(): invalid parameter") );
1365
1366 wxSTRING_INVALIDATE_CACHE();
1367
1368 size_t uiCount = 0; // count of replacements made
1369
1370 // optimize the special common case: replacement of one character by
1371 // another one (in UTF-8 case we can only do this for ASCII characters)
1372 //
1373 // benchmarks show that this special version is around 3 times faster
1374 // (depending on the proportion of matching characters and UTF-8/wchar_t
1375 // build)
1376 if ( strOld.m_impl.length() == 1 && strNew.m_impl.length() == 1 )
1377 {
1378 const wxStringCharType chOld = strOld.m_impl[0],
1379 chNew = strNew.m_impl[0];
1380
1381 // this loop is the simplified version of the one below
1382 for ( size_t pos = 0; ; )
1383 {
1384 pos = m_impl.find(chOld, pos);
1385 if ( pos == npos )
1386 break;
1387
1388 m_impl[pos++] = chNew;
1389
1390 uiCount++;
1391
1392 if ( !bReplaceAll )
1393 break;
1394 }
1395 }
1396 else // general case
1397 {
1398 const size_t uiOldLen = strOld.m_impl.length();
1399 const size_t uiNewLen = strNew.m_impl.length();
1400
1401 for ( size_t pos = 0; ; )
1402 {
1403 pos = m_impl.find(strOld.m_impl, pos);
1404 if ( pos == npos )
1405 break;
1406
1407 // replace this occurrence of the old string with the new one
1408 m_impl.replace(pos, uiOldLen, strNew.m_impl);
1409
1410 // move up pos past the string that was replaced
1411 pos += uiNewLen;
1412
1413 // increase replace count
1414 uiCount++;
1415
1416 // stop after the first one?
1417 if ( !bReplaceAll )
1418 break;
1419 }
1420 }
1421
1422 return uiCount;
1423 }
1424
1425 bool wxString::IsAscii() const
1426 {
1427 for ( const_iterator i = begin(); i != end(); ++i )
1428 {
1429 if ( !(*i).IsAscii() )
1430 return false;
1431 }
1432
1433 return true;
1434 }
1435
1436 bool wxString::IsWord() const
1437 {
1438 for ( const_iterator i = begin(); i != end(); ++i )
1439 {
1440 if ( !wxIsalpha(*i) )
1441 return false;
1442 }
1443
1444 return true;
1445 }
1446
1447 bool wxString::IsNumber() const
1448 {
1449 if ( empty() )
1450 return true;
1451
1452 const_iterator i = begin();
1453
1454 if ( *i == _T('-') || *i == _T('+') )
1455 ++i;
1456
1457 for ( ; i != end(); ++i )
1458 {
1459 if ( !wxIsdigit(*i) )
1460 return false;
1461 }
1462
1463 return true;
1464 }
1465
1466 wxString wxString::Strip(stripType w) const
1467 {
1468 wxString s = *this;
1469 if ( w & leading ) s.Trim(false);
1470 if ( w & trailing ) s.Trim(true);
1471 return s;
1472 }
1473
1474 // ---------------------------------------------------------------------------
1475 // case conversion
1476 // ---------------------------------------------------------------------------
1477
1478 wxString& wxString::MakeUpper()
1479 {
1480 for ( iterator it = begin(), en = end(); it != en; ++it )
1481 *it = (wxChar)wxToupper(*it);
1482
1483 return *this;
1484 }
1485
1486 wxString& wxString::MakeLower()
1487 {
1488 for ( iterator it = begin(), en = end(); it != en; ++it )
1489 *it = (wxChar)wxTolower(*it);
1490
1491 return *this;
1492 }
1493
1494 wxString& wxString::MakeCapitalized()
1495 {
1496 const iterator en = end();
1497 iterator it = begin();
1498 if ( it != en )
1499 {
1500 *it = (wxChar)wxToupper(*it);
1501 for ( ++it; it != en; ++it )
1502 *it = (wxChar)wxTolower(*it);
1503 }
1504
1505 return *this;
1506 }
1507
1508 // ---------------------------------------------------------------------------
1509 // trimming and padding
1510 // ---------------------------------------------------------------------------
1511
1512 // some compilers (VC++ 6.0 not to name them) return true for a call to
1513 // isspace('\xEA') in the C locale which seems to be broken to me, but we have
1514 // to live with this by checking that the character is a 7 bit one - even if
1515 // this may fail to detect some spaces (I don't know if Unicode doesn't have
1516 // space-like symbols somewhere except in the first 128 chars), it is arguably
1517 // still better than trimming away accented letters
1518 inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1519
1520 // trims spaces (in the sense of isspace) from left or right side
1521 wxString& wxString::Trim(bool bFromRight)
1522 {
1523 // first check if we're going to modify the string at all
1524 if ( !empty() &&
1525 (
1526 (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1527 (!bFromRight && wxSafeIsspace(GetChar(0u)))
1528 )
1529 )
1530 {
1531 if ( bFromRight )
1532 {
1533 // find last non-space character
1534 reverse_iterator psz = rbegin();
1535 while ( (psz != rend()) && wxSafeIsspace(*psz) )
1536 ++psz;
1537
1538 // truncate at trailing space start
1539 erase(psz.base(), end());
1540 }
1541 else
1542 {
1543 // find first non-space character
1544 iterator psz = begin();
1545 while ( (psz != end()) && wxSafeIsspace(*psz) )
1546 ++psz;
1547
1548 // fix up data and length
1549 erase(begin(), psz);
1550 }
1551 }
1552
1553 return *this;
1554 }
1555
1556 // adds nCount characters chPad to the string from either side
1557 wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
1558 {
1559 wxString s(chPad, nCount);
1560
1561 if ( bFromRight )
1562 *this += s;
1563 else
1564 {
1565 s += *this;
1566 swap(s);
1567 }
1568
1569 return *this;
1570 }
1571
1572 // truncate the string
1573 wxString& wxString::Truncate(size_t uiLen)
1574 {
1575 if ( uiLen < length() )
1576 {
1577 erase(begin() + uiLen, end());
1578 }
1579 //else: nothing to do, string is already short enough
1580
1581 return *this;
1582 }
1583
1584 // ---------------------------------------------------------------------------
1585 // finding (return wxNOT_FOUND if not found and index otherwise)
1586 // ---------------------------------------------------------------------------
1587
1588 // find a character
1589 int wxString::Find(wxUniChar ch, bool bFromEnd) const
1590 {
1591 size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
1592
1593 return (idx == npos) ? wxNOT_FOUND : (int)idx;
1594 }
1595
1596 // ----------------------------------------------------------------------------
1597 // conversion to numbers
1598 // ----------------------------------------------------------------------------
1599
// The implementation of all the functions below is exactly the same so factor
// it out. Note that number extraction works correctly on UTF-8 strings, so
// we can use wxStringCharType and wx_str() for maximum efficiency.

// DO_IF_NOT_WINCE(x) expands to x everywhere except under Windows CE where it
// expands to nothing; it is used below to wrap all uses of errno.
#ifndef __WXWINCE__
    #define DO_IF_NOT_WINCE(x) x
#else
    #define DO_IF_NOT_WINCE(x)
#endif

// Body of the integer conversion functions:
//  - out is the output pointer, false is returned if it is NULL
//  - base is the numeric base passed to func (asserted to be 0 or 2..36)
//  - func is the strtol()-like function to call
//  - T is the type of the value returned by func and stored in *out
//
// The macro expands to several statements ending with "return true" (without
// the final semicolon) and so must be the last thing in the function using
// it; *out is only modified on success.
#define WX_STRING_TO_INT_TYPE(out, base, func, T)                           \
    wxCHECK_MSG( out, false, _T("NULL output pointer") );                   \
    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );  \
                                                                            \
    DO_IF_NOT_WINCE( errno = 0; )                                           \
                                                                            \
    const wxStringCharType *start = wx_str();                               \
    wxStringCharType *end;                                                  \
    T val = func(start, &end, base);                                        \
                                                                            \
    /* return true only if scan was stopped by the terminating NUL and */   \
    /* if the string was not empty to start with and no under/overflow */   \
    /* occurred: */                                                         \
    if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )         \
        return false;                                                       \
    *out = val;                                                             \
    return true
1627
// Convert the string to a long in the given base using wxStrtol().
//
// Returns true and stores the value in *pVal only if the whole non-empty
// string was parsed without the range error being reported; otherwise returns
// false and leaves *pVal unchanged. See WX_STRING_TO_INT_TYPE for details.
bool wxString::ToLong(long *pVal, int base) const
{
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtol, long);
}
1632
// Convert the string to an unsigned long in the given base using wxStrtoul().
//
// Returns true and stores the value in *pVal only if the whole non-empty
// string was parsed without the range error being reported; otherwise returns
// false and leaves *pVal unchanged. See WX_STRING_TO_INT_TYPE for details.
bool wxString::ToULong(unsigned long *pVal, int base) const
{
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoul, unsigned long);
}
1637
// Convert the string to a wxLongLong_t in the given base using wxStrtoll().
//
// Returns true and stores the value in *pVal only if the whole non-empty
// string was parsed without the range error being reported; otherwise returns
// false and leaves *pVal unchanged. See WX_STRING_TO_INT_TYPE for details.
bool wxString::ToLongLong(wxLongLong_t *pVal, int base) const
{
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoll, wxLongLong_t);
}
1642
// Convert the string to a wxULongLong_t in the given base using wxStrtoull().
//
// Returns true and stores the value in *pVal only if the whole non-empty
// string was parsed without the range error being reported; otherwise returns
// false and leaves *pVal unchanged. See WX_STRING_TO_INT_TYPE for details.
bool wxString::ToULongLong(wxULongLong_t *pVal, int base) const
{
    WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoull, wxULongLong_t);
}
1647
1648 bool wxString::ToDouble(double *pVal) const
1649 {
1650 wxCHECK_MSG( pVal, false, _T("NULL output pointer") );
1651
1652 DO_IF_NOT_WINCE( errno = 0; )
1653
1654 const wxChar *start = c_str();
1655 wxChar *end;
1656 double val = wxStrtod(start, &end);
1657
1658 // return true only if scan was stopped by the terminating NUL and if the
1659 // string was not empty to start with and no under/overflow occurred
1660 if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )
1661 return false;
1662
1663 *pVal = val;
1664
1665 return true;
1666 }
1667
1668 // ---------------------------------------------------------------------------
1669 // formatted output
1670 // ---------------------------------------------------------------------------
1671
1672 #if !wxUSE_UTF8_LOCALE_ONLY
1673 /* static */
1674 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1675 wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
1676 #else
1677 wxString wxString::DoFormatWchar(const wxChar *format, ...)
1678 #endif
1679 {
1680 va_list argptr;
1681 va_start(argptr, format);
1682
1683 wxString s;
1684 s.PrintfV(format, argptr);
1685
1686 va_end(argptr);
1687
1688 return s;
1689 }
1690 #endif // !wxUSE_UTF8_LOCALE_ONLY
1691
#if wxUSE_UNICODE_UTF8
// Return a new string containing the variable arguments formatted according
// to the printf()-like char format string.
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
1707
1708 /* static */
1709 wxString wxString::FormatV(const wxString& format, va_list argptr)
1710 {
1711 wxString s;
1712 s.PrintfV(format, argptr);
1713 return s;
1714 }
1715
#if !wxUSE_UTF8_LOCALE_ONLY
// Replace the contents of the string with the variable arguments formatted
// according to the printf()-like wxChar format string and return the value
// returned by PrintfV().
#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
#else
int wxString::DoPrintfWchar(const wxChar *format, ...)
#endif
{
    va_list argptr;
    va_start(argptr, format);

#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
    // get a pointer to the wxString instance by downcasting from the mixin
    // base class
    //
    // NOTE(review): this comment used to say that dynamic_cast<> has to be
    // used here as the only cast working safely for downcasting when
    // multiple inheritance is used, but the code uses static_cast<> --
    // confirm which one is intended
    wxString *str = static_cast<wxString*>(this);
#else
    wxString *str = this;
#endif

    int iLen = str->PrintfV(format, argptr);

    va_end(argptr);

    return iLen;
}
#endif // !wxUSE_UTF8_LOCALE_ONLY
1742
#if wxUSE_UNICODE_UTF8
// Replace the contents of the string with the variable arguments formatted
// according to the printf()-like char format string and return the value
// returned by PrintfV().
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);

    const int len = PrintfV(format, args);

    va_end(args);

    return len;
}
#endif // wxUSE_UNICODE_UTF8
1756
1757 /*
    Uses wxVsnprintf and places the result into this string.
1759
1760 In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1761 it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in
1762 the ISO C99 (and thus SUSv3) standard the return value for the case of
1763 an undersized buffer is inconsistent. For conforming vsnprintf
1764 implementations the function must return the number of characters that
1765 would have been printed had the buffer been large enough. For conforming
1766 vswprintf implementations the function must return a negative number
1767 and set errno.
1768
1769 What vswprintf sets errno to is undefined but Darwin seems to set it to
1770 EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of
1771 those are defined in the standard and backed up by several conformance
1772 statements. Note that ENOMEM mentioned in the manual page does not
1773 apply to swprintf, only wprintf and fwprintf.
1774
1775 Official manual page:
1776 http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1777
1778 Some conformance statements (AIX, Solaris):
1779 http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1780 http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1781
1782 Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1783 EILSEQ and EINVAL are specifically defined to mean the error is other than
1784 an undersized buffer and no other errno are defined we treat those two
1785 as meaning hard errors and everything else gets the old behavior which
1786 is to keep looping and increasing buffer size until the function succeeds.
1787
1788 In practice it's impossible to determine before compilation which behavior
1789 may be used. The vswprintf function may have vsnprintf-like behavior or
1790 vice-versa. Behavior detected on one release can theoretically change
1791 with an updated release. Not to mention that configure testing for it
1792 would require the test to be run on the host system, not the build system
1793 which makes cross compilation difficult. Therefore, we make no assumptions
1794 about behavior and try our best to handle every known case, including the
1795 case where wxVsnprintf returns a negative number and fails to set errno.
1796
1797 There is yet one more non-standard implementation and that is our own.
1798 Fortunately, that can be detected at compile-time.
1799
1800 On top of all that, ISO C99 explicitly defines snprintf to write a null
    character to the last position of the specified buffer.  That would be at
    the given buffer size minus 1.  It is supposed to do this even if it
1803 turns out that the buffer is sized too small.
1804
1805 Darwin (tested on 10.5) follows the C99 behavior exactly.
1806
1807 Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1808 errno even when it fails. However, it only seems to ever fail due
1809 to an undersized buffer.
1810 */
1811 #if wxUSE_UNICODE_UTF8
1812 template<typename BufferType>
1813 #else
1814 // we only need one version in non-UTF8 builds and at least two Windows
1815 // compilers have problems with this function template, so use just one
1816 // normal function here
1817 #endif
1818 static int DoStringPrintfV(wxString& str,
1819 const wxString& format, va_list argptr)
1820 {
1821 int size = 1024;
1822
1823 for ( ;; )
1824 {
1825 #if wxUSE_UNICODE_UTF8
1826 BufferType tmp(str, size + 1);
1827 typename BufferType::CharType *buf = tmp;
1828 #else
1829 wxStringBuffer tmp(str, size + 1);
1830 wxChar *buf = tmp;
1831 #endif
1832
1833 if ( !buf )
1834 {
1835 // out of memory
1836
1837 // in UTF-8 build, leaving uninitialized junk in the buffer
1838 // could result in invalid non-empty UTF-8 string, so just
1839 // reset the string to empty on failure:
1840 buf[0] = '\0';
1841 return -1;
1842 }
1843
1844 // wxVsnprintf() may modify the original arg pointer, so pass it
1845 // only a copy
1846 va_list argptrcopy;
1847 wxVaCopy(argptrcopy, argptr);
1848
1849 #ifndef __WXWINCE__
1850 // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1851 errno = 0;
1852 #endif
1853 int len = wxVsnprintf(buf, size, format, argptrcopy);
1854 va_end(argptrcopy);
1855
1856 // some implementations of vsnprintf() don't NUL terminate
1857 // the string if there is not enough space for it so
1858 // always do it manually
1859 // FIXME: This really seems to be the wrong and would be an off-by-one
1860 // bug except the code above allocates an extra character.
1861 buf[size] = _T('\0');
1862
1863 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1864 // total number of characters which would have been written if the
1865 // buffer were large enough (newer standards such as Unix98)
1866 if ( len < 0 )
1867 {
1868 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1869 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1870 // is true if *both* of them use our own implementation,
1871 // otherwise we can't be sure
1872 #if wxUSE_WXVSNPRINTF
1873 // we know that our own implementation of wxVsnprintf() returns -1
1874 // only for a format error - thus there's something wrong with
1875 // the user's format string
1876 buf[0] = '\0';
1877 return -1;
1878 #else // possibly using system version
1879 // assume it only returns error if there is not enough space, but
1880 // as we don't know how much we need, double the current size of
1881 // the buffer
1882 #ifndef __WXWINCE__
1883 if( (errno == EILSEQ) || (errno == EINVAL) )
1884 // If errno was set to one of the two well-known hard errors
1885 // then fail immediately to avoid an infinite loop.
1886 return -1;
1887 else
1888 #endif // __WXWINCE__
1889 // still not enough, as we don't know how much we need, double the
1890 // current size of the buffer
1891 size *= 2;
1892 #endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
1893 }
1894 else if ( len >= size )
1895 {
1896 #if wxUSE_WXVSNPRINTF
1897 // we know that our own implementation of wxVsnprintf() returns
1898 // size+1 when there's not enough space but that's not the size
1899 // of the required buffer!
1900 size *= 2; // so we just double the current size of the buffer
1901 #else
1902 // some vsnprintf() implementations NUL-terminate the buffer and
1903 // some don't in len == size case, to be safe always add 1
1904 // FIXME: I don't quite understand this comment. The vsnprintf
1905 // function is specifically defined to return the number of
1906 // characters printed not including the null terminator.
1907 // So OF COURSE you need to add 1 to get the right buffer size.
1908 // The following line is definitely correct, no question.
1909 size = len + 1;
1910 #endif
1911 }
1912 else // ok, there was enough space
1913 {
1914 break;
1915 }
1916 }
1917
1918 // we could have overshot
1919 str.Shrink();
1920
1921 return str.length();
1922 }
1923
// Replace the contents of the string with the arguments from argptr
// formatted according to the printf()-like format string and return the
// value returned by DoStringPrintfV().
//
// This function only selects, depending on the build options and, in the
// UTF-8 build, on the value of wxLocaleIsUtf8, the buffer type to use with
// DoStringPrintfV() which does the real work.
int wxString::PrintfV(const wxString& format, va_list argptr)
{
#if wxUSE_UNICODE_UTF8
    // buffer type used for formatting directly as char data
    #if wxUSE_STL_BASED_WXSTRING
        typedef wxStringTypeBuffer<char> Utf8Buffer;
    #else
        typedef wxStringInternalBuffer Utf8Buffer;
    #endif
#endif

#if wxUSE_UTF8_LOCALE_ONLY
    return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
#else
    #if wxUSE_UNICODE_UTF8
    if ( wxLocaleIsUtf8 )
        return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
    else
        // wxChar* version
        return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
    #else
        return DoStringPrintfV(*this, format, argptr);
    #endif // UTF8/WCHAR
#endif
}
1948
1949 // ----------------------------------------------------------------------------
1950 // misc other operations
1951 // ----------------------------------------------------------------------------
1952
// returns true if the string matches the pattern which may contain '*' and
// '?' metacharacters (as usual, '?' matches any character and '*' any number
// of them)
//
// Notice that any '*' or '?' characters immediately following a '*' in the
// mask are simply skipped by the code below. If a mismatch is found after a
// '*' was seen, matching is retried once from the text position following the
// one where the last '*' was encountered.
bool wxString::Matches(const wxString& mask) const
{
    // I disable this code as it doesn't seem to be faster (in fact, it seems
    // to be much slower) than the old, hand-written code below and using it
    // here requires always linking with libregex even if the user code doesn't
    // use it
#if 0 // wxUSE_REGEX
    // first translate the shell-like mask into a regex
    wxString pattern;
    pattern.reserve(wxStrlen(pszMask));

    pattern += _T('^');
    while ( *pszMask )
    {
        switch ( *pszMask )
        {
            case _T('?'):
                pattern += _T('.');
                break;

            case _T('*'):
                pattern += _T(".*");
                break;

            case _T('^'):
            case _T('.'):
            case _T('$'):
            case _T('('):
            case _T(')'):
            case _T('|'):
            case _T('+'):
            case _T('\\'):
                // these characters are special in a RE, quote them
                // (however note that we don't quote '[' and ']' to allow
                // using them for Unix shell like matching)
                pattern += _T('\\');
                // fall through

            default:
                pattern += *pszMask;
        }

        pszMask++;
    }
    pattern += _T('$');

    // and now use it
    return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
#else // !wxUSE_REGEX
    // TODO: this is, of course, awfully inefficient...

    // FIXME-UTF8: implement using iterators, remove #if
#if wxUSE_UNICODE_UTF8
    // work on wide character copies of both strings in the UTF-8 build
    wxWCharBuffer maskBuf = mask.wc_str();
    wxWCharBuffer txtBuf = wc_str();
    const wxChar *pszMask = maskBuf.data();
    const wxChar *pszTxt = txtBuf.data();
#else
    const wxChar *pszMask = mask.wx_str();
    // the char currently being checked
    const wxChar *pszTxt = wx_str();
#endif

    // the last location where '*' matched
    const wxChar *pszLastStarInText = NULL;
    const wxChar *pszLastStarInMask = NULL;

match:
    for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
        switch ( *pszMask ) {
            case wxT('?'):
                // '?' needs a character to match, so fail at the end of text
                if ( *pszTxt == wxT('\0') )
                    return false;

                // pszTxt and pszMask will be incremented in the loop statement

                break;

            case wxT('*'):
                {
                    // remember where we started to be able to backtrack later
                    pszLastStarInText = pszTxt;
                    pszLastStarInMask = pszMask;

                    // ignore special chars immediately following this one
                    // (should this be an error?)
                    while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
                        pszMask++;

                    // if there is nothing more, match
                    if ( *pszMask == wxT('\0') )
                        return true;

                    // are there any other metacharacters in the mask?
                    size_t uiLenMask;
                    const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));

                    if ( pEndMask != NULL ) {
                        // we have to match the string between two metachars
                        uiLenMask = pEndMask - pszMask;
                    }
                    else {
                        // we have to match the remainder of the string
                        uiLenMask = wxStrlen(pszMask);
                    }

                    // look for the first occurrence of the literal part of
                    // the mask following the '*' in the remaining text
                    wxString strToMatch(pszMask, uiLenMask);
                    const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
                    if ( pMatch == NULL )
                        return false;

                    // -1 to compensate "++" in the loop
                    pszTxt = pMatch + uiLenMask - 1;
                    pszMask += uiLenMask - 1;
                }
                break;

            default:
                // any other character must match literally
                if ( *pszMask != *pszTxt )
                    return false;
                break;
        }
    }

    // match only if nothing left
    if ( *pszTxt == wxT('\0') )
        return true;

    // if we failed to match, backtrack if we can
    if ( pszLastStarInText ) {
        pszTxt = pszLastStarInText + 1;
        pszMask = pszLastStarInMask;

        // reset it so that we backtrack to this '*' only once
        pszLastStarInText = NULL;

        // don't bother resetting pszLastStarInMask, it's unnecessary

        goto match;
    }

    return false;
#endif // wxUSE_REGEX/!wxUSE_REGEX
}
2099
2100 // Count the number of chars
2101 int wxString::Freq(wxUniChar ch) const
2102 {
2103 int count = 0;
2104 for ( const_iterator i = begin(); i != end(); ++i )
2105 {
2106 if ( *i == ch )
2107 count ++;
2108 }
2109 return count;
2110 }
2111