]> git.saurik.com Git - wxWidgets.git/blame - src/common/string.cpp
define wxString::iterator::iterator_category correctly if wxUSE_STD_STRING and not...
[wxWidgets.git] / src / common / string.cpp
CommitLineData
c801d85f 1/////////////////////////////////////////////////////////////////////////////
8898456d 2// Name: src/common/string.cpp
c801d85f 3// Purpose: wxString class
59059feb 4// Author: Vadim Zeitlin, Ryan Norton
c801d85f
KB
5// Modified by:
6// Created: 29/01/98
7// RCS-ID: $Id$
8// Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
59059feb 9// (c) 2004 Ryan Norton <wxprojects@comcast.net>
65571936 10// Licence: wxWindows licence
c801d85f
KB
11/////////////////////////////////////////////////////////////////////////////
12
c801d85f
KB
13// ===========================================================================
14// headers, declarations, constants
15// ===========================================================================
16
17// For compilers that support precompilation, includes "wx.h".
18#include "wx/wxprec.h"
19
20#ifdef __BORLANDC__
8898456d 21 #pragma hdrstop
c801d85f
KB
22#endif
23
24#ifndef WX_PRECOMP
8898456d 25 #include "wx/string.h"
2523e9b7 26 #include "wx/wxcrtvararg.h"
6b769f3d 27#endif
c801d85f
KB
28
29#include <ctype.h>
92df97b8
WS
30
31#ifndef __WXWINCE__
32 #include <errno.h>
33#endif
34
c801d85f
KB
35#include <string.h>
36#include <stdlib.h>
9a08c20e 37
8116a0c5 38#include "wx/hashmap.h"
8f93a29f
VS
39
40// string handling functions used by wxString:
41#if wxUSE_UNICODE_UTF8
42 #define wxStringMemcpy memcpy
43 #define wxStringMemcmp memcmp
44 #define wxStringMemchr memchr
45 #define wxStringStrlen strlen
46#else
47 #define wxStringMemcpy wxTmemcpy
48 #define wxStringMemcmp wxTmemcmp
a7ea63e2
VS
49 #define wxStringMemchr wxTmemchr
50 #define wxStringStrlen wxStrlen
51#endif
8f93a29f 52
e87b7833 53
a7ea63e2
VS
54// ---------------------------------------------------------------------------
55// static class variables definition
56// ---------------------------------------------------------------------------
e87b7833 57
a7ea63e2
VS
58//According to STL _must_ be a -1 size_t
59const size_t wxString::npos = (size_t) -1;
8f93a29f 60
68482dc5 61#if wxUSE_STRING_POS_CACHE
68482dc5
VZ
62
63// gdb seems to be unable to display thread-local variables correctly, at least
64// not my 6.4.98 version under amd64, so provide this debugging helper to do it
65#ifdef __WXDEBUG__
66
67struct wxStrCacheDumper
68{
69 static void ShowAll()
70 {
71 puts("*** wxString cache dump:");
72 for ( unsigned n = 0; n < wxString::Cache::SIZE; n++ )
73 {
74 const wxString::Cache::Element&
8b73c531 75 c = wxString::GetCacheBegin()[n];
68482dc5
VZ
76
77 printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
78 n,
8b73c531 79 n == wxString::LastUsedCacheElement() ? " [*]" : "",
68482dc5
VZ
80 c.str,
81 (unsigned long)c.pos,
82 (unsigned long)c.impl,
83 (long)c.len);
84 }
85 }
86};
87
88void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
89
90#endif // __WXDEBUG__
91
92#ifdef wxPROFILE_STRING_CACHE
93
94wxString::CacheStats wxString::ms_cacheStats;
95
96namespace
97{
98
99struct ShowCacheStats
100{
101 ~ShowCacheStats()
102 {
103 const wxString::CacheStats& stats = wxString::ms_cacheStats;
104
105 if ( stats.postot )
106 {
107 puts("*** wxString cache statistics:");
108 printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
109 stats.postot);
110 printf("\tHits %u (of which %u not used) or %.2f%%\n",
111 stats.poshits,
112 stats.mishits,
113 100.*float(stats.poshits - stats.mishits)/stats.postot);
114 printf("\tAverage position requested: %.2f\n",
115 float(stats.sumpos) / stats.postot);
116 printf("\tAverage offset after cached hint: %.2f\n",
117 float(stats.sumofs) / stats.postot);
118 }
119
120 if ( stats.lentot )
121 {
122 printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
123 stats.lentot, 100.*float(stats.lenhits)/stats.lentot);
124 }
125 }
126} s_showCacheStats;
127
128} // anonymous namespace
129
130#endif // wxPROFILE_STRING_CACHE
131
132#endif // wxUSE_STRING_POS_CACHE
133
a7ea63e2
VS
134// ----------------------------------------------------------------------------
135// global functions
136// ----------------------------------------------------------------------------
e87b7833 137
a7ea63e2 138#if wxUSE_STD_IOSTREAM
8f93a29f 139
a7ea63e2 140#include <iostream>
8f93a29f 141
a7ea63e2 142wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
8f93a29f 143{
7a906e1a
VZ
144#if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
145 return os << (const char *)str.AsCharBuf();
a7ea63e2 146#else
7a906e1a 147 return os << str.AsInternal();
a7ea63e2 148#endif
8f93a29f
VS
149}
150
04abe4bc
VS
151wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
152{
153 return os << str.c_str();
154}
155
156wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str)
157{
158 return os << str.data();
159}
160
161#ifndef __BORLANDC__
162wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)
163{
164 return os << str.data();
165}
166#endif
167
6a6ea041 168#if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
6b61b594
VZ
169
170wxSTD wostream& operator<<(wxSTD wostream& wos, const wxString& str)
171{
172 return wos << str.wc_str();
173}
174
175wxSTD wostream& operator<<(wxSTD wostream& wos, const wxCStrData& str)
176{
177 return wos << str.AsWChar();
178}
179
180wxSTD wostream& operator<<(wxSTD wostream& wos, const wxWCharBuffer& str)
181{
182 return wos << str.data();
183}
184
6a6ea041 185#endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
6b61b594 186
a7ea63e2 187#endif // wxUSE_STD_IOSTREAM
e87b7833 188
81727065
VS
189// ===========================================================================
190// wxString class core
191// ===========================================================================
192
193#if wxUSE_UNICODE_UTF8
194
81727065
VS
195void wxString::PosLenToImpl(size_t pos, size_t len,
196 size_t *implPos, size_t *implLen) const
197{
198 if ( pos == npos )
68482dc5 199 {
81727065 200 *implPos = npos;
68482dc5
VZ
201 }
202 else // have valid start position
81727065 203 {
68482dc5
VZ
204 const const_iterator b = GetIterForNthChar(pos);
205 *implPos = wxStringImpl::const_iterator(b.impl()) - m_impl.begin();
81727065 206 if ( len == npos )
68482dc5 207 {
81727065 208 *implLen = npos;
68482dc5
VZ
209 }
210 else // have valid length too
81727065 211 {
68482dc5
VZ
212 // we need to handle the case of length specifying a substring
213 // going beyond the end of the string, just as std::string does
214 const const_iterator e(end());
215 const_iterator i(b);
216 while ( len && i <= e )
217 {
218 ++i;
219 --len;
220 }
221
222 *implLen = i.impl() - b.impl();
81727065
VS
223 }
224 }
225}
226
227#endif // wxUSE_UNICODE_UTF8
228
11aac4ba
VS
229// ----------------------------------------------------------------------------
230// wxCStrData converted strings caching
231// ----------------------------------------------------------------------------
232
132276cf
VS
233// FIXME-UTF8: temporarily disabled because it doesn't work with global
234// string objects; re-enable after fixing this bug and benchmarking
235// performance to see if using a hash is a good idea at all
236#if 0
237
11aac4ba
VS
238// For backward compatibility reasons, it must be possible to assign the value
239// returned by wxString::c_str() to a char* or wchar_t* variable and work with
240// it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
241// because the memory would be freed immediately, but it has to be valid as long
242// as the string is not modified, so that code like this still works:
243//
244// const wxChar *s = str.c_str();
245// while ( s ) { ... }
246
247// FIXME-UTF8: not thread safe!
248// FIXME-UTF8: we currently clear the cached conversion only when the string is
249// destroyed, but we should do it when the string is modified, to
250// keep memory usage down
251// FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
252// invalidated the cache on every change, we could keep the previous
253// conversion
254// FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
255// to use mb_str() or wc_str() instead of (const [w]char*)c_str()
256
257template<typename T>
258static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
259{
6c4ebcda 260 typename T::iterator i = hash.find(wxConstCast(s, wxString));
11aac4ba
VS
261 if ( i != hash.end() )
262 {
263 free(i->second);
264 hash.erase(i);
265 }
266}
267
268#if wxUSE_UNICODE
6c4ebcda
VS
269// NB: non-STL implementation doesn't compile with "const wxString*" key type,
270// so we have to use wxString* here and const-cast when used
11aac4ba
VS
271WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
272 wxStringCharConversionCache);
273static wxStringCharConversionCache gs_stringsCharCache;
274
275const char* wxCStrData::AsChar() const
276{
277 // remove previously cache value, if any (see FIXMEs above):
278 DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
279
280 // convert the string and keep it:
6c4ebcda
VS
281 const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
282 m_str->mb_str().release();
11aac4ba
VS
283
284 return s + m_offset;
285}
286#endif // wxUSE_UNICODE
287
288#if !wxUSE_UNICODE_WCHAR
289WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
290 wxStringWCharConversionCache);
291static wxStringWCharConversionCache gs_stringsWCharCache;
292
293const wchar_t* wxCStrData::AsWChar() const
294{
295 // remove previously cache value, if any (see FIXMEs above):
296 DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
297
298 // convert the string and keep it:
6c4ebcda
VS
299 const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
300 m_str->wc_str().release();
11aac4ba
VS
301
302 return s + m_offset;
303}
304#endif // !wxUSE_UNICODE_WCHAR
305
11aac4ba
VS
306wxString::~wxString()
307{
308#if wxUSE_UNICODE
309 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
310 DeleteStringFromConversionCache(gs_stringsCharCache, this);
311#endif
312#if !wxUSE_UNICODE_WCHAR
313 DeleteStringFromConversionCache(gs_stringsWCharCache, this);
314#endif
315}
132276cf
VS
316#endif
317
111d9948 318#if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
132276cf
VS
319const char* wxCStrData::AsChar() const
320{
111d9948
VS
321#if wxUSE_UNICODE_UTF8
322 if ( wxLocaleIsUtf8 )
323 return AsInternal();
324#endif
325 // under non-UTF8 locales, we have to convert the internal UTF-8
326 // representation using wxConvLibc and cache the result
327
132276cf 328 wxString *str = wxConstCast(m_str, wxString);
05f32fc3
VS
329
330 // convert the string:
2a7431e1
VZ
331 //
332 // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
333 // have it) but it's unfortunately not obvious to implement
334 // because we don't know how big buffer do we need for the
335 // given string length (in case of multibyte encodings, e.g.
336 // ISO-2022-JP or UTF-8 when internal representation is wchar_t)
337 //
338 // One idea would be to store more than just m_convertedToChar
339 // in wxString: then we could record the length of the string
340 // which was converted the last time and try to reuse the same
341 // buffer if the current length is not greater than it (this
342 // could still fail because string could have been modified in
343 // place but it would work most of the time, so we'd do it and
344 // only allocate the new buffer if in-place conversion returned
345 // an error). We could also store a bit saying if the string
346 // was modified since the last conversion (and update it in all
347 // operation modifying the string, of course) to avoid unneeded
348 // consequential conversions. But both of these ideas require
349 // adding more fields to wxString and require profiling results
350 // to be sure that we really gain enough from them to justify
351 // doing it.
05f32fc3
VS
352 wxCharBuffer buf(str->mb_str());
353
28be59b4
VZ
354 // if it failed, return empty string and not NULL to avoid crashes in code
355 // written with either wxWidgets 2 wxString or std::string behaviour in
356 // mind: neither of them ever returns NULL and so we shouldn't neither
357 if ( !buf )
358 return "";
359
05f32fc3
VS
360 if ( str->m_convertedToChar &&
361 strlen(buf) == strlen(str->m_convertedToChar) )
362 {
363 // keep the same buffer for as long as possible, so that several calls
364 // to c_str() in a row still work:
365 strcpy(str->m_convertedToChar, buf);
366 }
367 else
368 {
369 str->m_convertedToChar = buf.release();
370 }
371
372 // and keep it:
132276cf
VS
373 return str->m_convertedToChar + m_offset;
374}
111d9948 375#endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
132276cf
VS
376
377#if !wxUSE_UNICODE_WCHAR
378const wchar_t* wxCStrData::AsWChar() const
379{
380 wxString *str = wxConstCast(m_str, wxString);
05f32fc3
VS
381
382 // convert the string:
383 wxWCharBuffer buf(str->wc_str());
384
28be59b4
VZ
385 // notice that here, unlike above in AsChar(), conversion can't fail as our
386 // internal UTF-8 is always well-formed -- or the string was corrupted and
387 // all bets are off anyhow
388
05f32fc3
VS
389 // FIXME-UTF8: do the conversion in-place in the existing buffer
390 if ( str->m_convertedToWChar &&
391 wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
392 {
393 // keep the same buffer for as long as possible, so that several calls
394 // to c_str() in a row still work:
395 memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
396 }
397 else
398 {
399 str->m_convertedToWChar = buf.release();
400 }
401
402 // and keep it:
132276cf
VS
403 return str->m_convertedToWChar + m_offset;
404}
405#endif // !wxUSE_UNICODE_WCHAR
406
407// ===========================================================================
408// wxString class core
409// ===========================================================================
410
411// ---------------------------------------------------------------------------
412// construction and conversion
413// ---------------------------------------------------------------------------
11aac4ba 414
81727065 415#if wxUSE_UNICODE_WCHAR
8f93a29f
VS
416/* static */
417wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
04abe4bc 418 const wxMBConv& conv)
8f93a29f
VS
419{
420 // anything to do?
421 if ( !psz || nLength == 0 )
81727065 422 return SubstrBufFromMB(L"", 0);
8f93a29f
VS
423
424 if ( nLength == npos )
425 nLength = wxNO_LEN;
426
427 size_t wcLen;
428 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
429 if ( !wcLen )
81727065 430 return SubstrBufFromMB(_T(""), 0);
8f93a29f
VS
431 else
432 return SubstrBufFromMB(wcBuf, wcLen);
433}
81727065
VS
434#endif // wxUSE_UNICODE_WCHAR
435
436#if wxUSE_UNICODE_UTF8
437/* static */
438wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
439 const wxMBConv& conv)
440{
81727065
VS
441 // anything to do?
442 if ( !psz || nLength == 0 )
443 return SubstrBufFromMB("", 0);
444
111d9948
VS
445 // if psz is already in UTF-8, we don't have to do the roundtrip to
446 // wchar_t* and back:
447 if ( conv.IsUTF8() )
448 {
449 // we need to validate the input because UTF8 iterators assume valid
450 // UTF-8 sequence and psz may be invalid:
451 if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
452 {
9ef1ad0d
VZ
453 // we must pass the real string length to SubstrBufFromMB ctor
454 if ( nLength == npos )
455 nLength = psz ? strlen(psz) : 0;
111d9948
VS
456 return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz), nLength);
457 }
458 // else: do the roundtrip through wchar_t*
459 }
460
81727065
VS
461 if ( nLength == npos )
462 nLength = wxNO_LEN;
463
464 // first convert to wide string:
465 size_t wcLen;
466 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
467 if ( !wcLen )
468 return SubstrBufFromMB("", 0);
469
470 // and then to UTF-8:
4fdfe2f3 471 SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
81727065
VS
472 // widechar -> UTF-8 conversion isn't supposed to ever fail:
473 wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
474
475 return buf;
476}
477#endif // wxUSE_UNICODE_UTF8
478
479#if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
8f93a29f
VS
480/* static */
481wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
04abe4bc 482 const wxMBConv& conv)
8f93a29f
VS
483{
484 // anything to do?
485 if ( !pwz || nLength == 0 )
81727065 486 return SubstrBufFromWC("", 0);
8f93a29f
VS
487
488 if ( nLength == npos )
489 nLength = wxNO_LEN;
490
491 size_t mbLen;
492 wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
493 if ( !mbLen )
81727065 494 return SubstrBufFromWC("", 0);
8f93a29f
VS
495 else
496 return SubstrBufFromWC(mbBuf, mbLen);
497}
81727065 498#endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
8f93a29f
VS
499
500
81727065 501#if wxUSE_UNICODE_WCHAR
e87b7833 502
06386448 503//Convert wxString in Unicode mode to a multi-byte string
830f8f11 504const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
265d5cce 505{
81727065 506 return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL);
e87b7833
MB
507}
508
81727065 509#elif wxUSE_UNICODE_UTF8
e87b7833 510
81727065
VS
511const wxWCharBuffer wxString::wc_str() const
512{
4fdfe2f3
VZ
513 return wxMBConvStrictUTF8().cMB2WC
514 (
515 m_impl.c_str(),
516 m_impl.length() + 1, // size, not length
517 NULL
518 );
81727065
VS
519}
520
521const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
522{
111d9948
VS
523 if ( conv.IsUTF8() )
524 return wxCharBuffer::CreateNonOwned(m_impl.c_str());
525
81727065
VS
526 // FIXME-UTF8: use wc_str() here once we have buffers with length
527
528 size_t wcLen;
4fdfe2f3
VZ
529 wxWCharBuffer wcBuf(wxMBConvStrictUTF8().cMB2WC
530 (
531 m_impl.c_str(),
532 m_impl.length() + 1, // size
533 &wcLen
534 ));
81727065
VS
535 if ( !wcLen )
536 return wxCharBuffer("");
537
4f696af8 538 return conv.cWC2MB(wcBuf, wcLen+1, NULL);
81727065
VS
539}
540
541#else // ANSI
eec47cc6 542
7663d0d4 543//Converts this string to a wide character string if unicode
06386448 544//mode is not enabled and wxUSE_WCHAR_T is enabled
830f8f11 545const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
265d5cce 546{
81727065 547 return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL);
265d5cce 548}
7663d0d4 549
e87b7833
MB
550#endif // Unicode/ANSI
551
552// shrink to minimal size (releasing extra memory)
553bool wxString::Shrink()
554{
555 wxString tmp(begin(), end());
556 swap(tmp);
557 return tmp.length() == length();
558}
559
d8a4b666 560// deprecated compatibility code:
a7ea63e2 561#if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
c87a0bc8 562wxStringCharType *wxString::GetWriteBuf(size_t nLen)
d8a4b666
VS
563{
564 return DoGetWriteBuf(nLen);
565}
566
567void wxString::UngetWriteBuf()
568{
569 DoUngetWriteBuf();
570}
571
572void wxString::UngetWriteBuf(size_t nLen)
573{
574 DoUngetWriteBuf(nLen);
575}
a7ea63e2 576#endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
e87b7833 577
d8a4b666 578
e87b7833
MB
579// ---------------------------------------------------------------------------
580// data access
581// ---------------------------------------------------------------------------
582
583// all functions are inline in string.h
584
585// ---------------------------------------------------------------------------
e8f59039 586// concatenation operators
e87b7833
MB
587// ---------------------------------------------------------------------------
588
c801d85f 589/*
c801d85f
KB
590 * concatenation functions come in 5 flavours:
591 * string + string
592 * char + string and string + char
593 * C str + string and string + C str
594 */
595
b1801e0e 596wxString operator+(const wxString& str1, const wxString& str2)
c801d85f 597{
992527a5 598#if !wxUSE_STL_BASED_WXSTRING
8f93a29f
VS
599 wxASSERT( str1.IsValid() );
600 wxASSERT( str2.IsValid() );
e87b7833 601#endif
097c080b 602
3458e408
WS
603 wxString s = str1;
604 s += str2;
3168a13f 605
3458e408 606 return s;
c801d85f
KB
607}
608
c9f78968 609wxString operator+(const wxString& str, wxUniChar ch)
c801d85f 610{
992527a5 611#if !wxUSE_STL_BASED_WXSTRING
8f93a29f 612 wxASSERT( str.IsValid() );
e87b7833 613#endif
3168a13f 614
3458e408
WS
615 wxString s = str;
616 s += ch;
097c080b 617
3458e408 618 return s;
c801d85f
KB
619}
620
c9f78968 621wxString operator+(wxUniChar ch, const wxString& str)
c801d85f 622{
992527a5 623#if !wxUSE_STL_BASED_WXSTRING
8f93a29f 624 wxASSERT( str.IsValid() );
e87b7833 625#endif
097c080b 626
3458e408
WS
627 wxString s = ch;
628 s += str;
3168a13f 629
3458e408 630 return s;
c801d85f
KB
631}
632
8f93a29f 633wxString operator+(const wxString& str, const char *psz)
c801d85f 634{
992527a5 635#if !wxUSE_STL_BASED_WXSTRING
8f93a29f 636 wxASSERT( str.IsValid() );
e87b7833 637#endif
097c080b 638
3458e408 639 wxString s;
8f93a29f 640 if ( !s.Alloc(strlen(psz) + str.length()) ) {
3458e408
WS
641 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
642 }
643 s += str;
644 s += psz;
3168a13f 645
3458e408 646 return s;
c801d85f
KB
647}
648
8f93a29f 649wxString operator+(const wxString& str, const wchar_t *pwz)
c801d85f 650{
992527a5 651#if !wxUSE_STL_BASED_WXSTRING
8f93a29f
VS
652 wxASSERT( str.IsValid() );
653#endif
654
655 wxString s;
656 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
657 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
658 }
659 s += str;
660 s += pwz;
661
662 return s;
663}
664
665wxString operator+(const char *psz, const wxString& str)
666{
a7ea63e2
VS
667#if !wxUSE_STL_BASED_WXSTRING
668 wxASSERT( str.IsValid() );
669#endif
670
671 wxString s;
672 if ( !s.Alloc(strlen(psz) + str.length()) ) {
673 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
674 }
675 s = psz;
676 s += str;
677
678 return s;
679}
680
681wxString operator+(const wchar_t *pwz, const wxString& str)
682{
683#if !wxUSE_STL_BASED_WXSTRING
684 wxASSERT( str.IsValid() );
685#endif
686
687 wxString s;
688 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
689 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
690 }
691 s = pwz;
692 s += str;
693
694 return s;
695}
696
697// ---------------------------------------------------------------------------
698// string comparison
699// ---------------------------------------------------------------------------
700
52de37c7
VS
701bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
702{
703 return (length() == 1) && (compareWithCase ? GetChar(0u) == c
704 : wxToupper(GetChar(0u)) == wxToupper(c));
705}
706
a7ea63e2
VS
707#ifdef HAVE_STD_STRING_COMPARE
708
709// NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
710// UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
711// sort strings in characters code point order by sorting the byte sequence
712// in byte values order (i.e. what strcmp() and memcmp() do).
713
714int wxString::compare(const wxString& str) const
715{
716 return m_impl.compare(str.m_impl);
717}
718
719int wxString::compare(size_t nStart, size_t nLen,
720 const wxString& str) const
721{
722 size_t pos, len;
723 PosLenToImpl(nStart, nLen, &pos, &len);
724 return m_impl.compare(pos, len, str.m_impl);
725}
726
727int wxString::compare(size_t nStart, size_t nLen,
728 const wxString& str,
729 size_t nStart2, size_t nLen2) const
730{
731 size_t pos, len;
732 PosLenToImpl(nStart, nLen, &pos, &len);
733
734 size_t pos2, len2;
735 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
736
737 return m_impl.compare(pos, len, str.m_impl, pos2, len2);
738}
739
740int wxString::compare(const char* sz) const
741{
742 return m_impl.compare(ImplStr(sz));
743}
744
745int wxString::compare(const wchar_t* sz) const
746{
747 return m_impl.compare(ImplStr(sz));
748}
749
750int wxString::compare(size_t nStart, size_t nLen,
751 const char* sz, size_t nCount) const
752{
753 size_t pos, len;
754 PosLenToImpl(nStart, nLen, &pos, &len);
755
756 SubstrBufFromMB str(ImplStr(sz, nCount));
757
758 return m_impl.compare(pos, len, str.data, str.len);
759}
760
761int wxString::compare(size_t nStart, size_t nLen,
762 const wchar_t* sz, size_t nCount) const
763{
764 size_t pos, len;
765 PosLenToImpl(nStart, nLen, &pos, &len);
766
767 SubstrBufFromWC str(ImplStr(sz, nCount));
768
769 return m_impl.compare(pos, len, str.data, str.len);
770}
771
772#else // !HAVE_STD_STRING_COMPARE
773
774static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
775 const wxStringCharType* s2, size_t l2)
776{
777 if( l1 == l2 )
778 return wxStringMemcmp(s1, s2, l1);
779 else if( l1 < l2 )
780 {
781 int ret = wxStringMemcmp(s1, s2, l1);
782 return ret == 0 ? -1 : ret;
783 }
784 else
785 {
786 int ret = wxStringMemcmp(s1, s2, l2);
787 return ret == 0 ? +1 : ret;
788 }
789}
790
791int wxString::compare(const wxString& str) const
792{
793 return ::wxDoCmp(m_impl.data(), m_impl.length(),
794 str.m_impl.data(), str.m_impl.length());
795}
796
797int wxString::compare(size_t nStart, size_t nLen,
798 const wxString& str) const
799{
800 wxASSERT(nStart <= length());
801 size_type strLen = length() - nStart;
802 nLen = strLen < nLen ? strLen : nLen;
803
804 size_t pos, len;
805 PosLenToImpl(nStart, nLen, &pos, &len);
806
807 return ::wxDoCmp(m_impl.data() + pos, len,
808 str.m_impl.data(), str.m_impl.length());
809}
810
811int wxString::compare(size_t nStart, size_t nLen,
812 const wxString& str,
813 size_t nStart2, size_t nLen2) const
814{
815 wxASSERT(nStart <= length());
816 wxASSERT(nStart2 <= str.length());
817 size_type strLen = length() - nStart,
818 strLen2 = str.length() - nStart2;
819 nLen = strLen < nLen ? strLen : nLen;
820 nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
821
822 size_t pos, len;
823 PosLenToImpl(nStart, nLen, &pos, &len);
824 size_t pos2, len2;
825 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
826
827 return ::wxDoCmp(m_impl.data() + pos, len,
828 str.m_impl.data() + pos2, len2);
829}
830
831int wxString::compare(const char* sz) const
832{
833 SubstrBufFromMB str(ImplStr(sz, npos));
834 if ( str.len == npos )
835 str.len = wxStringStrlen(str.data);
836 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
837}
838
839int wxString::compare(const wchar_t* sz) const
840{
841 SubstrBufFromWC str(ImplStr(sz, npos));
842 if ( str.len == npos )
843 str.len = wxStringStrlen(str.data);
844 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
845}
846
847int wxString::compare(size_t nStart, size_t nLen,
848 const char* sz, size_t nCount) const
849{
850 wxASSERT(nStart <= length());
851 size_type strLen = length() - nStart;
852 nLen = strLen < nLen ? strLen : nLen;
097c080b 853
a7ea63e2
VS
854 size_t pos, len;
855 PosLenToImpl(nStart, nLen, &pos, &len);
3168a13f 856
a7ea63e2
VS
857 SubstrBufFromMB str(ImplStr(sz, nCount));
858 if ( str.len == npos )
859 str.len = wxStringStrlen(str.data);
860
861 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
c801d85f
KB
862}
863
a7ea63e2
VS
864int wxString::compare(size_t nStart, size_t nLen,
865 const wchar_t* sz, size_t nCount) const
8f93a29f 866{
a7ea63e2
VS
867 wxASSERT(nStart <= length());
868 size_type strLen = length() - nStart;
869 nLen = strLen < nLen ? strLen : nLen;
8f93a29f 870
a7ea63e2
VS
871 size_t pos, len;
872 PosLenToImpl(nStart, nLen, &pos, &len);
8f93a29f 873
a7ea63e2
VS
874 SubstrBufFromWC str(ImplStr(sz, nCount));
875 if ( str.len == npos )
876 str.len = wxStringStrlen(str.data);
877
878 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
8f93a29f
VS
879}
880
a7ea63e2
VS
881#endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
882
883
8f93a29f
VS
884// ---------------------------------------------------------------------------
885// find_{first,last}_[not]_of functions
886// ---------------------------------------------------------------------------
887
888#if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
c801d85f 889
8f93a29f
VS
890// NB: All these functions are implemented with the argument being wxChar*,
891// i.e. widechar string in any Unicode build, even though native string
892// representation is char* in the UTF-8 build. This is because we couldn't
893// use memchr() to determine if a character is in a set encoded as UTF-8.
894
895size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
dcb68102 896{
8f93a29f 897 return find_first_of(sz, nStart, wxStrlen(sz));
dcb68102
RN
898}
899
8f93a29f 900size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
dcb68102 901{
8f93a29f 902 return find_first_not_of(sz, nStart, wxStrlen(sz));
dcb68102
RN
903}
904
8f93a29f 905size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
dcb68102 906{
8f93a29f 907 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
dcb68102 908
8f93a29f
VS
909 size_t idx = nStart;
910 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
dcb68102 911 {
8f93a29f
VS
912 if ( wxTmemchr(sz, *i, n) )
913 return idx;
dcb68102 914 }
8f93a29f
VS
915
916 return npos;
917}
918
919size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
920{
921 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
922
923 size_t idx = nStart;
924 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
dcb68102 925 {
8f93a29f
VS
926 if ( !wxTmemchr(sz, *i, n) )
927 return idx;
928 }
929
930 return npos;
931}
932
933
934size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
935{
936 return find_last_of(sz, nStart, wxStrlen(sz));
937}
938
939size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
940{
941 return find_last_not_of(sz, nStart, wxStrlen(sz));
942}
943
944size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
945{
946 size_t len = length();
947
948 if ( nStart == npos )
949 {
950 nStart = len - 1;
dcb68102 951 }
2c09fb3b 952 else
dcb68102 953 {
8f93a29f 954 wxASSERT_MSG( nStart <= len, _T("invalid index") );
dcb68102 955 }
8f93a29f
VS
956
957 size_t idx = nStart;
958 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
959 i != rend(); --idx, ++i )
960 {
961 if ( wxTmemchr(sz, *i, n) )
962 return idx;
963 }
964
965 return npos;
dcb68102
RN
966}
967
8f93a29f 968size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
dcb68102 969{
8f93a29f
VS
970 size_t len = length();
971
972 if ( nStart == npos )
973 {
974 nStart = len - 1;
975 }
976 else
977 {
978 wxASSERT_MSG( nStart <= len, _T("invalid index") );
979 }
980
981 size_t idx = nStart;
982 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
983 i != rend(); --idx, ++i )
984 {
985 if ( !wxTmemchr(sz, *i, n) )
986 return idx;
987 }
988
989 return npos;
dcb68102
RN
990}
991
8f93a29f 992size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
dcb68102 993{
8f93a29f
VS
994 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
995
996 size_t idx = nStart;
997 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
998 {
999 if ( *i != ch )
1000 return idx;
1001 }
1002
1003 return npos;
1004}
1005
1006size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
1007{
1008 size_t len = length();
1009
1010 if ( nStart == npos )
1011 {
1012 nStart = len - 1;
1013 }
1014 else
1015 {
1016 wxASSERT_MSG( nStart <= len, _T("invalid index") );
1017 }
1018
1019 size_t idx = nStart;
1020 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1021 i != rend(); --idx, ++i )
1022 {
1023 if ( *i != ch )
1024 return idx;
1025 }
1026
1027 return npos;
1028}
1029
1030// the functions above were implemented for wchar_t* arguments in Unicode
1031// build and char* in ANSI build; below are implementations for the other
1032// version:
1033#if wxUSE_UNICODE
1034 #define wxOtherCharType char
1035 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
1036#else
1037 #define wxOtherCharType wchar_t
1038 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
1039#endif
1040
1041size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
1042 { return find_first_of(STRCONV(sz), nStart); }
1043
1044size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
1045 size_t n) const
1046 { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
1047size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
1048 { return find_last_of(STRCONV(sz), nStart); }
1049size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
1050 size_t n) const
1051 { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
1052size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
1053 { return find_first_not_of(STRCONV(sz), nStart); }
1054size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
1055 size_t n) const
1056 { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
1057size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
1058 { return find_last_not_of(STRCONV(sz), nStart); }
1059size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
1060 size_t n) const
1061 { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
1062
1063#undef wxOtherCharType
1064#undef STRCONV
1065
1066#endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1067
1068// ===========================================================================
1069// other common string functions
1070// ===========================================================================
1071
1072int wxString::CmpNoCase(const wxString& s) const
1073{
1074 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
1075
8f93a29f
VS
1076 const_iterator i1 = begin();
1077 const_iterator end1 = end();
1078 const_iterator i2 = s.begin();
1079 const_iterator end2 = s.end();
1080
0d8b0f94 1081 for ( ; i1 != end1 && i2 != end2; ++i1, ++i2 )
8f93a29f
VS
1082 {
1083 wxUniChar lower1 = (wxChar)wxTolower(*i1);
1084 wxUniChar lower2 = (wxChar)wxTolower(*i2);
1085 if ( lower1 != lower2 )
1086 return lower1 < lower2 ? -1 : 1;
1087 }
1088
1089 size_t len1 = length();
1090 size_t len2 = s.length();
dcb68102 1091
8f93a29f
VS
1092 if ( len1 < len2 )
1093 return -1;
1094 else if ( len1 > len2 )
1095 return 1;
1096 return 0;
dcb68102
RN
1097}
1098
1099
b1ac3b56 1100#if wxUSE_UNICODE
e015c2a3 1101
cf6bedce
SC
1102#ifdef __MWERKS__
1103#ifndef __SCHAR_MAX__
1104#define __SCHAR_MAX__ 127
1105#endif
1106#endif
1107
e6310bbc 1108wxString wxString::FromAscii(const char *ascii, size_t len)
b1ac3b56 1109{
e6310bbc 1110 if (!ascii || len == 0)
b1ac3b56 1111 return wxEmptyString;
e015c2a3 1112
b1ac3b56 1113 wxString res;
e015c2a3 1114
e6310bbc 1115 {
6798451b 1116 wxStringInternalBuffer buf(res, len);
602a857b 1117 wxStringCharType *dest = buf;
c1eada83 1118
602a857b
VS
1119 for ( ; len > 0; --len )
1120 {
1121 unsigned char c = (unsigned char)*ascii++;
1122 wxASSERT_MSG( c < 0x80,
1123 _T("Non-ASCII value passed to FromAscii().") );
c1eada83 1124
602a857b
VS
1125 *dest++ = (wchar_t)c;
1126 }
e015c2a3
VZ
1127 }
1128
b1ac3b56
RR
1129 return res;
1130}
1131
e6310bbc
VS
1132wxString wxString::FromAscii(const char *ascii)
1133{
0081dd72 1134 return FromAscii(ascii, wxStrlen(ascii));
e6310bbc
VS
1135}
1136
c5288c5c 1137wxString wxString::FromAscii(char ascii)
2b5f62a0
VZ
1138{
1139 // What do we do with '\0' ?
1140
c1eada83 1141 unsigned char c = (unsigned char)ascii;
8760bc65 1142
c1eada83
VS
1143 wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") );
1144
1145 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1146 return wxString(wxUniChar((wchar_t)c));
2b5f62a0
VZ
1147}
1148
b1ac3b56
RR
1149const wxCharBuffer wxString::ToAscii() const
1150{
e015c2a3
VZ
1151 // this will allocate enough space for the terminating NUL too
1152 wxCharBuffer buffer(length());
6e394fc6 1153 char *dest = buffer.data();
e015c2a3 1154
c1eada83 1155 for ( const_iterator i = begin(); i != end(); ++i )
b1ac3b56 1156 {
c1eada83
VS
1157 wxUniChar c(*i);
1158 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1159 *dest++ = c.IsAscii() ? (char)c : '_';
e015c2a3
VZ
1160
1161 // the output string can't have embedded NULs anyhow, so we can safely
1162 // stop at first of them even if we do have any
c1eada83 1163 if ( !c )
e015c2a3 1164 break;
b1ac3b56 1165 }
e015c2a3 1166
b1ac3b56
RR
1167 return buffer;
1168}
e015c2a3 1169
c1eada83 1170#endif // wxUSE_UNICODE
b1ac3b56 1171
c801d85f 1172// extract string of length nCount starting at nFirst
c801d85f
KB
1173wxString wxString::Mid(size_t nFirst, size_t nCount) const
1174{
73f507f5 1175 size_t nLen = length();
30d9011f 1176
73f507f5
WS
1177 // default value of nCount is npos and means "till the end"
1178 if ( nCount == npos )
1179 {
1180 nCount = nLen - nFirst;
1181 }
30d9011f 1182
73f507f5
WS
1183 // out-of-bounds requests return sensible things
1184 if ( nFirst + nCount > nLen )
1185 {
1186 nCount = nLen - nFirst;
1187 }
c801d85f 1188
73f507f5
WS
1189 if ( nFirst > nLen )
1190 {
1191 // AllocCopy() will return empty string
1192 return wxEmptyString;
1193 }
c801d85f 1194
73f507f5
WS
1195 wxString dest(*this, nFirst, nCount);
1196 if ( dest.length() != nCount )
1197 {
1198 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1199 }
30d9011f 1200
73f507f5 1201 return dest;
c801d85f
KB
1202}
1203
e87b7833 1204// check that the string starts with prefix and return the rest of the string
d775fa82 1205// in the provided pointer if it is not NULL, otherwise return false
c5e7a7d7 1206bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
f6bcfd97 1207{
c5e7a7d7
VS
1208 if ( compare(0, prefix.length(), prefix) != 0 )
1209 return false;
f6bcfd97
BP
1210
1211 if ( rest )
1212 {
1213 // put the rest of the string into provided pointer
c5e7a7d7 1214 rest->assign(*this, prefix.length(), npos);
f6bcfd97
BP
1215 }
1216
d775fa82 1217 return true;
f6bcfd97
BP
1218}
1219
3affcd07
VZ
1220
1221// check that the string ends with suffix and return the rest of it in the
1222// provided pointer if it is not NULL, otherwise return false
c5e7a7d7 1223bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
3affcd07 1224{
c5e7a7d7 1225 int start = length() - suffix.length();
81727065
VS
1226
1227 if ( start < 0 || compare(start, npos, suffix) != 0 )
3affcd07
VZ
1228 return false;
1229
1230 if ( rest )
1231 {
1232 // put the rest of the string into provided pointer
1233 rest->assign(*this, 0, start);
1234 }
1235
1236 return true;
1237}
1238
1239
c801d85f
KB
1240// extract nCount last (rightmost) characters
1241wxString wxString::Right(size_t nCount) const
1242{
e87b7833
MB
1243 if ( nCount > length() )
1244 nCount = length();
c801d85f 1245
e87b7833
MB
1246 wxString dest(*this, length() - nCount, nCount);
1247 if ( dest.length() != nCount ) {
b1801e0e
GD
1248 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1249 }
c801d85f
KB
1250 return dest;
1251}
1252
1253// get all characters after the last occurence of ch
1254// (returns the whole string if ch not found)
c9f78968 1255wxString wxString::AfterLast(wxUniChar ch) const
c801d85f
KB
1256{
1257 wxString str;
d775fa82 1258 int iPos = Find(ch, true);
3c67202d 1259 if ( iPos == wxNOT_FOUND )
c801d85f
KB
1260 str = *this;
1261 else
c9f78968 1262 str = wx_str() + iPos + 1;
c801d85f
KB
1263
1264 return str;
1265}
1266
1267// extract nCount first (leftmost) characters
1268wxString wxString::Left(size_t nCount) const
1269{
e87b7833
MB
1270 if ( nCount > length() )
1271 nCount = length();
c801d85f 1272
e87b7833
MB
1273 wxString dest(*this, 0, nCount);
1274 if ( dest.length() != nCount ) {
b1801e0e
GD
1275 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1276 }
c801d85f
KB
1277 return dest;
1278}
1279
1280// get all characters before the first occurence of ch
1281// (returns the whole string if ch not found)
c9f78968 1282wxString wxString::BeforeFirst(wxUniChar ch) const
c801d85f 1283{
e87b7833
MB
1284 int iPos = Find(ch);
1285 if ( iPos == wxNOT_FOUND ) iPos = length();
1286 return wxString(*this, 0, iPos);
c801d85f
KB
1287}
1288
1289/// get all characters before the last occurence of ch
1290/// (returns empty string if ch not found)
c9f78968 1291wxString wxString::BeforeLast(wxUniChar ch) const
c801d85f
KB
1292{
1293 wxString str;
d775fa82 1294 int iPos = Find(ch, true);
3c67202d 1295 if ( iPos != wxNOT_FOUND && iPos != 0 )
d1c9bbf6 1296 str = wxString(c_str(), iPos);
c801d85f
KB
1297
1298 return str;
1299}
1300
1301/// get all characters after the first occurence of ch
1302/// (returns empty string if ch not found)
c9f78968 1303wxString wxString::AfterFirst(wxUniChar ch) const
c801d85f
KB
1304{
1305 wxString str;
1306 int iPos = Find(ch);
3c67202d 1307 if ( iPos != wxNOT_FOUND )
c9f78968 1308 str = wx_str() + iPos + 1;
c801d85f
KB
1309
1310 return str;
1311}
1312
1313// replace first (or all) occurences of some substring with another one
8a540c88
VS
1314size_t wxString::Replace(const wxString& strOld,
1315 const wxString& strNew, bool bReplaceAll)
c801d85f 1316{
a8f1f1b2 1317 // if we tried to replace an empty string we'd enter an infinite loop below
8a540c88 1318 wxCHECK_MSG( !strOld.empty(), 0,
a8f1f1b2
VZ
1319 _T("wxString::Replace(): invalid parameter") );
1320
68482dc5
VZ
1321 wxSTRING_INVALIDATE_CACHE();
1322
510bb748 1323 size_t uiCount = 0; // count of replacements made
c801d85f 1324
8a627032
VZ
1325 // optimize the special common case: replacement of one character by
1326 // another one (in UTF-8 case we can only do this for ASCII characters)
1327 //
1328 // benchmarks show that this special version is around 3 times faster
1329 // (depending on the proportion of matching characters and UTF-8/wchar_t
1330 // build)
1331 if ( strOld.m_impl.length() == 1 && strNew.m_impl.length() == 1 )
1332 {
1333 const wxStringCharType chOld = strOld.m_impl[0],
1334 chNew = strNew.m_impl[0];
1335
1336 // this loop is the simplified version of the one below
1337 for ( size_t pos = 0; ; )
1338 {
1339 pos = m_impl.find(chOld, pos);
1340 if ( pos == npos )
1341 break;
c801d85f 1342
8a627032
VZ
1343 m_impl[pos++] = chNew;
1344
1345 uiCount++;
1346
1347 if ( !bReplaceAll )
1348 break;
1349 }
1350 }
1351 else // general case
510bb748 1352 {
8a627032
VZ
1353 const size_t uiOldLen = strOld.m_impl.length();
1354 const size_t uiNewLen = strNew.m_impl.length();
1355
1356 for ( size_t pos = 0; ; )
1357 {
1358 pos = m_impl.find(strOld.m_impl, pos);
1359 if ( pos == npos )
1360 break;
510bb748 1361
8a627032
VZ
1362 // replace this occurrence of the old string with the new one
1363 m_impl.replace(pos, uiOldLen, strNew.m_impl);
510bb748 1364
8a627032
VZ
1365 // move up pos past the string that was replaced
1366 pos += uiNewLen;
ad5bb7d6 1367
8a627032
VZ
1368 // increase replace count
1369 uiCount++;
394b2900 1370
8a627032
VZ
1371 // stop after the first one?
1372 if ( !bReplaceAll )
1373 break;
1374 }
c801d85f 1375 }
c801d85f 1376
510bb748 1377 return uiCount;
c801d85f
KB
1378}
1379
1380bool wxString::IsAscii() const
1381{
a4a44612
VS
1382 for ( const_iterator i = begin(); i != end(); ++i )
1383 {
1384 if ( !(*i).IsAscii() )
1385 return false;
1386 }
1387
1388 return true;
c801d85f 1389}
dd1eaa89 1390
c801d85f
KB
1391bool wxString::IsWord() const
1392{
a4a44612
VS
1393 for ( const_iterator i = begin(); i != end(); ++i )
1394 {
1395 if ( !wxIsalpha(*i) )
1396 return false;
1397 }
1398
1399 return true;
c801d85f 1400}
dd1eaa89 1401
c801d85f
KB
1402bool wxString::IsNumber() const
1403{
a4a44612
VS
1404 if ( empty() )
1405 return true;
1406
1407 const_iterator i = begin();
1408
1409 if ( *i == _T('-') || *i == _T('+') )
1410 ++i;
1411
1412 for ( ; i != end(); ++i )
1413 {
1414 if ( !wxIsdigit(*i) )
1415 return false;
1416 }
1417
1418 return true;
c801d85f
KB
1419}
1420
c801d85f
KB
1421wxString wxString::Strip(stripType w) const
1422{
1423 wxString s = *this;
d775fa82
WS
1424 if ( w & leading ) s.Trim(false);
1425 if ( w & trailing ) s.Trim(true);
c801d85f
KB
1426 return s;
1427}
1428
c801d85f
KB
1429// ---------------------------------------------------------------------------
1430// case conversion
1431// ---------------------------------------------------------------------------
1432
1433wxString& wxString::MakeUpper()
1434{
e87b7833
MB
1435 for ( iterator it = begin(), en = end(); it != en; ++it )
1436 *it = (wxChar)wxToupper(*it);
c801d85f
KB
1437
1438 return *this;
1439}
1440
1441wxString& wxString::MakeLower()
1442{
e87b7833
MB
1443 for ( iterator it = begin(), en = end(); it != en; ++it )
1444 *it = (wxChar)wxTolower(*it);
c801d85f
KB
1445
1446 return *this;
1447}
1448
0c7db140
VZ
1449wxString& wxString::MakeCapitalized()
1450{
1451 const iterator en = end();
1452 iterator it = begin();
1453 if ( it != en )
1454 {
1455 *it = (wxChar)wxToupper(*it);
1456 for ( ++it; it != en; ++it )
1457 *it = (wxChar)wxTolower(*it);
1458 }
1459
1460 return *this;
1461}
1462
c801d85f
KB
1463// ---------------------------------------------------------------------------
1464// trimming and padding
1465// ---------------------------------------------------------------------------
1466
d775fa82 1467// some compilers (VC++ 6.0 not to name them) return true for a call to
9d55bfef 1468// isspace('\xEA') in the C locale which seems to be broken to me, but we have
c95e653c 1469// to live with this by checking that the character is a 7 bit one - even if
9d55bfef 1470// this may fail to detect some spaces (I don't know if Unicode doesn't have
576c608d
VZ
1471// space-like symbols somewhere except in the first 128 chars), it is arguably
1472// still better than trimming away accented letters
1473inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1474
c801d85f
KB
1475// trims spaces (in the sense of isspace) from left or right side
1476wxString& wxString::Trim(bool bFromRight)
1477{
3458e408
WS
1478 // first check if we're going to modify the string at all
1479 if ( !empty() &&
1480 (
1481 (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1482 (!bFromRight && wxSafeIsspace(GetChar(0u)))
1483 )
2c3b684c 1484 )
2c3b684c 1485 {
3458e408
WS
1486 if ( bFromRight )
1487 {
1488 // find last non-space character
d4d02bd5 1489 reverse_iterator psz = rbegin();
32c62191 1490 while ( (psz != rend()) && wxSafeIsspace(*psz) )
0d8b0f94 1491 ++psz;
92df97b8 1492
3458e408 1493 // truncate at trailing space start
d4d02bd5 1494 erase(psz.base(), end());
3458e408
WS
1495 }
1496 else
1497 {
1498 // find first non-space character
1499 iterator psz = begin();
32c62191 1500 while ( (psz != end()) && wxSafeIsspace(*psz) )
0d8b0f94 1501 ++psz;
2c3b684c 1502
3458e408
WS
1503 // fix up data and length
1504 erase(begin(), psz);
1505 }
2c3b684c 1506 }
c801d85f 1507
3458e408 1508 return *this;
c801d85f
KB
1509}
1510
1511// adds nCount characters chPad to the string from either side
c9f78968 1512wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
c801d85f 1513{
3458e408 1514 wxString s(chPad, nCount);
c801d85f 1515
3458e408
WS
1516 if ( bFromRight )
1517 *this += s;
1518 else
1519 {
1520 s += *this;
1521 swap(s);
1522 }
c801d85f 1523
3458e408 1524 return *this;
c801d85f
KB
1525}
1526
1527// truncate the string
1528wxString& wxString::Truncate(size_t uiLen)
1529{
3458e408
WS
1530 if ( uiLen < length() )
1531 {
1532 erase(begin() + uiLen, end());
1533 }
1534 //else: nothing to do, string is already short enough
c801d85f 1535
3458e408 1536 return *this;
c801d85f
KB
1537}
1538
1539// ---------------------------------------------------------------------------
3c67202d 1540// finding (return wxNOT_FOUND if not found and index otherwise)
c801d85f
KB
1541// ---------------------------------------------------------------------------
1542
1543// find a character
c9f78968 1544int wxString::Find(wxUniChar ch, bool bFromEnd) const
c801d85f 1545{
3458e408 1546 size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
c801d85f 1547
3458e408 1548 return (idx == npos) ? wxNOT_FOUND : (int)idx;
c801d85f
KB
1549}
1550
cd0b1709
VZ
1551// ----------------------------------------------------------------------------
1552// conversion to numbers
1553// ----------------------------------------------------------------------------
1554
52de37c7
VS
// The implementation of all the integer conversion functions below is exactly
// the same so it is factored out in a macro. Note that number extraction
// works correctly on UTF-8 strings, so we can use wxStringCharType and
// wx_str() for maximum efficiency.

// expands to its argument everywhere except under Windows CE, where the code
// using errno is compiled out
#ifdef __WXWINCE__
    #define DO_IF_NOT_WINCE(x)
#else
    #define DO_IF_NOT_WINCE(x)      x
#endif

// body of a "bool ToXXX(T *out, int base) const" method: converts the whole
// string using func (which must have strtol()-like signature) and stores the
// result in *out on success
#define WX_STRING_TO_INT_TYPE(out, base, func, T)                           \
    wxCHECK_MSG( out, false, _T("NULL output pointer") );                   \
    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );  \
                                                                            \
    DO_IF_NOT_WINCE( errno = 0; )                                           \
                                                                            \
    const wxStringCharType *strStart = wx_str();                            \
    wxStringCharType *strStop;                                              \
    T val = func(strStart, &strStop, base);                                 \
                                                                            \
    /* return true only if scan was stopped by the terminating NUL and */   \
    /* if the string was not empty to start with and no under/overflow */   \
    /* occurred: */                                                         \
    if ( *strStop || strStop == strStart                                    \
            DO_IF_NOT_WINCE(|| errno == ERANGE) )                           \
        return false;                                                       \
    *out = val;                                                             \
    return true
cd0b1709 1582
c95e653c 1583bool wxString::ToLong(long *pVal, int base) const
cd0b1709 1584{
c95e653c 1585 WX_STRING_TO_INT_TYPE(pVal, base, wxStrtol, long);
619dcb09 1586}
cd0b1709 1587
c95e653c 1588bool wxString::ToULong(unsigned long *pVal, int base) const
619dcb09 1589{
c95e653c 1590 WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoul, unsigned long);
cd0b1709
VZ
1591}
1592
c95e653c 1593bool wxString::ToLongLong(wxLongLong_t *pVal, int base) const
d6718dd1 1594{
c95e653c 1595 WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoll, wxLongLong_t);
d6718dd1
VZ
1596}
1597
c95e653c 1598bool wxString::ToULongLong(wxULongLong_t *pVal, int base) const
d6718dd1 1599{
c95e653c 1600 WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoull, wxULongLong_t);
d6718dd1
VZ
1601}
1602
c95e653c 1603bool wxString::ToDouble(double *pVal) const
cd0b1709 1604{
c95e653c 1605 wxCHECK_MSG( pVal, false, _T("NULL output pointer") );
cd0b1709 1606
c95e653c 1607 DO_IF_NOT_WINCE( errno = 0; )
e71e5b37 1608
cd0b1709
VZ
1609 const wxChar *start = c_str();
1610 wxChar *end;
c95e653c 1611 double val = wxStrtod(start, &end);
cd0b1709 1612
d775fa82 1613 // return true only if scan was stopped by the terminating NUL and if the
bda041e5 1614 // string was not empty to start with and no under/overflow occurred
c95e653c
VZ
1615 if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )
1616 return false;
1617
1618 *pVal = val;
1619
1620 return true;
cd0b1709
VZ
1621}
1622
c801d85f 1623// ---------------------------------------------------------------------------
9efd3367 1624// formatted output
c801d85f 1625// ---------------------------------------------------------------------------
378b05f7 1626
d1f6e2cf 1627#if !wxUSE_UTF8_LOCALE_ONLY
341e7d28 1628/* static */
c9f78968 1629#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1528e0b8 1630wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
c9f78968 1631#else
d1f6e2cf 1632wxString wxString::DoFormatWchar(const wxChar *format, ...)
c9f78968 1633#endif
341e7d28 1634{
77c3e48a 1635 va_list argptr;
c9f78968 1636 va_start(argptr, format);
341e7d28 1637
77c3e48a 1638 wxString s;
c9f78968 1639 s.PrintfV(format, argptr);
341e7d28 1640
77c3e48a 1641 va_end(argptr);
341e7d28 1642
77c3e48a 1643 return s;
341e7d28 1644}
d1f6e2cf
VS
1645#endif // !wxUSE_UTF8_LOCALE_ONLY
1646
#if wxUSE_UNICODE_UTF8
// return a new string formatted from the char format string and the
// variable arguments following it
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list argptr;
    va_start(argptr, format);
    result.PrintfV(format, argptr);
    va_end(argptr);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
341e7d28
VZ
1662
1663/* static */
c9f78968 1664wxString wxString::FormatV(const wxString& format, va_list argptr)
341e7d28
VZ
1665{
1666 wxString s;
c9f78968 1667 s.PrintfV(format, argptr);
341e7d28
VZ
1668 return s;
1669}
1670
d1f6e2cf 1671#if !wxUSE_UTF8_LOCALE_ONLY
c9f78968 1672#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
d1f6e2cf 1673int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
c9f78968 1674#else
d1f6e2cf 1675int wxString::DoPrintfWchar(const wxChar *format, ...)
c9f78968 1676#endif
c801d85f 1677{
ba9bbf13 1678 va_list argptr;
c9f78968 1679 va_start(argptr, format);
c801d85f 1680
c9f78968
VS
1681#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1682 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1683 // because it's the only cast that works safely for downcasting when
1684 // multiple inheritance is used:
1685 wxString *str = static_cast<wxString*>(this);
1686#else
1687 wxString *str = this;
1688#endif
1689
1690 int iLen = str->PrintfV(format, argptr);
c801d85f 1691
ba9bbf13 1692 va_end(argptr);
c801d85f 1693
ba9bbf13 1694 return iLen;
c801d85f 1695}
d1f6e2cf
VS
1696#endif // !wxUSE_UTF8_LOCALE_ONLY
1697
#if wxUSE_UNICODE_UTF8
// replace the contents of the string with the result of formatting the
// variable arguments according to the char format string; returns whatever
// PrintfV() returns
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list argptr;
    va_start(argptr, format);
    const int iLen = PrintfV(format, argptr);
    va_end(argptr);

    return iLen;
}
#endif // wxUSE_UNICODE_UTF8
c801d85f 1711
67612ff1
DE
1712/*
1713 Uses wxVsnprintf and places the result into this string.
1714
1715 In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1716 it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in
1717 the ISO C99 (and thus SUSv3) standard the return value for the case of
1718 an undersized buffer is inconsistent. For conforming vsnprintf
1719 implementations the function must return the number of characters that
1720 would have been printed had the buffer been large enough. For conforming
1721 vswprintf implementations the function must return a negative number
1722 and set errno.
1723
1724 What vswprintf sets errno to is undefined but Darwin seems to set it to
a9a854d7
DE
1725 EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of
1726 those are defined in the standard and backed up by several conformance
1727 statements. Note that ENOMEM mentioned in the manual page does not
1728 apply to swprintf, only wprintf and fwprintf.
1729
1730 Official manual page:
1731 http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1732
1733 Some conformance statements (AIX, Solaris):
1734 http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1735 http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1736
1737 Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1738 EILSEQ and EINVAL are specifically defined to mean the error is other than
1739 an undersized buffer and no other errno are defined we treat those two
1740 as meaning hard errors and everything else gets the old behavior which
1741 is to keep looping and increasing buffer size until the function succeeds.
c95e653c 1742
67612ff1
DE
1743 In practice it's impossible to determine before compilation which behavior
1744 may be used. The vswprintf function may have vsnprintf-like behavior or
1745 vice-versa. Behavior detected on one release can theoretically change
1746 with an updated release. Not to mention that configure testing for it
1747 would require the test to be run on the host system, not the build system
1748 which makes cross compilation difficult. Therefore, we make no assumptions
1749 about behavior and try our best to handle every known case, including the
1750 case where wxVsnprintf returns a negative number and fails to set errno.
1751
1752 There is yet one more non-standard implementation and that is our own.
1753 Fortunately, that can be detected at compile-time.
1754
1755 On top of all that, ISO C99 explicitly defines snprintf to write a null
1756 character to the last position of the specified buffer. That would be at
1757 the given buffer size minus 1. It is supposed to do this even if it
1758 turns out that the buffer is sized too small.
1759
1760 Darwin (tested on 10.5) follows the C99 behavior exactly.
1761
1762 Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1763 errno even when it fails. However, it only seems to ever fail due
1764 to an undersized buffer.
1765*/
2523e9b7
VS
1766#if wxUSE_UNICODE_UTF8
1767template<typename BufferType>
1768#else
1769// we only need one version in non-UTF8 builds and at least two Windows
1770// compilers have problems with this function template, so use just one
1771// normal function here
1772#endif
1773static int DoStringPrintfV(wxString& str,
1774 const wxString& format, va_list argptr)
c801d85f 1775{
f6f5941b 1776 int size = 1024;
e87b7833 1777
f6f5941b
VZ
1778 for ( ;; )
1779 {
2523e9b7
VS
1780#if wxUSE_UNICODE_UTF8
1781 BufferType tmp(str, size + 1);
1782 typename BufferType::CharType *buf = tmp;
1783#else
1784 wxStringBuffer tmp(str, size + 1);
de2589be 1785 wxChar *buf = tmp;
2523e9b7 1786#endif
2bb67b80 1787
ba9bbf13
WS
1788 if ( !buf )
1789 {
1790 // out of memory
a33c7045
VS
1791
1792 // in UTF-8 build, leaving uninitialized junk in the buffer
1793 // could result in invalid non-empty UTF-8 string, so just
1794 // reset the string to empty on failure:
1795 buf[0] = '\0';
ba9bbf13 1796 return -1;
e87b7833 1797 }
f6f5941b 1798
ba9bbf13
WS
1799 // wxVsnprintf() may modify the original arg pointer, so pass it
1800 // only a copy
1801 va_list argptrcopy;
1802 wxVaCopy(argptrcopy, argptr);
67612ff1
DE
1803
1804#ifndef __WXWINCE__
1805 // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1806 errno = 0;
1807#endif
2523e9b7 1808 int len = wxVsnprintf(buf, size, format, argptrcopy);
ba9bbf13
WS
1809 va_end(argptrcopy);
1810
1811 // some implementations of vsnprintf() don't NUL terminate
1812 // the string if there is not enough space for it so
1813 // always do it manually
67612ff1
DE
1814 // FIXME: This really seems to be the wrong and would be an off-by-one
1815 // bug except the code above allocates an extra character.
ba9bbf13
WS
1816 buf[size] = _T('\0');
1817
caff62f2
VZ
1818 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1819 // total number of characters which would have been written if the
b1727cfe 1820 // buffer were large enough (newer standards such as Unix98)
de2589be
VZ
1821 if ( len < 0 )
1822 {
52de37c7
VS
1823 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1824 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1825 // is true if *both* of them use our own implementation,
1826 // otherwise we can't be sure
f2bbe5b6
VZ
1827#if wxUSE_WXVSNPRINTF
1828 // we know that our own implementation of wxVsnprintf() returns -1
1829 // only for a format error - thus there's something wrong with
1830 // the user's format string
a33c7045 1831 buf[0] = '\0';
f2bbe5b6 1832 return -1;
52de37c7
VS
1833#else // possibly using system version
1834 // assume it only returns error if there is not enough space, but
1835 // as we don't know how much we need, double the current size of
1836 // the buffer
67612ff1 1837#ifndef __WXWINCE__
a9a854d7
DE
1838 if( (errno == EILSEQ) || (errno == EINVAL) )
1839 // If errno was set to one of the two well-known hard errors
1840 // then fail immediately to avoid an infinite loop.
1841 return -1;
1842 else
1843#endif // __WXWINCE__
67612ff1
DE
1844 // still not enough, as we don't know how much we need, double the
1845 // current size of the buffer
1846 size *= 2;
f2bbe5b6 1847#endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
de2589be 1848 }
64f8f94c 1849 else if ( len >= size )
de2589be 1850 {
f2bbe5b6 1851#if wxUSE_WXVSNPRINTF
c95e653c 1852 // we know that our own implementation of wxVsnprintf() returns
f2bbe5b6
VZ
1853 // size+1 when there's not enough space but that's not the size
1854 // of the required buffer!
1855 size *= 2; // so we just double the current size of the buffer
1856#else
64f8f94c
VZ
1857 // some vsnprintf() implementations NUL-terminate the buffer and
1858 // some don't in len == size case, to be safe always add 1
67612ff1
DE
1859 // FIXME: I don't quite understand this comment. The vsnprintf
1860 // function is specifically defined to return the number of
1861 // characters printed not including the null terminator.
1862 // So OF COURSE you need to add 1 to get the right buffer size.
1863 // The following line is definitely correct, no question.
64f8f94c 1864 size = len + 1;
f2bbe5b6 1865#endif
de2589be
VZ
1866 }
1867 else // ok, there was enough space
f6f5941b 1868 {
f6f5941b
VZ
1869 break;
1870 }
f6f5941b
VZ
1871 }
1872
1873 // we could have overshot
2523e9b7
VS
1874 str.Shrink();
1875
1876 return str.length();
1877}
c801d85f 1878
2523e9b7
VS
1879int wxString::PrintfV(const wxString& format, va_list argptr)
1880{
2523e9b7
VS
1881#if wxUSE_UNICODE_UTF8
1882 #if wxUSE_STL_BASED_WXSTRING
1883 typedef wxStringTypeBuffer<char> Utf8Buffer;
1884 #else
6798451b 1885 typedef wxStringInternalBuffer Utf8Buffer;
2523e9b7
VS
1886 #endif
1887#endif
1888
1889#if wxUSE_UTF8_LOCALE_ONLY
c6255a6e 1890 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
2523e9b7
VS
1891#else
1892 #if wxUSE_UNICODE_UTF8
1893 if ( wxLocaleIsUtf8 )
c6255a6e 1894 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
2523e9b7
VS
1895 else
1896 // wxChar* version
c6255a6e 1897 return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
2523e9b7 1898 #else
c6255a6e 1899 return DoStringPrintfV(*this, format, argptr);
2523e9b7
VS
1900 #endif // UTF8/WCHAR
1901#endif
c801d85f
KB
1902}
1903
097c080b
VZ
1904// ----------------------------------------------------------------------------
1905// misc other operations
1906// ----------------------------------------------------------------------------
0c5d3e1c 1907
d775fa82 1908// returns true if the string matches the pattern which may contain '*' and
0c5d3e1c
VZ
1909// '?' metacharacters (as usual, '?' matches any character and '*' any number
1910// of them)
8a540c88 1911bool wxString::Matches(const wxString& mask) const
097c080b 1912{
d6044f58
VZ
1913 // I disable this code as it doesn't seem to be faster (in fact, it seems
1914 // to be much slower) than the old, hand-written code below and using it
1915 // here requires always linking with libregex even if the user code doesn't
1916 // use it
1917#if 0 // wxUSE_REGEX
706c2ac9
VZ
1918 // first translate the shell-like mask into a regex
1919 wxString pattern;
1920 pattern.reserve(wxStrlen(pszMask));
1921
1922 pattern += _T('^');
1923 while ( *pszMask )
1924 {
1925 switch ( *pszMask )
1926 {
1927 case _T('?'):
1928 pattern += _T('.');
1929 break;
1930
1931 case _T('*'):
1932 pattern += _T(".*");
1933 break;
1934
1935 case _T('^'):
1936 case _T('.'):
1937 case _T('$'):
1938 case _T('('):
1939 case _T(')'):
1940 case _T('|'):
1941 case _T('+'):
1942 case _T('\\'):
1943 // these characters are special in a RE, quote them
1944 // (however note that we don't quote '[' and ']' to allow
1945 // using them for Unix shell like matching)
1946 pattern += _T('\\');
1947 // fall through
1948
1949 default:
1950 pattern += *pszMask;
1951 }
1952
1953 pszMask++;
1954 }
1955 pattern += _T('$');
1956
1957 // and now use it
1958 return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
1959#else // !wxUSE_REGEX
9a4232dc
VZ
1960 // TODO: this is, of course, awfully inefficient...
1961
8a540c88
VS
1962 // FIXME-UTF8: implement using iterators, remove #if
1963#if wxUSE_UNICODE_UTF8
1964 wxWCharBuffer maskBuf = mask.wc_str();
1965 wxWCharBuffer txtBuf = wc_str();
1966 const wxChar *pszMask = maskBuf.data();
1967 const wxChar *pszTxt = txtBuf.data();
1968#else
1969 const wxChar *pszMask = mask.wx_str();
9a4232dc 1970 // the char currently being checked
8a540c88
VS
1971 const wxChar *pszTxt = wx_str();
1972#endif
9a4232dc
VZ
1973
1974 // the last location where '*' matched
1975 const wxChar *pszLastStarInText = NULL;
1976 const wxChar *pszLastStarInMask = NULL;
1977
1978match:
1979 for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
097c080b 1980 switch ( *pszMask ) {
223d09f6
KB
1981 case wxT('?'):
1982 if ( *pszTxt == wxT('\0') )
d775fa82 1983 return false;
097c080b 1984
9a4232dc 1985 // pszTxt and pszMask will be incremented in the loop statement
0c5d3e1c 1986
097c080b
VZ
1987 break;
1988
223d09f6 1989 case wxT('*'):
097c080b 1990 {
9a4232dc
VZ
1991 // remember where we started to be able to backtrack later
1992 pszLastStarInText = pszTxt;
1993 pszLastStarInMask = pszMask;
1994
097c080b 1995 // ignore special chars immediately following this one
9a4232dc 1996 // (should this be an error?)
223d09f6 1997 while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
097c080b
VZ
1998 pszMask++;
1999
2000 // if there is nothing more, match
223d09f6 2001 if ( *pszMask == wxT('\0') )
d775fa82 2002 return true;
097c080b
VZ
2003
2004 // are there any other metacharacters in the mask?
c86f1403 2005 size_t uiLenMask;
223d09f6 2006 const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));
097c080b
VZ
2007
2008 if ( pEndMask != NULL ) {
2009 // we have to match the string between two metachars
2010 uiLenMask = pEndMask - pszMask;
2011 }
2012 else {
2013 // we have to match the remainder of the string
2bb67b80 2014 uiLenMask = wxStrlen(pszMask);
097c080b
VZ
2015 }
2016
2017 wxString strToMatch(pszMask, uiLenMask);
2bb67b80 2018 const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
097c080b 2019 if ( pMatch == NULL )
d775fa82 2020 return false;
097c080b
VZ
2021
2022 // -1 to compensate "++" in the loop
2023 pszTxt = pMatch + uiLenMask - 1;
2024 pszMask += uiLenMask - 1;
2025 }
2026 break;
2027
2028 default:
2029 if ( *pszMask != *pszTxt )
d775fa82 2030 return false;
097c080b
VZ
2031 break;
2032 }
2033 }
2034
2035 // match only if nothing left
9a4232dc 2036 if ( *pszTxt == wxT('\0') )
d775fa82 2037 return true;
9a4232dc
VZ
2038
2039 // if we failed to match, backtrack if we can
2040 if ( pszLastStarInText ) {
2041 pszTxt = pszLastStarInText + 1;
2042 pszMask = pszLastStarInMask;
2043
2044 pszLastStarInText = NULL;
2045
2046 // don't bother resetting pszLastStarInMask, it's unnecessary
2047
2048 goto match;
2049 }
2050
d775fa82 2051 return false;
706c2ac9 2052#endif // wxUSE_REGEX/!wxUSE_REGEX
097c080b
VZ
2053}
2054
1fc5dd6f 2055// Count the number of chars
c9f78968 2056int wxString::Freq(wxUniChar ch) const
1fc5dd6f
JS
2057{
2058 int count = 0;
8f93a29f 2059 for ( const_iterator i = begin(); i != end(); ++i )
1fc5dd6f 2060 {
8f93a29f 2061 if ( *i == ch )
1fc5dd6f
JS
2062 count ++;
2063 }
2064 return count;
2065}
2066
628f87da
VS
2067// ----------------------------------------------------------------------------
2068// wxUTF8StringBuffer
2069// ----------------------------------------------------------------------------
2070
7d46f92b 2071#if wxUSE_UNICODE_WCHAR
628f87da
VS
2072wxUTF8StringBuffer::~wxUTF8StringBuffer()
2073{
2074 wxMBConvStrictUTF8 conv;
2075 size_t wlen = conv.ToWChar(NULL, 0, m_buf);
2076 wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
2077
2078 wxStringInternalBuffer wbuf(m_str, wlen);
2079 conv.ToWChar(wbuf, wlen, m_buf);
2080}
2081
2082wxUTF8StringBufferLength::~wxUTF8StringBufferLength()
2083{
2084 wxCHECK_RET(m_lenSet, "length not set");
2085
2086 wxMBConvStrictUTF8 conv;
2087 size_t wlen = conv.ToWChar(NULL, 0, m_buf, m_len);
2088 wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
2089
2090 wxStringInternalBufferLength wbuf(m_str, wlen);
2091 conv.ToWChar(wbuf, wlen, m_buf, m_len);
2092 wbuf.SetLength(wlen);
2093}
7d46f92b 2094#endif // wxUSE_UNICODE_WCHAR
5c1de526
VS
2095
2096// ----------------------------------------------------------------------------
2097// wxCharTypeBuffer<T>
2098// ----------------------------------------------------------------------------
2099
2100template<>
2101wxCharTypeBuffer<char>::Data
2102wxCharTypeBuffer<char>::NullData(NULL);
2103
2104template<>
2105wxCharTypeBuffer<wchar_t>::Data
2106wxCharTypeBuffer<wchar_t>::NullData(NULL);