]> git.saurik.com Git - wxWidgets.git/blame - src/common/string.cpp
use wxScopeGuard instead of an explicit try/catch(...)/throw: this suppresses wxUSE_E...
[wxWidgets.git] / src / common / string.cpp
CommitLineData
c801d85f 1/////////////////////////////////////////////////////////////////////////////
8898456d 2// Name: src/common/string.cpp
c801d85f 3// Purpose: wxString class
59059feb 4// Author: Vadim Zeitlin, Ryan Norton
c801d85f
KB
5// Modified by:
6// Created: 29/01/98
7// RCS-ID: $Id$
8// Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
59059feb 9// (c) 2004 Ryan Norton <wxprojects@comcast.net>
65571936 10// Licence: wxWindows licence
c801d85f
KB
11/////////////////////////////////////////////////////////////////////////////
12
c801d85f
KB
13// ===========================================================================
14// headers, declarations, constants
15// ===========================================================================
16
17// For compilers that support precompilation, includes "wx.h".
18#include "wx/wxprec.h"
19
20#ifdef __BORLANDC__
8898456d 21 #pragma hdrstop
c801d85f
KB
22#endif
23
24#ifndef WX_PRECOMP
8898456d 25 #include "wx/string.h"
2523e9b7 26 #include "wx/wxcrtvararg.h"
6b769f3d 27#endif
c801d85f
KB
28
29#include <ctype.h>
92df97b8
WS
30
31#ifndef __WXWINCE__
32 #include <errno.h>
33#endif
34
c801d85f
KB
35#include <string.h>
36#include <stdlib.h>
9a08c20e 37
8116a0c5 38#include "wx/hashmap.h"
8f93a29f
VS
39
40// string handling functions used by wxString:
41#if wxUSE_UNICODE_UTF8
42 #define wxStringMemcpy memcpy
43 #define wxStringMemcmp memcmp
44 #define wxStringMemchr memchr
45 #define wxStringStrlen strlen
46#else
47 #define wxStringMemcpy wxTmemcpy
48 #define wxStringMemcmp wxTmemcmp
a7ea63e2
VS
49 #define wxStringMemchr wxTmemchr
50 #define wxStringStrlen wxStrlen
51#endif
8f93a29f 52
e87b7833 53
a7ea63e2
VS
54// ---------------------------------------------------------------------------
55// static class variables definition
56// ---------------------------------------------------------------------------
e87b7833 57
a7ea63e2
VS
58// According to the STL, npos _must_ be (size_t)-1
59const size_t wxString::npos = (size_t) -1;
8f93a29f 60
68482dc5 61#if wxUSE_STRING_POS_CACHE
68482dc5 62
e810df36
VZ
63#ifdef wxHAS_COMPILER_TLS
64
65wxTLS_TYPE(wxString::Cache) wxString::ms_cache;
66
67#else // !wxHAS_COMPILER_TLS
68
ad8ae788
VZ
69struct wxStrCacheInitializer
70{
71 wxStrCacheInitializer()
72 {
73 // calling this function triggers s_cache initialization in it, and
74 // from now on it becomes safe to call from multiple threads
75 wxString::GetCache();
76 }
77};
78
e317bd3f
SC
79/*
80wxString::Cache& wxString::GetCache()
81{
82 static wxTLS_TYPE(Cache) s_cache;
83
84 return wxTLS_VALUE(s_cache);
85}
86*/
87
ad8ae788
VZ
88static wxStrCacheInitializer gs_stringCacheInit;
89
e810df36
VZ
90#endif // wxHAS_COMPILER_TLS/!wxHAS_COMPILER_TLS
91
68482dc5
VZ
92// gdb seems to be unable to display thread-local variables correctly, at least
93// not my 6.4.98 version under amd64, so provide this debugging helper to do it
94#ifdef __WXDEBUG__
95
96struct wxStrCacheDumper
97{
98 static void ShowAll()
99 {
100 puts("*** wxString cache dump:");
101 for ( unsigned n = 0; n < wxString::Cache::SIZE; n++ )
102 {
103 const wxString::Cache::Element&
8b73c531 104 c = wxString::GetCacheBegin()[n];
68482dc5
VZ
105
106 printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
107 n,
8b73c531 108 n == wxString::LastUsedCacheElement() ? " [*]" : "",
68482dc5
VZ
109 c.str,
110 (unsigned long)c.pos,
111 (unsigned long)c.impl,
112 (long)c.len);
113 }
114 }
115};
116
117void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
118
119#endif // __WXDEBUG__
120
121#ifdef wxPROFILE_STRING_CACHE
122
123wxString::CacheStats wxString::ms_cacheStats;
124
8c3b65d9 125struct wxStrCacheStatsDumper
68482dc5 126{
8c3b65d9 127 ~wxStrCacheStatsDumper()
68482dc5
VZ
128 {
129 const wxString::CacheStats& stats = wxString::ms_cacheStats;
130
131 if ( stats.postot )
132 {
133 puts("*** wxString cache statistics:");
134 printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
135 stats.postot);
136 printf("\tHits %u (of which %u not used) or %.2f%%\n",
137 stats.poshits,
138 stats.mishits,
139 100.*float(stats.poshits - stats.mishits)/stats.postot);
140 printf("\tAverage position requested: %.2f\n",
141 float(stats.sumpos) / stats.postot);
142 printf("\tAverage offset after cached hint: %.2f\n",
143 float(stats.sumofs) / stats.postot);
144 }
145
146 if ( stats.lentot )
147 {
148 printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
149 stats.lentot, 100.*float(stats.lenhits)/stats.lentot);
150 }
151 }
8c3b65d9 152};
68482dc5 153
8c3b65d9 154static wxStrCacheStatsDumper s_showCacheStats;
68482dc5
VZ
155
156#endif // wxPROFILE_STRING_CACHE
157
158#endif // wxUSE_STRING_POS_CACHE
159
a7ea63e2
VS
160// ----------------------------------------------------------------------------
161// global functions
162// ----------------------------------------------------------------------------
e87b7833 163
a7ea63e2 164#if wxUSE_STD_IOSTREAM
8f93a29f 165
a7ea63e2 166#include <iostream>
8f93a29f 167
a7ea63e2 168wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
8f93a29f 169{
7a906e1a
VZ
170#if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
171 return os << (const char *)str.AsCharBuf();
a7ea63e2 172#else
7a906e1a 173 return os << str.AsInternal();
a7ea63e2 174#endif
8f93a29f
VS
175}
176
04abe4bc
VS
177wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
178{
179 return os << str.c_str();
180}
181
182wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str)
183{
184 return os << str.data();
185}
186
187#ifndef __BORLANDC__
188wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)
189{
190 return os << str.data();
191}
192#endif
193
6a6ea041 194#if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
6b61b594
VZ
195
196wxSTD wostream& operator<<(wxSTD wostream& wos, const wxString& str)
197{
198 return wos << str.wc_str();
199}
200
201wxSTD wostream& operator<<(wxSTD wostream& wos, const wxCStrData& str)
202{
203 return wos << str.AsWChar();
204}
205
206wxSTD wostream& operator<<(wxSTD wostream& wos, const wxWCharBuffer& str)
207{
208 return wos << str.data();
209}
210
6a6ea041 211#endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
6b61b594 212
a7ea63e2 213#endif // wxUSE_STD_IOSTREAM
e87b7833 214
81727065
VS
215// ===========================================================================
216// wxString class core
217// ===========================================================================
218
219#if wxUSE_UNICODE_UTF8
220
81727065
VS
221void wxString::PosLenToImpl(size_t pos, size_t len,
222 size_t *implPos, size_t *implLen) const
223{
224 if ( pos == npos )
68482dc5 225 {
81727065 226 *implPos = npos;
68482dc5
VZ
227 }
228 else // have valid start position
81727065 229 {
68482dc5
VZ
230 const const_iterator b = GetIterForNthChar(pos);
231 *implPos = wxStringImpl::const_iterator(b.impl()) - m_impl.begin();
81727065 232 if ( len == npos )
68482dc5 233 {
81727065 234 *implLen = npos;
68482dc5
VZ
235 }
236 else // have valid length too
81727065 237 {
68482dc5
VZ
238 // we need to handle the case of length specifying a substring
239 // going beyond the end of the string, just as std::string does
240 const const_iterator e(end());
241 const_iterator i(b);
242 while ( len && i <= e )
243 {
244 ++i;
245 --len;
246 }
247
248 *implLen = i.impl() - b.impl();
81727065
VS
249 }
250 }
251}
252
253#endif // wxUSE_UNICODE_UTF8
254
11aac4ba
VS
255// ----------------------------------------------------------------------------
256// wxCStrData converted strings caching
257// ----------------------------------------------------------------------------
258
132276cf
VS
259// FIXME-UTF8: temporarily disabled because it doesn't work with global
260// string objects; re-enable after fixing this bug and benchmarking
261// performance to see if using a hash is a good idea at all
262#if 0
263
11aac4ba
VS
264// For backward compatibility reasons, it must be possible to assign the value
265// returned by wxString::c_str() to a char* or wchar_t* variable and work with
266// it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
267// because the memory would be freed immediately, but it has to be valid as long
268// as the string is not modified, so that code like this still works:
269//
270// const wxChar *s = str.c_str();
271// while ( s ) { ... }
272
273// FIXME-UTF8: not thread safe!
274// FIXME-UTF8: we currently clear the cached conversion only when the string is
275// destroyed, but we should do it when the string is modified, to
276// keep memory usage down
277// FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
278// invalidated the cache on every change, we could keep the previous
279// conversion
280// FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
281// to use mb_str() or wc_str() instead of (const [w]char*)c_str()
282
283template<typename T>
284static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
285{
6c4ebcda 286 typename T::iterator i = hash.find(wxConstCast(s, wxString));
11aac4ba
VS
287 if ( i != hash.end() )
288 {
289 free(i->second);
290 hash.erase(i);
291 }
292}
293
294#if wxUSE_UNICODE
6c4ebcda
VS
295// NB: non-STL implementation doesn't compile with "const wxString*" key type,
296// so we have to use wxString* here and const-cast when used
11aac4ba
VS
297WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
298 wxStringCharConversionCache);
299static wxStringCharConversionCache gs_stringsCharCache;
300
301const char* wxCStrData::AsChar() const
302{
303 // remove previously cache value, if any (see FIXMEs above):
304 DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
305
306 // convert the string and keep it:
6c4ebcda
VS
307 const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
308 m_str->mb_str().release();
11aac4ba
VS
309
310 return s + m_offset;
311}
312#endif // wxUSE_UNICODE
313
314#if !wxUSE_UNICODE_WCHAR
315WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
316 wxStringWCharConversionCache);
317static wxStringWCharConversionCache gs_stringsWCharCache;
318
319const wchar_t* wxCStrData::AsWChar() const
320{
321 // remove previously cache value, if any (see FIXMEs above):
322 DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
323
324 // convert the string and keep it:
6c4ebcda
VS
325 const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
326 m_str->wc_str().release();
11aac4ba
VS
327
328 return s + m_offset;
329}
330#endif // !wxUSE_UNICODE_WCHAR
331
11aac4ba
VS
332wxString::~wxString()
333{
334#if wxUSE_UNICODE
335 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
336 DeleteStringFromConversionCache(gs_stringsCharCache, this);
337#endif
338#if !wxUSE_UNICODE_WCHAR
339 DeleteStringFromConversionCache(gs_stringsWCharCache, this);
340#endif
341}
132276cf
VS
342#endif
343
111d9948 344#if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
132276cf
VS
345const char* wxCStrData::AsChar() const
346{
111d9948
VS
347#if wxUSE_UNICODE_UTF8
348 if ( wxLocaleIsUtf8 )
349 return AsInternal();
350#endif
351 // under non-UTF8 locales, we have to convert the internal UTF-8
352 // representation using wxConvLibc and cache the result
353
132276cf 354 wxString *str = wxConstCast(m_str, wxString);
05f32fc3
VS
355
356 // convert the string:
2a7431e1
VZ
357 //
358 // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
359 // have it) but it's unfortunately not obvious to implement
360 // because we don't know how big buffer do we need for the
361 // given string length (in case of multibyte encodings, e.g.
362 // ISO-2022-JP or UTF-8 when internal representation is wchar_t)
363 //
364 // One idea would be to store more than just m_convertedToChar
365 // in wxString: then we could record the length of the string
366 // which was converted the last time and try to reuse the same
367 // buffer if the current length is not greater than it (this
368 // could still fail because string could have been modified in
369 // place but it would work most of the time, so we'd do it and
370 // only allocate the new buffer if in-place conversion returned
371 // an error). We could also store a bit saying if the string
372 // was modified since the last conversion (and update it in all
373 // operation modifying the string, of course) to avoid unneeded
374 // consequential conversions. But both of these ideas require
375 // adding more fields to wxString and require profiling results
376 // to be sure that we really gain enough from them to justify
377 // doing it.
05f32fc3
VS
378 wxCharBuffer buf(str->mb_str());
379
28be59b4
VZ
380 // if it failed, return empty string and not NULL to avoid crashes in code
381 // written with either wxWidgets 2 wxString or std::string behaviour in
382 // mind: neither of them ever returns NULL and so we shouldn't neither
383 if ( !buf )
384 return "";
385
05f32fc3
VS
386 if ( str->m_convertedToChar &&
387 strlen(buf) == strlen(str->m_convertedToChar) )
388 {
389 // keep the same buffer for as long as possible, so that several calls
390 // to c_str() in a row still work:
391 strcpy(str->m_convertedToChar, buf);
392 }
393 else
394 {
395 str->m_convertedToChar = buf.release();
396 }
397
398 // and keep it:
132276cf
VS
399 return str->m_convertedToChar + m_offset;
400}
111d9948 401#endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
132276cf
VS
402
403#if !wxUSE_UNICODE_WCHAR
404const wchar_t* wxCStrData::AsWChar() const
405{
406 wxString *str = wxConstCast(m_str, wxString);
05f32fc3
VS
407
408 // convert the string:
409 wxWCharBuffer buf(str->wc_str());
410
28be59b4
VZ
411 // notice that here, unlike above in AsChar(), conversion can't fail as our
412 // internal UTF-8 is always well-formed -- or the string was corrupted and
413 // all bets are off anyhow
414
05f32fc3
VS
415 // FIXME-UTF8: do the conversion in-place in the existing buffer
416 if ( str->m_convertedToWChar &&
417 wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
418 {
419 // keep the same buffer for as long as possible, so that several calls
420 // to c_str() in a row still work:
421 memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
422 }
423 else
424 {
425 str->m_convertedToWChar = buf.release();
426 }
427
428 // and keep it:
132276cf
VS
429 return str->m_convertedToWChar + m_offset;
430}
431#endif // !wxUSE_UNICODE_WCHAR
432
433// ===========================================================================
434// wxString class core
435// ===========================================================================
436
437// ---------------------------------------------------------------------------
438// construction and conversion
439// ---------------------------------------------------------------------------
11aac4ba 440
81727065 441#if wxUSE_UNICODE_WCHAR
8f93a29f
VS
442/* static */
443wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
04abe4bc 444 const wxMBConv& conv)
8f93a29f
VS
445{
446 // anything to do?
447 if ( !psz || nLength == 0 )
81727065 448 return SubstrBufFromMB(L"", 0);
8f93a29f
VS
449
450 if ( nLength == npos )
451 nLength = wxNO_LEN;
452
453 size_t wcLen;
454 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
455 if ( !wcLen )
81727065 456 return SubstrBufFromMB(_T(""), 0);
8f93a29f
VS
457 else
458 return SubstrBufFromMB(wcBuf, wcLen);
459}
81727065
VS
460#endif // wxUSE_UNICODE_WCHAR
461
462#if wxUSE_UNICODE_UTF8
463/* static */
464wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
465 const wxMBConv& conv)
466{
81727065
VS
467 // anything to do?
468 if ( !psz || nLength == 0 )
469 return SubstrBufFromMB("", 0);
470
111d9948
VS
471 // if psz is already in UTF-8, we don't have to do the roundtrip to
472 // wchar_t* and back:
473 if ( conv.IsUTF8() )
474 {
475 // we need to validate the input because UTF8 iterators assume valid
476 // UTF-8 sequence and psz may be invalid:
477 if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
478 {
9ef1ad0d
VZ
479 // we must pass the real string length to SubstrBufFromMB ctor
480 if ( nLength == npos )
481 nLength = psz ? strlen(psz) : 0;
111d9948
VS
482 return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz), nLength);
483 }
484 // else: do the roundtrip through wchar_t*
485 }
486
81727065
VS
487 if ( nLength == npos )
488 nLength = wxNO_LEN;
489
490 // first convert to wide string:
491 size_t wcLen;
492 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
493 if ( !wcLen )
494 return SubstrBufFromMB("", 0);
495
496 // and then to UTF-8:
4fdfe2f3 497 SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
81727065
VS
498 // widechar -> UTF-8 conversion isn't supposed to ever fail:
499 wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
500
501 return buf;
502}
503#endif // wxUSE_UNICODE_UTF8
504
505#if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
8f93a29f
VS
506/* static */
507wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
04abe4bc 508 const wxMBConv& conv)
8f93a29f
VS
509{
510 // anything to do?
511 if ( !pwz || nLength == 0 )
81727065 512 return SubstrBufFromWC("", 0);
8f93a29f
VS
513
514 if ( nLength == npos )
515 nLength = wxNO_LEN;
516
517 size_t mbLen;
518 wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
519 if ( !mbLen )
81727065 520 return SubstrBufFromWC("", 0);
8f93a29f
VS
521 else
522 return SubstrBufFromWC(mbBuf, mbLen);
523}
81727065 524#endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
8f93a29f
VS
525
526
81727065 527#if wxUSE_UNICODE_WCHAR
e87b7833 528
06386448 529//Convert wxString in Unicode mode to a multi-byte string
830f8f11 530const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
265d5cce 531{
81727065 532 return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL);
e87b7833
MB
533}
534
81727065 535#elif wxUSE_UNICODE_UTF8
e87b7833 536
81727065
VS
537const wxWCharBuffer wxString::wc_str() const
538{
4fdfe2f3
VZ
539 return wxMBConvStrictUTF8().cMB2WC
540 (
541 m_impl.c_str(),
542 m_impl.length() + 1, // size, not length
543 NULL
544 );
81727065
VS
545}
546
547const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
548{
111d9948
VS
549 if ( conv.IsUTF8() )
550 return wxCharBuffer::CreateNonOwned(m_impl.c_str());
551
81727065
VS
552 // FIXME-UTF8: use wc_str() here once we have buffers with length
553
554 size_t wcLen;
4fdfe2f3
VZ
555 wxWCharBuffer wcBuf(wxMBConvStrictUTF8().cMB2WC
556 (
557 m_impl.c_str(),
558 m_impl.length() + 1, // size
559 &wcLen
560 ));
81727065
VS
561 if ( !wcLen )
562 return wxCharBuffer("");
563
4f696af8 564 return conv.cWC2MB(wcBuf, wcLen+1, NULL);
81727065
VS
565}
566
567#else // ANSI
eec47cc6 568
7663d0d4 569//Converts this string to a wide character string if unicode
06386448 570//mode is not enabled and wxUSE_WCHAR_T is enabled
830f8f11 571const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
265d5cce 572{
81727065 573 return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL);
265d5cce 574}
7663d0d4 575
e87b7833
MB
576#endif // Unicode/ANSI
577
578// shrink to minimal size (releasing extra memory)
579bool wxString::Shrink()
580{
581 wxString tmp(begin(), end());
582 swap(tmp);
583 return tmp.length() == length();
584}
585
d8a4b666 586// deprecated compatibility code:
a7ea63e2 587#if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
c87a0bc8 588wxStringCharType *wxString::GetWriteBuf(size_t nLen)
d8a4b666
VS
589{
590 return DoGetWriteBuf(nLen);
591}
592
593void wxString::UngetWriteBuf()
594{
595 DoUngetWriteBuf();
596}
597
598void wxString::UngetWriteBuf(size_t nLen)
599{
600 DoUngetWriteBuf(nLen);
601}
a7ea63e2 602#endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
e87b7833 603
d8a4b666 604
e87b7833
MB
605// ---------------------------------------------------------------------------
606// data access
607// ---------------------------------------------------------------------------
608
609// all functions are inline in string.h
610
611// ---------------------------------------------------------------------------
e8f59039 612// concatenation operators
e87b7833
MB
613// ---------------------------------------------------------------------------
614
c801d85f 615/*
c801d85f
KB
616 * concatenation functions come in 5 flavours:
617 * string + string
618 * char + string and string + char
619 * C str + string and string + C str
620 */
621
b1801e0e 622wxString operator+(const wxString& str1, const wxString& str2)
c801d85f 623{
992527a5 624#if !wxUSE_STL_BASED_WXSTRING
8f93a29f
VS
625 wxASSERT( str1.IsValid() );
626 wxASSERT( str2.IsValid() );
e87b7833 627#endif
097c080b 628
3458e408
WS
629 wxString s = str1;
630 s += str2;
3168a13f 631
3458e408 632 return s;
c801d85f
KB
633}
634
c9f78968 635wxString operator+(const wxString& str, wxUniChar ch)
c801d85f 636{
992527a5 637#if !wxUSE_STL_BASED_WXSTRING
8f93a29f 638 wxASSERT( str.IsValid() );
e87b7833 639#endif
3168a13f 640
3458e408
WS
641 wxString s = str;
642 s += ch;
097c080b 643
3458e408 644 return s;
c801d85f
KB
645}
646
c9f78968 647wxString operator+(wxUniChar ch, const wxString& str)
c801d85f 648{
992527a5 649#if !wxUSE_STL_BASED_WXSTRING
8f93a29f 650 wxASSERT( str.IsValid() );
e87b7833 651#endif
097c080b 652
3458e408
WS
653 wxString s = ch;
654 s += str;
3168a13f 655
3458e408 656 return s;
c801d85f
KB
657}
658
8f93a29f 659wxString operator+(const wxString& str, const char *psz)
c801d85f 660{
992527a5 661#if !wxUSE_STL_BASED_WXSTRING
8f93a29f 662 wxASSERT( str.IsValid() );
e87b7833 663#endif
097c080b 664
3458e408 665 wxString s;
8f93a29f 666 if ( !s.Alloc(strlen(psz) + str.length()) ) {
3458e408
WS
667 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
668 }
669 s += str;
670 s += psz;
3168a13f 671
3458e408 672 return s;
c801d85f
KB
673}
674
8f93a29f 675wxString operator+(const wxString& str, const wchar_t *pwz)
c801d85f 676{
992527a5 677#if !wxUSE_STL_BASED_WXSTRING
8f93a29f
VS
678 wxASSERT( str.IsValid() );
679#endif
680
681 wxString s;
682 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
683 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
684 }
685 s += str;
686 s += pwz;
687
688 return s;
689}
690
691wxString operator+(const char *psz, const wxString& str)
692{
a7ea63e2
VS
693#if !wxUSE_STL_BASED_WXSTRING
694 wxASSERT( str.IsValid() );
695#endif
696
697 wxString s;
698 if ( !s.Alloc(strlen(psz) + str.length()) ) {
699 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
700 }
701 s = psz;
702 s += str;
703
704 return s;
705}
706
707wxString operator+(const wchar_t *pwz, const wxString& str)
708{
709#if !wxUSE_STL_BASED_WXSTRING
710 wxASSERT( str.IsValid() );
711#endif
712
713 wxString s;
714 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
715 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
716 }
717 s = pwz;
718 s += str;
719
720 return s;
721}
722
723// ---------------------------------------------------------------------------
724// string comparison
725// ---------------------------------------------------------------------------
726
52de37c7
VS
727bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
728{
729 return (length() == 1) && (compareWithCase ? GetChar(0u) == c
730 : wxToupper(GetChar(0u)) == wxToupper(c));
731}
732
a7ea63e2
VS
733#ifdef HAVE_STD_STRING_COMPARE
734
735// NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
736// UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
737// sort strings in characters code point order by sorting the byte sequence
738// in byte values order (i.e. what strcmp() and memcmp() do).
739
740int wxString::compare(const wxString& str) const
741{
742 return m_impl.compare(str.m_impl);
743}
744
745int wxString::compare(size_t nStart, size_t nLen,
746 const wxString& str) const
747{
748 size_t pos, len;
749 PosLenToImpl(nStart, nLen, &pos, &len);
750 return m_impl.compare(pos, len, str.m_impl);
751}
752
753int wxString::compare(size_t nStart, size_t nLen,
754 const wxString& str,
755 size_t nStart2, size_t nLen2) const
756{
757 size_t pos, len;
758 PosLenToImpl(nStart, nLen, &pos, &len);
759
760 size_t pos2, len2;
761 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
762
763 return m_impl.compare(pos, len, str.m_impl, pos2, len2);
764}
765
766int wxString::compare(const char* sz) const
767{
768 return m_impl.compare(ImplStr(sz));
769}
770
771int wxString::compare(const wchar_t* sz) const
772{
773 return m_impl.compare(ImplStr(sz));
774}
775
776int wxString::compare(size_t nStart, size_t nLen,
777 const char* sz, size_t nCount) const
778{
779 size_t pos, len;
780 PosLenToImpl(nStart, nLen, &pos, &len);
781
782 SubstrBufFromMB str(ImplStr(sz, nCount));
783
784 return m_impl.compare(pos, len, str.data, str.len);
785}
786
787int wxString::compare(size_t nStart, size_t nLen,
788 const wchar_t* sz, size_t nCount) const
789{
790 size_t pos, len;
791 PosLenToImpl(nStart, nLen, &pos, &len);
792
793 SubstrBufFromWC str(ImplStr(sz, nCount));
794
795 return m_impl.compare(pos, len, str.data, str.len);
796}
797
798#else // !HAVE_STD_STRING_COMPARE
799
800static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
801 const wxStringCharType* s2, size_t l2)
802{
803 if( l1 == l2 )
804 return wxStringMemcmp(s1, s2, l1);
805 else if( l1 < l2 )
806 {
807 int ret = wxStringMemcmp(s1, s2, l1);
808 return ret == 0 ? -1 : ret;
809 }
810 else
811 {
812 int ret = wxStringMemcmp(s1, s2, l2);
813 return ret == 0 ? +1 : ret;
814 }
815}
816
817int wxString::compare(const wxString& str) const
818{
819 return ::wxDoCmp(m_impl.data(), m_impl.length(),
820 str.m_impl.data(), str.m_impl.length());
821}
822
823int wxString::compare(size_t nStart, size_t nLen,
824 const wxString& str) const
825{
826 wxASSERT(nStart <= length());
827 size_type strLen = length() - nStart;
828 nLen = strLen < nLen ? strLen : nLen;
829
830 size_t pos, len;
831 PosLenToImpl(nStart, nLen, &pos, &len);
832
833 return ::wxDoCmp(m_impl.data() + pos, len,
834 str.m_impl.data(), str.m_impl.length());
835}
836
837int wxString::compare(size_t nStart, size_t nLen,
838 const wxString& str,
839 size_t nStart2, size_t nLen2) const
840{
841 wxASSERT(nStart <= length());
842 wxASSERT(nStart2 <= str.length());
843 size_type strLen = length() - nStart,
844 strLen2 = str.length() - nStart2;
845 nLen = strLen < nLen ? strLen : nLen;
846 nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
847
848 size_t pos, len;
849 PosLenToImpl(nStart, nLen, &pos, &len);
850 size_t pos2, len2;
851 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
852
853 return ::wxDoCmp(m_impl.data() + pos, len,
854 str.m_impl.data() + pos2, len2);
855}
856
857int wxString::compare(const char* sz) const
858{
859 SubstrBufFromMB str(ImplStr(sz, npos));
860 if ( str.len == npos )
861 str.len = wxStringStrlen(str.data);
862 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
863}
864
865int wxString::compare(const wchar_t* sz) const
866{
867 SubstrBufFromWC str(ImplStr(sz, npos));
868 if ( str.len == npos )
869 str.len = wxStringStrlen(str.data);
870 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
871}
872
873int wxString::compare(size_t nStart, size_t nLen,
874 const char* sz, size_t nCount) const
875{
876 wxASSERT(nStart <= length());
877 size_type strLen = length() - nStart;
878 nLen = strLen < nLen ? strLen : nLen;
097c080b 879
a7ea63e2
VS
880 size_t pos, len;
881 PosLenToImpl(nStart, nLen, &pos, &len);
3168a13f 882
a7ea63e2
VS
883 SubstrBufFromMB str(ImplStr(sz, nCount));
884 if ( str.len == npos )
885 str.len = wxStringStrlen(str.data);
886
887 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
c801d85f
KB
888}
889
a7ea63e2
VS
890int wxString::compare(size_t nStart, size_t nLen,
891 const wchar_t* sz, size_t nCount) const
8f93a29f 892{
a7ea63e2
VS
893 wxASSERT(nStart <= length());
894 size_type strLen = length() - nStart;
895 nLen = strLen < nLen ? strLen : nLen;
8f93a29f 896
a7ea63e2
VS
897 size_t pos, len;
898 PosLenToImpl(nStart, nLen, &pos, &len);
8f93a29f 899
a7ea63e2
VS
900 SubstrBufFromWC str(ImplStr(sz, nCount));
901 if ( str.len == npos )
902 str.len = wxStringStrlen(str.data);
903
904 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
8f93a29f
VS
905}
906
a7ea63e2
VS
907#endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
908
909
8f93a29f
VS
910// ---------------------------------------------------------------------------
911// find_{first,last}_[not]_of functions
912// ---------------------------------------------------------------------------
913
914#if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
c801d85f 915
8f93a29f
VS
916// NB: All these functions are implemented with the argument being wxChar*,
917// i.e. widechar string in any Unicode build, even though native string
918// representation is char* in the UTF-8 build. This is because we couldn't
919// use memchr() to determine if a character is in a set encoded as UTF-8.
920
921size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
dcb68102 922{
8f93a29f 923 return find_first_of(sz, nStart, wxStrlen(sz));
dcb68102
RN
924}
925
8f93a29f 926size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
dcb68102 927{
8f93a29f 928 return find_first_not_of(sz, nStart, wxStrlen(sz));
dcb68102
RN
929}
930
8f93a29f 931size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
dcb68102 932{
8f93a29f 933 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
dcb68102 934
8f93a29f
VS
935 size_t idx = nStart;
936 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
dcb68102 937 {
8f93a29f
VS
938 if ( wxTmemchr(sz, *i, n) )
939 return idx;
dcb68102 940 }
8f93a29f
VS
941
942 return npos;
943}
944
945size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
946{
947 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
948
949 size_t idx = nStart;
950 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
dcb68102 951 {
8f93a29f
VS
952 if ( !wxTmemchr(sz, *i, n) )
953 return idx;
954 }
955
956 return npos;
957}
958
959
960size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
961{
962 return find_last_of(sz, nStart, wxStrlen(sz));
963}
964
965size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
966{
967 return find_last_not_of(sz, nStart, wxStrlen(sz));
968}
969
970size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
971{
972 size_t len = length();
973
974 if ( nStart == npos )
975 {
976 nStart = len - 1;
dcb68102 977 }
2c09fb3b 978 else
dcb68102 979 {
8f93a29f 980 wxASSERT_MSG( nStart <= len, _T("invalid index") );
dcb68102 981 }
8f93a29f
VS
982
983 size_t idx = nStart;
984 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
985 i != rend(); --idx, ++i )
986 {
987 if ( wxTmemchr(sz, *i, n) )
988 return idx;
989 }
990
991 return npos;
dcb68102
RN
992}
993
8f93a29f 994size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
dcb68102 995{
8f93a29f
VS
996 size_t len = length();
997
998 if ( nStart == npos )
999 {
1000 nStart = len - 1;
1001 }
1002 else
1003 {
1004 wxASSERT_MSG( nStart <= len, _T("invalid index") );
1005 }
1006
1007 size_t idx = nStart;
1008 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1009 i != rend(); --idx, ++i )
1010 {
1011 if ( !wxTmemchr(sz, *i, n) )
1012 return idx;
1013 }
1014
1015 return npos;
dcb68102
RN
1016}
1017
8f93a29f 1018size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
dcb68102 1019{
8f93a29f
VS
1020 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
1021
1022 size_t idx = nStart;
1023 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
1024 {
1025 if ( *i != ch )
1026 return idx;
1027 }
1028
1029 return npos;
1030}
1031
1032size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
1033{
1034 size_t len = length();
1035
1036 if ( nStart == npos )
1037 {
1038 nStart = len - 1;
1039 }
1040 else
1041 {
1042 wxASSERT_MSG( nStart <= len, _T("invalid index") );
1043 }
1044
1045 size_t idx = nStart;
1046 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1047 i != rend(); --idx, ++i )
1048 {
1049 if ( *i != ch )
1050 return idx;
1051 }
1052
1053 return npos;
1054}
1055
1056// the functions above were implemented for wchar_t* arguments in Unicode
1057// build and char* in ANSI build; below are implementations for the other
1058// version:
1059#if wxUSE_UNICODE
1060 #define wxOtherCharType char
1061 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
1062#else
1063 #define wxOtherCharType wchar_t
1064 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
1065#endif
1066
1067size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
1068 { return find_first_of(STRCONV(sz), nStart); }
1069
1070size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
1071 size_t n) const
1072 { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
1073size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
1074 { return find_last_of(STRCONV(sz), nStart); }
1075size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
1076 size_t n) const
1077 { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
1078size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
1079 { return find_first_not_of(STRCONV(sz), nStart); }
1080size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
1081 size_t n) const
1082 { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
1083size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
1084 { return find_last_not_of(STRCONV(sz), nStart); }
1085size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
1086 size_t n) const
1087 { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
1088
1089#undef wxOtherCharType
1090#undef STRCONV
1091
1092#endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1093
1094// ===========================================================================
1095// other common string functions
1096// ===========================================================================
1097
1098int wxString::CmpNoCase(const wxString& s) const
1099{
1100 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
1101
8f93a29f
VS
1102 const_iterator i1 = begin();
1103 const_iterator end1 = end();
1104 const_iterator i2 = s.begin();
1105 const_iterator end2 = s.end();
1106
0d8b0f94 1107 for ( ; i1 != end1 && i2 != end2; ++i1, ++i2 )
8f93a29f
VS
1108 {
1109 wxUniChar lower1 = (wxChar)wxTolower(*i1);
1110 wxUniChar lower2 = (wxChar)wxTolower(*i2);
1111 if ( lower1 != lower2 )
1112 return lower1 < lower2 ? -1 : 1;
1113 }
1114
1115 size_t len1 = length();
1116 size_t len2 = s.length();
dcb68102 1117
8f93a29f
VS
1118 if ( len1 < len2 )
1119 return -1;
1120 else if ( len1 > len2 )
1121 return 1;
1122 return 0;
dcb68102
RN
1123}
1124
1125
b1ac3b56 1126#if wxUSE_UNICODE
e015c2a3 1127
cf6bedce
SC
1128#ifdef __MWERKS__
1129#ifndef __SCHAR_MAX__
1130#define __SCHAR_MAX__ 127
1131#endif
1132#endif
1133
e6310bbc 1134wxString wxString::FromAscii(const char *ascii, size_t len)
b1ac3b56 1135{
e6310bbc 1136 if (!ascii || len == 0)
b1ac3b56 1137 return wxEmptyString;
e015c2a3 1138
b1ac3b56 1139 wxString res;
e015c2a3 1140
e6310bbc 1141 {
6798451b 1142 wxStringInternalBuffer buf(res, len);
602a857b 1143 wxStringCharType *dest = buf;
c1eada83 1144
602a857b
VS
1145 for ( ; len > 0; --len )
1146 {
1147 unsigned char c = (unsigned char)*ascii++;
1148 wxASSERT_MSG( c < 0x80,
1149 _T("Non-ASCII value passed to FromAscii().") );
c1eada83 1150
602a857b
VS
1151 *dest++ = (wchar_t)c;
1152 }
e015c2a3
VZ
1153 }
1154
b1ac3b56
RR
1155 return res;
1156}
1157
e6310bbc
VS
1158wxString wxString::FromAscii(const char *ascii)
1159{
0081dd72 1160 return FromAscii(ascii, wxStrlen(ascii));
e6310bbc
VS
1161}
1162
c5288c5c 1163wxString wxString::FromAscii(char ascii)
2b5f62a0
VZ
1164{
1165 // What do we do with '\0' ?
1166
c1eada83 1167 unsigned char c = (unsigned char)ascii;
8760bc65 1168
c1eada83
VS
1169 wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") );
1170
1171 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1172 return wxString(wxUniChar((wchar_t)c));
2b5f62a0
VZ
1173}
1174
b1ac3b56
RR
1175const wxCharBuffer wxString::ToAscii() const
1176{
e015c2a3
VZ
1177 // this will allocate enough space for the terminating NUL too
1178 wxCharBuffer buffer(length());
6e394fc6 1179 char *dest = buffer.data();
e015c2a3 1180
c1eada83 1181 for ( const_iterator i = begin(); i != end(); ++i )
b1ac3b56 1182 {
c1eada83
VS
1183 wxUniChar c(*i);
1184 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1185 *dest++ = c.IsAscii() ? (char)c : '_';
e015c2a3
VZ
1186
1187 // the output string can't have embedded NULs anyhow, so we can safely
1188 // stop at first of them even if we do have any
c1eada83 1189 if ( !c )
e015c2a3 1190 break;
b1ac3b56 1191 }
e015c2a3 1192
b1ac3b56
RR
1193 return buffer;
1194}
e015c2a3 1195
c1eada83 1196#endif // wxUSE_UNICODE
b1ac3b56 1197
c801d85f 1198// extract string of length nCount starting at nFirst
c801d85f
KB
1199wxString wxString::Mid(size_t nFirst, size_t nCount) const
1200{
73f507f5 1201 size_t nLen = length();
30d9011f 1202
73f507f5
WS
1203 // default value of nCount is npos and means "till the end"
1204 if ( nCount == npos )
1205 {
1206 nCount = nLen - nFirst;
1207 }
30d9011f 1208
73f507f5
WS
1209 // out-of-bounds requests return sensible things
1210 if ( nFirst + nCount > nLen )
1211 {
1212 nCount = nLen - nFirst;
1213 }
c801d85f 1214
73f507f5
WS
1215 if ( nFirst > nLen )
1216 {
1217 // AllocCopy() will return empty string
1218 return wxEmptyString;
1219 }
c801d85f 1220
73f507f5
WS
1221 wxString dest(*this, nFirst, nCount);
1222 if ( dest.length() != nCount )
1223 {
1224 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1225 }
30d9011f 1226
73f507f5 1227 return dest;
c801d85f
KB
1228}
1229
e87b7833 1230// check that the string starts with prefix and return the rest of the string
d775fa82 1231// in the provided pointer if it is not NULL, otherwise return false
c5e7a7d7 1232bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
f6bcfd97 1233{
c5e7a7d7
VS
1234 if ( compare(0, prefix.length(), prefix) != 0 )
1235 return false;
f6bcfd97
BP
1236
1237 if ( rest )
1238 {
1239 // put the rest of the string into provided pointer
c5e7a7d7 1240 rest->assign(*this, prefix.length(), npos);
f6bcfd97
BP
1241 }
1242
d775fa82 1243 return true;
f6bcfd97
BP
1244}
1245
3affcd07
VZ
1246
1247// check that the string ends with suffix and return the rest of it in the
1248// provided pointer if it is not NULL, otherwise return false
c5e7a7d7 1249bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
3affcd07 1250{
c5e7a7d7 1251 int start = length() - suffix.length();
81727065
VS
1252
1253 if ( start < 0 || compare(start, npos, suffix) != 0 )
3affcd07
VZ
1254 return false;
1255
1256 if ( rest )
1257 {
1258 // put the rest of the string into provided pointer
1259 rest->assign(*this, 0, start);
1260 }
1261
1262 return true;
1263}
1264
1265
c801d85f
KB
1266// extract nCount last (rightmost) characters
1267wxString wxString::Right(size_t nCount) const
1268{
e87b7833
MB
1269 if ( nCount > length() )
1270 nCount = length();
c801d85f 1271
e87b7833
MB
1272 wxString dest(*this, length() - nCount, nCount);
1273 if ( dest.length() != nCount ) {
b1801e0e
GD
1274 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1275 }
c801d85f
KB
1276 return dest;
1277}
1278
7929902d 1279// get all characters after the last occurrence of ch
c801d85f 1280// (returns the whole string if ch not found)
c9f78968 1281wxString wxString::AfterLast(wxUniChar ch) const
c801d85f
KB
1282{
1283 wxString str;
d775fa82 1284 int iPos = Find(ch, true);
3c67202d 1285 if ( iPos == wxNOT_FOUND )
c801d85f
KB
1286 str = *this;
1287 else
c565abe1 1288 str.assign(*this, iPos + 1, npos);
c801d85f
KB
1289
1290 return str;
1291}
1292
1293// extract nCount first (leftmost) characters
1294wxString wxString::Left(size_t nCount) const
1295{
e87b7833
MB
1296 if ( nCount > length() )
1297 nCount = length();
c801d85f 1298
e87b7833
MB
1299 wxString dest(*this, 0, nCount);
1300 if ( dest.length() != nCount ) {
b1801e0e
GD
1301 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1302 }
c801d85f
KB
1303 return dest;
1304}
1305
7929902d 1306// get all characters before the first occurrence of ch
c801d85f 1307// (returns the whole string if ch not found)
c9f78968 1308wxString wxString::BeforeFirst(wxUniChar ch) const
c801d85f 1309{
e87b7833 1310 int iPos = Find(ch);
c565abe1
VZ
1311 if ( iPos == wxNOT_FOUND )
1312 iPos = length();
e87b7833 1313 return wxString(*this, 0, iPos);
c801d85f
KB
1314}
1315
7929902d 1316/// get all characters before the last occurrence of ch
c801d85f 1317/// (returns empty string if ch not found)
c9f78968 1318wxString wxString::BeforeLast(wxUniChar ch) const
c801d85f
KB
1319{
1320 wxString str;
d775fa82 1321 int iPos = Find(ch, true);
3c67202d 1322 if ( iPos != wxNOT_FOUND && iPos != 0 )
d1c9bbf6 1323 str = wxString(c_str(), iPos);
c801d85f
KB
1324
1325 return str;
1326}
1327
7929902d 1328/// get all characters after the first occurrence of ch
c801d85f 1329/// (returns empty string if ch not found)
c9f78968 1330wxString wxString::AfterFirst(wxUniChar ch) const
c801d85f
KB
1331{
1332 wxString str;
1333 int iPos = Find(ch);
3c67202d 1334 if ( iPos != wxNOT_FOUND )
c565abe1 1335 str.assign(*this, iPos + 1, npos);
c801d85f
KB
1336
1337 return str;
1338}
1339
7929902d 1340// replace first (or all) occurrences of some substring with another one
8a540c88
VS
1341size_t wxString::Replace(const wxString& strOld,
1342 const wxString& strNew, bool bReplaceAll)
c801d85f 1343{
a8f1f1b2 1344 // if we tried to replace an empty string we'd enter an infinite loop below
8a540c88 1345 wxCHECK_MSG( !strOld.empty(), 0,
a8f1f1b2
VZ
1346 _T("wxString::Replace(): invalid parameter") );
1347
68482dc5
VZ
1348 wxSTRING_INVALIDATE_CACHE();
1349
510bb748 1350 size_t uiCount = 0; // count of replacements made
c801d85f 1351
8a627032
VZ
1352 // optimize the special common case: replacement of one character by
1353 // another one (in UTF-8 case we can only do this for ASCII characters)
1354 //
1355 // benchmarks show that this special version is around 3 times faster
1356 // (depending on the proportion of matching characters and UTF-8/wchar_t
1357 // build)
1358 if ( strOld.m_impl.length() == 1 && strNew.m_impl.length() == 1 )
1359 {
1360 const wxStringCharType chOld = strOld.m_impl[0],
1361 chNew = strNew.m_impl[0];
1362
1363 // this loop is the simplified version of the one below
1364 for ( size_t pos = 0; ; )
1365 {
1366 pos = m_impl.find(chOld, pos);
1367 if ( pos == npos )
1368 break;
c801d85f 1369
8a627032
VZ
1370 m_impl[pos++] = chNew;
1371
1372 uiCount++;
1373
1374 if ( !bReplaceAll )
1375 break;
1376 }
1377 }
1378 else // general case
510bb748 1379 {
8a627032
VZ
1380 const size_t uiOldLen = strOld.m_impl.length();
1381 const size_t uiNewLen = strNew.m_impl.length();
1382
1383 for ( size_t pos = 0; ; )
1384 {
1385 pos = m_impl.find(strOld.m_impl, pos);
1386 if ( pos == npos )
1387 break;
510bb748 1388
8a627032
VZ
1389 // replace this occurrence of the old string with the new one
1390 m_impl.replace(pos, uiOldLen, strNew.m_impl);
510bb748 1391
8a627032
VZ
1392 // move up pos past the string that was replaced
1393 pos += uiNewLen;
ad5bb7d6 1394
8a627032
VZ
1395 // increase replace count
1396 uiCount++;
394b2900 1397
8a627032
VZ
1398 // stop after the first one?
1399 if ( !bReplaceAll )
1400 break;
1401 }
c801d85f 1402 }
c801d85f 1403
510bb748 1404 return uiCount;
c801d85f
KB
1405}
1406
1407bool wxString::IsAscii() const
1408{
a4a44612
VS
1409 for ( const_iterator i = begin(); i != end(); ++i )
1410 {
1411 if ( !(*i).IsAscii() )
1412 return false;
1413 }
1414
1415 return true;
c801d85f 1416}
dd1eaa89 1417
c801d85f
KB
1418bool wxString::IsWord() const
1419{
a4a44612
VS
1420 for ( const_iterator i = begin(); i != end(); ++i )
1421 {
1422 if ( !wxIsalpha(*i) )
1423 return false;
1424 }
1425
1426 return true;
c801d85f 1427}
dd1eaa89 1428
c801d85f
KB
1429bool wxString::IsNumber() const
1430{
a4a44612
VS
1431 if ( empty() )
1432 return true;
1433
1434 const_iterator i = begin();
1435
1436 if ( *i == _T('-') || *i == _T('+') )
1437 ++i;
1438
1439 for ( ; i != end(); ++i )
1440 {
1441 if ( !wxIsdigit(*i) )
1442 return false;
1443 }
1444
1445 return true;
c801d85f
KB
1446}
1447
c801d85f
KB
1448wxString wxString::Strip(stripType w) const
1449{
1450 wxString s = *this;
d775fa82
WS
1451 if ( w & leading ) s.Trim(false);
1452 if ( w & trailing ) s.Trim(true);
c801d85f
KB
1453 return s;
1454}
1455
c801d85f
KB
1456// ---------------------------------------------------------------------------
1457// case conversion
1458// ---------------------------------------------------------------------------
1459
1460wxString& wxString::MakeUpper()
1461{
e87b7833
MB
1462 for ( iterator it = begin(), en = end(); it != en; ++it )
1463 *it = (wxChar)wxToupper(*it);
c801d85f
KB
1464
1465 return *this;
1466}
1467
1468wxString& wxString::MakeLower()
1469{
e87b7833
MB
1470 for ( iterator it = begin(), en = end(); it != en; ++it )
1471 *it = (wxChar)wxTolower(*it);
c801d85f
KB
1472
1473 return *this;
1474}
1475
0c7db140
VZ
1476wxString& wxString::MakeCapitalized()
1477{
1478 const iterator en = end();
1479 iterator it = begin();
1480 if ( it != en )
1481 {
1482 *it = (wxChar)wxToupper(*it);
1483 for ( ++it; it != en; ++it )
1484 *it = (wxChar)wxTolower(*it);
1485 }
1486
1487 return *this;
1488}
1489
c801d85f
KB
1490// ---------------------------------------------------------------------------
1491// trimming and padding
1492// ---------------------------------------------------------------------------
1493
d775fa82 1494// some compilers (VC++ 6.0 not to name them) return true for a call to
9d55bfef 1495// isspace('\xEA') in the C locale which seems to be broken to me, but we have
c95e653c 1496// to live with this by checking that the character is a 7 bit one - even if
9d55bfef 1497// this may fail to detect some spaces (I don't know if Unicode doesn't have
576c608d
VZ
1498// space-like symbols somewhere except in the first 128 chars), it is arguably
1499// still better than trimming away accented letters
1500inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1501
c801d85f
KB
1502// trims spaces (in the sense of isspace) from left or right side
1503wxString& wxString::Trim(bool bFromRight)
1504{
3458e408
WS
1505 // first check if we're going to modify the string at all
1506 if ( !empty() &&
1507 (
1508 (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1509 (!bFromRight && wxSafeIsspace(GetChar(0u)))
1510 )
2c3b684c 1511 )
2c3b684c 1512 {
3458e408
WS
1513 if ( bFromRight )
1514 {
1515 // find last non-space character
d4d02bd5 1516 reverse_iterator psz = rbegin();
32c62191 1517 while ( (psz != rend()) && wxSafeIsspace(*psz) )
0d8b0f94 1518 ++psz;
92df97b8 1519
3458e408 1520 // truncate at trailing space start
d4d02bd5 1521 erase(psz.base(), end());
3458e408
WS
1522 }
1523 else
1524 {
1525 // find first non-space character
1526 iterator psz = begin();
32c62191 1527 while ( (psz != end()) && wxSafeIsspace(*psz) )
0d8b0f94 1528 ++psz;
2c3b684c 1529
3458e408
WS
1530 // fix up data and length
1531 erase(begin(), psz);
1532 }
2c3b684c 1533 }
c801d85f 1534
3458e408 1535 return *this;
c801d85f
KB
1536}
1537
1538// adds nCount characters chPad to the string from either side
c9f78968 1539wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
c801d85f 1540{
3458e408 1541 wxString s(chPad, nCount);
c801d85f 1542
3458e408
WS
1543 if ( bFromRight )
1544 *this += s;
1545 else
1546 {
1547 s += *this;
1548 swap(s);
1549 }
c801d85f 1550
3458e408 1551 return *this;
c801d85f
KB
1552}
1553
1554// truncate the string
1555wxString& wxString::Truncate(size_t uiLen)
1556{
3458e408
WS
1557 if ( uiLen < length() )
1558 {
1559 erase(begin() + uiLen, end());
1560 }
1561 //else: nothing to do, string is already short enough
c801d85f 1562
3458e408 1563 return *this;
c801d85f
KB
1564}
1565
1566// ---------------------------------------------------------------------------
3c67202d 1567// finding (return wxNOT_FOUND if not found and index otherwise)
c801d85f
KB
1568// ---------------------------------------------------------------------------
1569
1570// find a character
c9f78968 1571int wxString::Find(wxUniChar ch, bool bFromEnd) const
c801d85f 1572{
3458e408 1573 size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
c801d85f 1574
3458e408 1575 return (idx == npos) ? wxNOT_FOUND : (int)idx;
c801d85f
KB
1576}
1577
cd0b1709
VZ
1578// ----------------------------------------------------------------------------
1579// conversion to numbers
1580// ----------------------------------------------------------------------------
1581
52de37c7
VS
1582// The implementation of all the functions below is exactly the same so factor
1583// it out. Note that number extraction works correctly on UTF-8 strings, so
1584// we can use wxStringCharType and wx_str() for maximum efficiency.
122f3c5d 1585
92df97b8 1586#ifndef __WXWINCE__
941a4e62
VS
1587 #define DO_IF_NOT_WINCE(x) x
1588#else
1589 #define DO_IF_NOT_WINCE(x)
92df97b8 1590#endif
4ea4767e 1591
c95e653c
VZ
1592#define WX_STRING_TO_INT_TYPE(out, base, func, T) \
1593 wxCHECK_MSG( out, false, _T("NULL output pointer") ); \
941a4e62
VS
1594 wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") ); \
1595 \
1596 DO_IF_NOT_WINCE( errno = 0; ) \
1597 \
1598 const wxStringCharType *start = wx_str(); \
1599 wxStringCharType *end; \
c95e653c 1600 T val = func(start, &end, base); \
941a4e62
VS
1601 \
1602 /* return true only if scan was stopped by the terminating NUL and */ \
1603 /* if the string was not empty to start with and no under/overflow */ \
1604 /* occurred: */ \
c95e653c
VZ
1605 if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) ) \
1606 return false; \
1607 *out = val; \
1608 return true
cd0b1709 1609
c95e653c 1610bool wxString::ToLong(long *pVal, int base) const
cd0b1709 1611{
c95e653c 1612 WX_STRING_TO_INT_TYPE(pVal, base, wxStrtol, long);
619dcb09 1613}
cd0b1709 1614
c95e653c 1615bool wxString::ToULong(unsigned long *pVal, int base) const
619dcb09 1616{
c95e653c 1617 WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoul, unsigned long);
cd0b1709
VZ
1618}
1619
c95e653c 1620bool wxString::ToLongLong(wxLongLong_t *pVal, int base) const
d6718dd1 1621{
c95e653c 1622 WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoll, wxLongLong_t);
d6718dd1
VZ
1623}
1624
c95e653c 1625bool wxString::ToULongLong(wxULongLong_t *pVal, int base) const
d6718dd1 1626{
c95e653c 1627 WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoull, wxULongLong_t);
d6718dd1
VZ
1628}
1629
c95e653c 1630bool wxString::ToDouble(double *pVal) const
cd0b1709 1631{
c95e653c 1632 wxCHECK_MSG( pVal, false, _T("NULL output pointer") );
cd0b1709 1633
c95e653c 1634 DO_IF_NOT_WINCE( errno = 0; )
e71e5b37 1635
cd0b1709
VZ
1636 const wxChar *start = c_str();
1637 wxChar *end;
c95e653c 1638 double val = wxStrtod(start, &end);
cd0b1709 1639
d775fa82 1640 // return true only if scan was stopped by the terminating NUL and if the
bda041e5 1641 // string was not empty to start with and no under/overflow occurred
c95e653c
VZ
1642 if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )
1643 return false;
1644
1645 *pVal = val;
1646
1647 return true;
cd0b1709
VZ
1648}
1649
c801d85f 1650// ---------------------------------------------------------------------------
9efd3367 1651// formatted output
c801d85f 1652// ---------------------------------------------------------------------------
378b05f7 1653
d1f6e2cf 1654#if !wxUSE_UTF8_LOCALE_ONLY
341e7d28 1655/* static */
c9f78968 1656#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1528e0b8 1657wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
c9f78968 1658#else
d1f6e2cf 1659wxString wxString::DoFormatWchar(const wxChar *format, ...)
c9f78968 1660#endif
341e7d28 1661{
77c3e48a 1662 va_list argptr;
c9f78968 1663 va_start(argptr, format);
341e7d28 1664
77c3e48a 1665 wxString s;
c9f78968 1666 s.PrintfV(format, argptr);
341e7d28 1667
77c3e48a 1668 va_end(argptr);
341e7d28 1669
77c3e48a 1670 return s;
341e7d28 1671}
d1f6e2cf
VS
1672#endif // !wxUSE_UTF8_LOCALE_ONLY
1673
#if wxUSE_UNICODE_UTF8
// return a new string produced by applying the printf()-like format to the
// variable arguments (version taking a char format string)
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
341e7d28
VZ
1689
1690/* static */
c9f78968 1691wxString wxString::FormatV(const wxString& format, va_list argptr)
341e7d28
VZ
1692{
1693 wxString s;
c9f78968 1694 s.PrintfV(format, argptr);
341e7d28
VZ
1695 return s;
1696}
1697
d1f6e2cf 1698#if !wxUSE_UTF8_LOCALE_ONLY
c9f78968 1699#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
d1f6e2cf 1700int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
c9f78968 1701#else
d1f6e2cf 1702int wxString::DoPrintfWchar(const wxChar *format, ...)
c9f78968 1703#endif
c801d85f 1704{
ba9bbf13 1705 va_list argptr;
c9f78968 1706 va_start(argptr, format);
c801d85f 1707
c9f78968
VS
1708#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1709 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1710 // because it's the only cast that works safely for downcasting when
1711 // multiple inheritance is used:
1712 wxString *str = static_cast<wxString*>(this);
1713#else
1714 wxString *str = this;
1715#endif
1716
1717 int iLen = str->PrintfV(format, argptr);
c801d85f 1718
ba9bbf13 1719 va_end(argptr);
c801d85f 1720
ba9bbf13 1721 return iLen;
c801d85f 1722}
d1f6e2cf
VS
1723#endif // !wxUSE_UTF8_LOCALE_ONLY
1724
#if wxUSE_UNICODE_UTF8
// replace the contents of the string with the result of applying the
// printf()-like format to the variable arguments and return the value
// returned by PrintfV() (version taking a char format string)
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);
    const int iLen = PrintfV(format, args);
    va_end(args);

    return iLen;
}
#endif // wxUSE_UNICODE_UTF8
c801d85f 1738
67612ff1
DE
1739/*
    Uses wxVsnprintf and places the result into this string.
1741
1742 In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1743 it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in
1744 the ISO C99 (and thus SUSv3) standard the return value for the case of
1745 an undersized buffer is inconsistent. For conforming vsnprintf
1746 implementations the function must return the number of characters that
1747 would have been printed had the buffer been large enough. For conforming
1748 vswprintf implementations the function must return a negative number
1749 and set errno.
1750
1751 What vswprintf sets errno to is undefined but Darwin seems to set it to
a9a854d7
DE
1752 EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of
1753 those are defined in the standard and backed up by several conformance
1754 statements. Note that ENOMEM mentioned in the manual page does not
1755 apply to swprintf, only wprintf and fwprintf.
1756
1757 Official manual page:
1758 http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1759
1760 Some conformance statements (AIX, Solaris):
1761 http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1762 http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1763
1764 Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1765 EILSEQ and EINVAL are specifically defined to mean the error is other than
1766 an undersized buffer and no other errno are defined we treat those two
1767 as meaning hard errors and everything else gets the old behavior which
1768 is to keep looping and increasing buffer size until the function succeeds.
c95e653c 1769
67612ff1
DE
1770 In practice it's impossible to determine before compilation which behavior
1771 may be used. The vswprintf function may have vsnprintf-like behavior or
1772 vice-versa. Behavior detected on one release can theoretically change
1773 with an updated release. Not to mention that configure testing for it
1774 would require the test to be run on the host system, not the build system
1775 which makes cross compilation difficult. Therefore, we make no assumptions
1776 about behavior and try our best to handle every known case, including the
1777 case where wxVsnprintf returns a negative number and fails to set errno.
1778
1779 There is yet one more non-standard implementation and that is our own.
1780 Fortunately, that can be detected at compile-time.
1781
1782 On top of all that, ISO C99 explicitly defines snprintf to write a null
    character to the last position of the specified buffer. That would be
    at the given buffer size minus 1. It is supposed to do this even if it
1785 turns out that the buffer is sized too small.
1786
1787 Darwin (tested on 10.5) follows the C99 behavior exactly.
1788
1789 Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1790 errno even when it fails. However, it only seems to ever fail due
1791 to an undersized buffer.
1792*/
2523e9b7
VS
1793#if wxUSE_UNICODE_UTF8
1794template<typename BufferType>
1795#else
1796// we only need one version in non-UTF8 builds and at least two Windows
1797// compilers have problems with this function template, so use just one
1798// normal function here
1799#endif
1800static int DoStringPrintfV(wxString& str,
1801 const wxString& format, va_list argptr)
c801d85f 1802{
f6f5941b 1803 int size = 1024;
e87b7833 1804
f6f5941b
VZ
1805 for ( ;; )
1806 {
2523e9b7
VS
1807#if wxUSE_UNICODE_UTF8
1808 BufferType tmp(str, size + 1);
1809 typename BufferType::CharType *buf = tmp;
1810#else
1811 wxStringBuffer tmp(str, size + 1);
de2589be 1812 wxChar *buf = tmp;
2523e9b7 1813#endif
2bb67b80 1814
ba9bbf13
WS
1815 if ( !buf )
1816 {
1817 // out of memory
a33c7045
VS
1818
1819 // in UTF-8 build, leaving uninitialized junk in the buffer
1820 // could result in invalid non-empty UTF-8 string, so just
1821 // reset the string to empty on failure:
1822 buf[0] = '\0';
ba9bbf13 1823 return -1;
e87b7833 1824 }
f6f5941b 1825
ba9bbf13
WS
1826 // wxVsnprintf() may modify the original arg pointer, so pass it
1827 // only a copy
1828 va_list argptrcopy;
1829 wxVaCopy(argptrcopy, argptr);
67612ff1
DE
1830
1831#ifndef __WXWINCE__
1832 // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1833 errno = 0;
1834#endif
2523e9b7 1835 int len = wxVsnprintf(buf, size, format, argptrcopy);
ba9bbf13
WS
1836 va_end(argptrcopy);
1837
1838 // some implementations of vsnprintf() don't NUL terminate
1839 // the string if there is not enough space for it so
1840 // always do it manually
67612ff1
DE
1841 // FIXME: This really seems to be the wrong and would be an off-by-one
1842 // bug except the code above allocates an extra character.
ba9bbf13
WS
1843 buf[size] = _T('\0');
1844
caff62f2
VZ
1845 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1846 // total number of characters which would have been written if the
b1727cfe 1847 // buffer were large enough (newer standards such as Unix98)
de2589be
VZ
1848 if ( len < 0 )
1849 {
52de37c7
VS
1850 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1851 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1852 // is true if *both* of them use our own implementation,
1853 // otherwise we can't be sure
f2bbe5b6
VZ
1854#if wxUSE_WXVSNPRINTF
1855 // we know that our own implementation of wxVsnprintf() returns -1
1856 // only for a format error - thus there's something wrong with
1857 // the user's format string
a33c7045 1858 buf[0] = '\0';
f2bbe5b6 1859 return -1;
52de37c7
VS
1860#else // possibly using system version
1861 // assume it only returns error if there is not enough space, but
1862 // as we don't know how much we need, double the current size of
1863 // the buffer
67612ff1 1864#ifndef __WXWINCE__
a9a854d7
DE
1865 if( (errno == EILSEQ) || (errno == EINVAL) )
1866 // If errno was set to one of the two well-known hard errors
1867 // then fail immediately to avoid an infinite loop.
1868 return -1;
1869 else
1870#endif // __WXWINCE__
67612ff1
DE
1871 // still not enough, as we don't know how much we need, double the
1872 // current size of the buffer
1873 size *= 2;
f2bbe5b6 1874#endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
de2589be 1875 }
64f8f94c 1876 else if ( len >= size )
de2589be 1877 {
f2bbe5b6 1878#if wxUSE_WXVSNPRINTF
c95e653c 1879 // we know that our own implementation of wxVsnprintf() returns
f2bbe5b6
VZ
1880 // size+1 when there's not enough space but that's not the size
1881 // of the required buffer!
1882 size *= 2; // so we just double the current size of the buffer
1883#else
64f8f94c
VZ
1884 // some vsnprintf() implementations NUL-terminate the buffer and
1885 // some don't in len == size case, to be safe always add 1
67612ff1
DE
1886 // FIXME: I don't quite understand this comment. The vsnprintf
1887 // function is specifically defined to return the number of
1888 // characters printed not including the null terminator.
1889 // So OF COURSE you need to add 1 to get the right buffer size.
1890 // The following line is definitely correct, no question.
64f8f94c 1891 size = len + 1;
f2bbe5b6 1892#endif
de2589be
VZ
1893 }
1894 else // ok, there was enough space
f6f5941b 1895 {
f6f5941b
VZ
1896 break;
1897 }
f6f5941b
VZ
1898 }
1899
1900 // we could have overshot
2523e9b7
VS
1901 str.Shrink();
1902
1903 return str.length();
1904}
c801d85f 1905
2523e9b7
VS
1906int wxString::PrintfV(const wxString& format, va_list argptr)
1907{
2523e9b7
VS
1908#if wxUSE_UNICODE_UTF8
1909 #if wxUSE_STL_BASED_WXSTRING
1910 typedef wxStringTypeBuffer<char> Utf8Buffer;
1911 #else
6798451b 1912 typedef wxStringInternalBuffer Utf8Buffer;
2523e9b7
VS
1913 #endif
1914#endif
1915
1916#if wxUSE_UTF8_LOCALE_ONLY
c6255a6e 1917 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
2523e9b7
VS
1918#else
1919 #if wxUSE_UNICODE_UTF8
1920 if ( wxLocaleIsUtf8 )
c6255a6e 1921 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
2523e9b7
VS
1922 else
1923 // wxChar* version
c6255a6e 1924 return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
2523e9b7 1925 #else
c6255a6e 1926 return DoStringPrintfV(*this, format, argptr);
2523e9b7
VS
1927 #endif // UTF8/WCHAR
1928#endif
c801d85f
KB
1929}
1930
097c080b
VZ
1931// ----------------------------------------------------------------------------
1932// misc other operations
1933// ----------------------------------------------------------------------------
0c5d3e1c 1934
d775fa82 1935// returns true if the string matches the pattern which may contain '*' and
0c5d3e1c
VZ
1936// '?' metacharacters (as usual, '?' matches any character and '*' any number
1937// of them)
8a540c88 1938bool wxString::Matches(const wxString& mask) const
097c080b 1939{
d6044f58
VZ
1940 // I disable this code as it doesn't seem to be faster (in fact, it seems
1941 // to be much slower) than the old, hand-written code below and using it
1942 // here requires always linking with libregex even if the user code doesn't
1943 // use it
1944#if 0 // wxUSE_REGEX
706c2ac9
VZ
1945 // first translate the shell-like mask into a regex
1946 wxString pattern;
1947 pattern.reserve(wxStrlen(pszMask));
1948
1949 pattern += _T('^');
1950 while ( *pszMask )
1951 {
1952 switch ( *pszMask )
1953 {
1954 case _T('?'):
1955 pattern += _T('.');
1956 break;
1957
1958 case _T('*'):
1959 pattern += _T(".*");
1960 break;
1961
1962 case _T('^'):
1963 case _T('.'):
1964 case _T('$'):
1965 case _T('('):
1966 case _T(')'):
1967 case _T('|'):
1968 case _T('+'):
1969 case _T('\\'):
1970 // these characters are special in a RE, quote them
1971 // (however note that we don't quote '[' and ']' to allow
1972 // using them for Unix shell like matching)
1973 pattern += _T('\\');
1974 // fall through
1975
1976 default:
1977 pattern += *pszMask;
1978 }
1979
1980 pszMask++;
1981 }
1982 pattern += _T('$');
1983
1984 // and now use it
1985 return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
1986#else // !wxUSE_REGEX
9a4232dc
VZ
1987 // TODO: this is, of course, awfully inefficient...
1988
8a540c88
VS
1989 // FIXME-UTF8: implement using iterators, remove #if
1990#if wxUSE_UNICODE_UTF8
1991 wxWCharBuffer maskBuf = mask.wc_str();
1992 wxWCharBuffer txtBuf = wc_str();
1993 const wxChar *pszMask = maskBuf.data();
1994 const wxChar *pszTxt = txtBuf.data();
1995#else
1996 const wxChar *pszMask = mask.wx_str();
9a4232dc 1997 // the char currently being checked
8a540c88
VS
1998 const wxChar *pszTxt = wx_str();
1999#endif
9a4232dc
VZ
2000
2001 // the last location where '*' matched
2002 const wxChar *pszLastStarInText = NULL;
2003 const wxChar *pszLastStarInMask = NULL;
2004
2005match:
2006 for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
097c080b 2007 switch ( *pszMask ) {
223d09f6
KB
2008 case wxT('?'):
2009 if ( *pszTxt == wxT('\0') )
d775fa82 2010 return false;
097c080b 2011
9a4232dc 2012 // pszTxt and pszMask will be incremented in the loop statement
0c5d3e1c 2013
097c080b
VZ
2014 break;
2015
223d09f6 2016 case wxT('*'):
097c080b 2017 {
9a4232dc
VZ
2018 // remember where we started to be able to backtrack later
2019 pszLastStarInText = pszTxt;
2020 pszLastStarInMask = pszMask;
2021
097c080b 2022 // ignore special chars immediately following this one
9a4232dc 2023 // (should this be an error?)
223d09f6 2024 while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
097c080b
VZ
2025 pszMask++;
2026
2027 // if there is nothing more, match
223d09f6 2028 if ( *pszMask == wxT('\0') )
d775fa82 2029 return true;
097c080b
VZ
2030
2031 // are there any other metacharacters in the mask?
c86f1403 2032 size_t uiLenMask;
223d09f6 2033 const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));
097c080b
VZ
2034
2035 if ( pEndMask != NULL ) {
2036 // we have to match the string between two metachars
2037 uiLenMask = pEndMask - pszMask;
2038 }
2039 else {
2040 // we have to match the remainder of the string
2bb67b80 2041 uiLenMask = wxStrlen(pszMask);
097c080b
VZ
2042 }
2043
2044 wxString strToMatch(pszMask, uiLenMask);
2bb67b80 2045 const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
097c080b 2046 if ( pMatch == NULL )
d775fa82 2047 return false;
097c080b
VZ
2048
2049 // -1 to compensate "++" in the loop
2050 pszTxt = pMatch + uiLenMask - 1;
2051 pszMask += uiLenMask - 1;
2052 }
2053 break;
2054
2055 default:
2056 if ( *pszMask != *pszTxt )
d775fa82 2057 return false;
097c080b
VZ
2058 break;
2059 }
2060 }
2061
2062 // match only if nothing left
9a4232dc 2063 if ( *pszTxt == wxT('\0') )
d775fa82 2064 return true;
9a4232dc
VZ
2065
2066 // if we failed to match, backtrack if we can
2067 if ( pszLastStarInText ) {
2068 pszTxt = pszLastStarInText + 1;
2069 pszMask = pszLastStarInMask;
2070
2071 pszLastStarInText = NULL;
2072
2073 // don't bother resetting pszLastStarInMask, it's unnecessary
2074
2075 goto match;
2076 }
2077
d775fa82 2078 return false;
706c2ac9 2079#endif // wxUSE_REGEX/!wxUSE_REGEX
097c080b
VZ
2080}
2081
1fc5dd6f 2082// Count the number of chars
c9f78968 2083int wxString::Freq(wxUniChar ch) const
1fc5dd6f
JS
2084{
2085 int count = 0;
8f93a29f 2086 for ( const_iterator i = begin(); i != end(); ++i )
1fc5dd6f 2087 {
8f93a29f 2088 if ( *i == ch )
1fc5dd6f
JS
2089 count ++;
2090 }
2091 return count;
2092}
2093
628f87da
VS
2094// ----------------------------------------------------------------------------
2095// wxUTF8StringBuffer
2096// ----------------------------------------------------------------------------
2097
7d46f92b 2098#if wxUSE_UNICODE_WCHAR
628f87da
VS
2099wxUTF8StringBuffer::~wxUTF8StringBuffer()
2100{
2101 wxMBConvStrictUTF8 conv;
2102 size_t wlen = conv.ToWChar(NULL, 0, m_buf);
2103 wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
2104
2105 wxStringInternalBuffer wbuf(m_str, wlen);
2106 conv.ToWChar(wbuf, wlen, m_buf);
2107}
2108
2109wxUTF8StringBufferLength::~wxUTF8StringBufferLength()
2110{
2111 wxCHECK_RET(m_lenSet, "length not set");
2112
2113 wxMBConvStrictUTF8 conv;
2114 size_t wlen = conv.ToWChar(NULL, 0, m_buf, m_len);
2115 wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
2116
2117 wxStringInternalBufferLength wbuf(m_str, wlen);
2118 conv.ToWChar(wbuf, wlen, m_buf, m_len);
2119 wbuf.SetLength(wlen);
2120}
7d46f92b 2121#endif // wxUSE_UNICODE_WCHAR
5c1de526
VS
2122
// ----------------------------------------------------------------------------
// wxCharTypeBuffer<T>
// ----------------------------------------------------------------------------
2126
c9b6cebd 2127#ifndef __VMS_BROKEN_TEMPLATES
5c1de526 2128template<>
c9b6cebd 2129#endif
5c1de526
VS
2130wxCharTypeBuffer<char>::Data
2131wxCharTypeBuffer<char>::NullData(NULL);
2132
c9b6cebd 2133#ifndef __VMS_BROKEN_TEMPLATES
5c1de526 2134template<>
c9b6cebd 2135#endif
5c1de526
VS
2136wxCharTypeBuffer<wchar_t>::Data
2137wxCharTypeBuffer<wchar_t>::NullData(NULL);