]> git.saurik.com Git - wxWidgets.git/blame - src/common/string.cpp
blind build fix for STL builds
[wxWidgets.git] / src / common / string.cpp
CommitLineData
c801d85f 1/////////////////////////////////////////////////////////////////////////////
8898456d 2// Name: src/common/string.cpp
c801d85f 3// Purpose: wxString class
59059feb 4// Author: Vadim Zeitlin, Ryan Norton
c801d85f
KB
5// Modified by:
6// Created: 29/01/98
7// RCS-ID: $Id$
8// Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
59059feb 9// (c) 2004 Ryan Norton <wxprojects@comcast.net>
65571936 10// Licence: wxWindows licence
c801d85f
KB
11/////////////////////////////////////////////////////////////////////////////
12
c801d85f
KB
13// ===========================================================================
14// headers, declarations, constants
15// ===========================================================================
16
17// For compilers that support precompilation, includes "wx.h".
18#include "wx/wxprec.h"
19
20#ifdef __BORLANDC__
8898456d 21 #pragma hdrstop
c801d85f
KB
22#endif
23
24#ifndef WX_PRECOMP
8898456d 25 #include "wx/string.h"
2523e9b7 26 #include "wx/wxcrtvararg.h"
6b769f3d 27#endif
c801d85f
KB
28
29#include <ctype.h>
92df97b8
WS
30
31#ifndef __WXWINCE__
32 #include <errno.h>
33#endif
34
c801d85f
KB
35#include <string.h>
36#include <stdlib.h>
9a08c20e 37
8116a0c5 38#include "wx/hashmap.h"
072682ce 39#include "wx/vector.h"
8f93a29f
VS
40
// Low-level character array helpers used by the wxString implementation:
// the UTF-8 build works on plain bytes and so uses the C library functions
// directly, all the other builds go through the wxT* equivalents.
#if !wxUSE_UNICODE_UTF8
    #define wxStringMemcpy   wxTmemcpy
    #define wxStringMemcmp   wxTmemcmp
    #define wxStringMemchr   wxTmemchr
    #define wxStringStrlen   wxStrlen
#else // wxUSE_UNICODE_UTF8
    #define wxStringMemcpy   memcpy
    #define wxStringMemcmp   memcmp
    #define wxStringMemchr   memchr
    #define wxStringStrlen   strlen
#endif // !wxUSE_UNICODE_UTF8/wxUSE_UNICODE_UTF8
8f93a29f 53
4e79262f
VZ
54// ----------------------------------------------------------------------------
55// global variables
56// ----------------------------------------------------------------------------
57
58namespace wxPrivate
59{
60
61static UntypedBufferData s_untypedNullData(NULL);
62
63UntypedBufferData * const untypedNullDataPtr = &s_untypedNullData;
64
65} // namespace wxPrivate
e87b7833 66
a7ea63e2
VS
67// ---------------------------------------------------------------------------
68// static class variables definition
69// ---------------------------------------------------------------------------
e87b7833 70
a7ea63e2
VS
71//According to STL _must_ be a -1 size_t
72const size_t wxString::npos = (size_t) -1;
8f93a29f 73
68482dc5 74#if wxUSE_STRING_POS_CACHE
68482dc5 75
e810df36
VZ
76#ifdef wxHAS_COMPILER_TLS
77
78wxTLS_TYPE(wxString::Cache) wxString::ms_cache;
79
80#else // !wxHAS_COMPILER_TLS
81
ad8ae788
VZ
82struct wxStrCacheInitializer
83{
84 wxStrCacheInitializer()
85 {
86 // calling this function triggers s_cache initialization in it, and
87 // from now on it becomes safe to call from multiple threads
88 wxString::GetCache();
89 }
90};
91
e317bd3f
SC
92/*
93wxString::Cache& wxString::GetCache()
94{
95 static wxTLS_TYPE(Cache) s_cache;
96
97 return wxTLS_VALUE(s_cache);
98}
99*/
100
ad8ae788
VZ
101static wxStrCacheInitializer gs_stringCacheInit;
102
e810df36
VZ
103#endif // wxHAS_COMPILER_TLS/!wxHAS_COMPILER_TLS
104
68482dc5
VZ
105// gdb seems to be unable to display thread-local variables correctly, at least
106// not my 6.4.98 version under amd64, so provide this debugging helper to do it
107#ifdef __WXDEBUG__
108
109struct wxStrCacheDumper
110{
111 static void ShowAll()
112 {
113 puts("*** wxString cache dump:");
114 for ( unsigned n = 0; n < wxString::Cache::SIZE; n++ )
115 {
116 const wxString::Cache::Element&
8b73c531 117 c = wxString::GetCacheBegin()[n];
68482dc5
VZ
118
119 printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
120 n,
8b73c531 121 n == wxString::LastUsedCacheElement() ? " [*]" : "",
68482dc5
VZ
122 c.str,
123 (unsigned long)c.pos,
124 (unsigned long)c.impl,
125 (long)c.len);
126 }
127 }
128};
129
130void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
131
132#endif // __WXDEBUG__
133
134#ifdef wxPROFILE_STRING_CACHE
135
136wxString::CacheStats wxString::ms_cacheStats;
137
8c3b65d9 138struct wxStrCacheStatsDumper
68482dc5 139{
8c3b65d9 140 ~wxStrCacheStatsDumper()
68482dc5
VZ
141 {
142 const wxString::CacheStats& stats = wxString::ms_cacheStats;
143
144 if ( stats.postot )
145 {
146 puts("*** wxString cache statistics:");
147 printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
148 stats.postot);
149 printf("\tHits %u (of which %u not used) or %.2f%%\n",
150 stats.poshits,
151 stats.mishits,
152 100.*float(stats.poshits - stats.mishits)/stats.postot);
153 printf("\tAverage position requested: %.2f\n",
154 float(stats.sumpos) / stats.postot);
155 printf("\tAverage offset after cached hint: %.2f\n",
156 float(stats.sumofs) / stats.postot);
157 }
158
159 if ( stats.lentot )
160 {
161 printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
162 stats.lentot, 100.*float(stats.lenhits)/stats.lentot);
163 }
164 }
8c3b65d9 165};
68482dc5 166
8c3b65d9 167static wxStrCacheStatsDumper s_showCacheStats;
68482dc5
VZ
168
169#endif // wxPROFILE_STRING_CACHE
170
171#endif // wxUSE_STRING_POS_CACHE
172
a7ea63e2
VS
173// ----------------------------------------------------------------------------
174// global functions
175// ----------------------------------------------------------------------------
e87b7833 176
a7ea63e2 177#if wxUSE_STD_IOSTREAM
8f93a29f 178
a7ea63e2 179#include <iostream>
8f93a29f 180
a7ea63e2 181wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
8f93a29f 182{
7a906e1a 183#if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
ddf01bdb
VZ
184 const wxCharBuffer buf(str.AsCharBuf());
185 if ( !buf )
186 os.clear(wxSTD ios_base::failbit);
187 else
188 os << buf.data();
189
190 return os;
a7ea63e2 191#else
7a906e1a 192 return os << str.AsInternal();
a7ea63e2 193#endif
8f93a29f
VS
194}
195
04abe4bc
VS
196wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
197{
198 return os << str.c_str();
199}
200
201wxSTD ostream& operator<<(wxSTD ostream& os, const wxCharBuffer& str)
202{
203 return os << str.data();
204}
205
206#ifndef __BORLANDC__
207wxSTD ostream& operator<<(wxSTD ostream& os, const wxWCharBuffer& str)
208{
209 return os << str.data();
210}
211#endif
212
#if wxUSE_UNICODE && defined(HAVE_WOSTREAM)

// Wide stream output operators, available only in Unicode builds using a
// standard library which provides wostream.

// output the wide representation of the string
wxSTD wostream& operator<<(wxSTD wostream& wos, const wxString& str)
{
    wos << str.wc_str();
    return wos;
}

// output the wide representation of the string referred to by the object
wxSTD wostream& operator<<(wxSTD wostream& wos, const wxCStrData& str)
{
    wos << str.AsWChar();
    return wos;
}

// output the contents of a wide character buffer
wxSTD wostream& operator<<(wxSTD wostream& wos, const wxWCharBuffer& str)
{
    wos << str.data();
    return wos;
}

#endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
6b61b594 231
a7ea63e2 232#endif // wxUSE_STD_IOSTREAM
e87b7833 233
81727065
VS
234// ===========================================================================
235// wxString class core
236// ===========================================================================
237
238#if wxUSE_UNICODE_UTF8
239
81727065
VS
240void wxString::PosLenToImpl(size_t pos, size_t len,
241 size_t *implPos, size_t *implLen) const
242{
243 if ( pos == npos )
68482dc5 244 {
81727065 245 *implPos = npos;
68482dc5
VZ
246 }
247 else // have valid start position
81727065 248 {
68482dc5
VZ
249 const const_iterator b = GetIterForNthChar(pos);
250 *implPos = wxStringImpl::const_iterator(b.impl()) - m_impl.begin();
81727065 251 if ( len == npos )
68482dc5 252 {
81727065 253 *implLen = npos;
68482dc5
VZ
254 }
255 else // have valid length too
81727065 256 {
68482dc5
VZ
257 // we need to handle the case of length specifying a substring
258 // going beyond the end of the string, just as std::string does
259 const const_iterator e(end());
260 const_iterator i(b);
261 while ( len && i <= e )
262 {
263 ++i;
264 --len;
265 }
266
267 *implLen = i.impl() - b.impl();
81727065
VS
268 }
269 }
270}
271
272#endif // wxUSE_UNICODE_UTF8
273
11aac4ba
VS
274// ----------------------------------------------------------------------------
275// wxCStrData converted strings caching
276// ----------------------------------------------------------------------------
277
132276cf
VS
278// FIXME-UTF8: temporarily disabled because it doesn't work with global
279// string objects; re-enable after fixing this bug and benchmarking
280// performance to see if using a hash is a good idea at all
281#if 0
282
11aac4ba
VS
283// For backward compatibility reasons, it must be possible to assign the value
284// returned by wxString::c_str() to a char* or wchar_t* variable and work with
285// it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
286// because the memory would be freed immediately, but it has to be valid as long
287// as the string is not modified, so that code like this still works:
288//
289// const wxChar *s = str.c_str();
290// while ( s ) { ... }
291
292// FIXME-UTF8: not thread safe!
293// FIXME-UTF8: we currently clear the cached conversion only when the string is
294// destroyed, but we should do it when the string is modified, to
295// keep memory usage down
296// FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
297// invalidated the cache on every change, we could keep the previous
298// conversion
299// FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
300// to use mb_str() or wc_str() instead of (const [w]char*)c_str()
301
302template<typename T>
303static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
304{
6c4ebcda 305 typename T::iterator i = hash.find(wxConstCast(s, wxString));
11aac4ba
VS
306 if ( i != hash.end() )
307 {
308 free(i->second);
309 hash.erase(i);
310 }
311}
312
#if wxUSE_UNICODE
// NB: non-STL implementation doesn't compile with "const wxString*" key type,
//     so we have to use wxString* here and const-cast when used
WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
                    wxStringCharConversionCache);
static wxStringCharConversionCache gs_stringsCharCache;

// Return the narrow representation of the string, storing the converted
// buffer in the global cache keyed by the string address.
const char* wxCStrData::AsChar() const
{
    // remove previously cached value, if any (see FIXMEs above):
    DeleteStringFromConversionCache(gs_stringsCharCache, m_str);

    // convert the string and keep the result in the cache:
    char * const converted = m_str->mb_str().release();
    gs_stringsCharCache[wxConstCast(m_str, wxString)] = converted;

    return converted + m_offset;
}
#endif // wxUSE_UNICODE
332
333#if !wxUSE_UNICODE_WCHAR
334WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
335 wxStringWCharConversionCache);
336static wxStringWCharConversionCache gs_stringsWCharCache;
337
338const wchar_t* wxCStrData::AsWChar() const
339{
340 // remove previously cache value, if any (see FIXMEs above):
341 DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
342
343 // convert the string and keep it:
6c4ebcda
VS
344 const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
345 m_str->wc_str().release();
11aac4ba
VS
346
347 return s + m_offset;
348}
349#endif // !wxUSE_UNICODE_WCHAR
350
11aac4ba
VS
351wxString::~wxString()
352{
353#if wxUSE_UNICODE
354 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
355 DeleteStringFromConversionCache(gs_stringsCharCache, this);
356#endif
357#if !wxUSE_UNICODE_WCHAR
358 DeleteStringFromConversionCache(gs_stringsWCharCache, this);
359#endif
360}
132276cf
VS
361#endif
362
111d9948 363#if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
132276cf
VS
364const char* wxCStrData::AsChar() const
365{
111d9948
VS
366#if wxUSE_UNICODE_UTF8
367 if ( wxLocaleIsUtf8 )
368 return AsInternal();
369#endif
370 // under non-UTF8 locales, we have to convert the internal UTF-8
371 // representation using wxConvLibc and cache the result
372
132276cf 373 wxString *str = wxConstCast(m_str, wxString);
05f32fc3
VS
374
375 // convert the string:
2a7431e1
VZ
376 //
377 // FIXME-UTF8: we'd like to do the conversion in the existing buffer (if we
378 // have it) but it's unfortunately not obvious to implement
379 // because we don't know how big buffer do we need for the
380 // given string length (in case of multibyte encodings, e.g.
381 // ISO-2022-JP or UTF-8 when internal representation is wchar_t)
382 //
383 // One idea would be to store more than just m_convertedToChar
384 // in wxString: then we could record the length of the string
385 // which was converted the last time and try to reuse the same
386 // buffer if the current length is not greater than it (this
387 // could still fail because string could have been modified in
388 // place but it would work most of the time, so we'd do it and
389 // only allocate the new buffer if in-place conversion returned
390 // an error). We could also store a bit saying if the string
391 // was modified since the last conversion (and update it in all
392 // operation modifying the string, of course) to avoid unneeded
393 // consequential conversions. But both of these ideas require
394 // adding more fields to wxString and require profiling results
395 // to be sure that we really gain enough from them to justify
396 // doing it.
05f32fc3
VS
397 wxCharBuffer buf(str->mb_str());
398
28be59b4
VZ
399 // if it failed, return empty string and not NULL to avoid crashes in code
400 // written with either wxWidgets 2 wxString or std::string behaviour in
401 // mind: neither of them ever returns NULL and so we shouldn't neither
402 if ( !buf )
403 return "";
404
05f32fc3
VS
405 if ( str->m_convertedToChar &&
406 strlen(buf) == strlen(str->m_convertedToChar) )
407 {
408 // keep the same buffer for as long as possible, so that several calls
409 // to c_str() in a row still work:
410 strcpy(str->m_convertedToChar, buf);
411 }
412 else
413 {
414 str->m_convertedToChar = buf.release();
415 }
416
417 // and keep it:
132276cf
VS
418 return str->m_convertedToChar + m_offset;
419}
111d9948 420#endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
132276cf
VS
421
422#if !wxUSE_UNICODE_WCHAR
423const wchar_t* wxCStrData::AsWChar() const
424{
425 wxString *str = wxConstCast(m_str, wxString);
05f32fc3
VS
426
427 // convert the string:
428 wxWCharBuffer buf(str->wc_str());
429
28be59b4
VZ
430 // notice that here, unlike above in AsChar(), conversion can't fail as our
431 // internal UTF-8 is always well-formed -- or the string was corrupted and
432 // all bets are off anyhow
433
05f32fc3
VS
434 // FIXME-UTF8: do the conversion in-place in the existing buffer
435 if ( str->m_convertedToWChar &&
436 wxWcslen(buf) == wxWcslen(str->m_convertedToWChar) )
437 {
438 // keep the same buffer for as long as possible, so that several calls
439 // to c_str() in a row still work:
440 memcpy(str->m_convertedToWChar, buf, sizeof(wchar_t) * wxWcslen(buf));
441 }
442 else
443 {
444 str->m_convertedToWChar = buf.release();
445 }
446
447 // and keep it:
132276cf
VS
448 return str->m_convertedToWChar + m_offset;
449}
450#endif // !wxUSE_UNICODE_WCHAR
451
452// ===========================================================================
453// wxString class core
454// ===========================================================================
455
456// ---------------------------------------------------------------------------
457// construction and conversion
458// ---------------------------------------------------------------------------
11aac4ba 459
81727065 460#if wxUSE_UNICODE_WCHAR
8f93a29f
VS
461/* static */
462wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
04abe4bc 463 const wxMBConv& conv)
8f93a29f
VS
464{
465 // anything to do?
466 if ( !psz || nLength == 0 )
81727065 467 return SubstrBufFromMB(L"", 0);
8f93a29f
VS
468
469 if ( nLength == npos )
470 nLength = wxNO_LEN;
471
472 size_t wcLen;
473 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
474 if ( !wcLen )
81727065 475 return SubstrBufFromMB(_T(""), 0);
8f93a29f
VS
476 else
477 return SubstrBufFromMB(wcBuf, wcLen);
478}
81727065
VS
479#endif // wxUSE_UNICODE_WCHAR
480
481#if wxUSE_UNICODE_UTF8
482/* static */
483wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
484 const wxMBConv& conv)
485{
81727065
VS
486 // anything to do?
487 if ( !psz || nLength == 0 )
488 return SubstrBufFromMB("", 0);
489
111d9948
VS
490 // if psz is already in UTF-8, we don't have to do the roundtrip to
491 // wchar_t* and back:
492 if ( conv.IsUTF8() )
493 {
494 // we need to validate the input because UTF8 iterators assume valid
495 // UTF-8 sequence and psz may be invalid:
496 if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
497 {
9ef1ad0d
VZ
498 // we must pass the real string length to SubstrBufFromMB ctor
499 if ( nLength == npos )
500 nLength = psz ? strlen(psz) : 0;
111d9948
VS
501 return SubstrBufFromMB(wxCharBuffer::CreateNonOwned(psz), nLength);
502 }
503 // else: do the roundtrip through wchar_t*
504 }
505
81727065
VS
506 if ( nLength == npos )
507 nLength = wxNO_LEN;
508
509 // first convert to wide string:
510 size_t wcLen;
511 wxWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
512 if ( !wcLen )
513 return SubstrBufFromMB("", 0);
514
515 // and then to UTF-8:
4fdfe2f3 516 SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
81727065
VS
517 // widechar -> UTF-8 conversion isn't supposed to ever fail:
518 wxASSERT_MSG( buf.data, _T("conversion to UTF-8 failed") );
519
520 return buf;
521}
522#endif // wxUSE_UNICODE_UTF8
523
524#if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
8f93a29f
VS
525/* static */
526wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
04abe4bc 527 const wxMBConv& conv)
8f93a29f
VS
528{
529 // anything to do?
530 if ( !pwz || nLength == 0 )
81727065 531 return SubstrBufFromWC("", 0);
8f93a29f
VS
532
533 if ( nLength == npos )
534 nLength = wxNO_LEN;
535
536 size_t mbLen;
537 wxCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
538 if ( !mbLen )
81727065 539 return SubstrBufFromWC("", 0);
8f93a29f
VS
540 else
541 return SubstrBufFromWC(mbBuf, mbLen);
542}
81727065 543#endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
8f93a29f
VS
544
545
81727065 546#if wxUSE_UNICODE_WCHAR
e87b7833 547
06386448 548//Convert wxString in Unicode mode to a multi-byte string
830f8f11 549const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
265d5cce 550{
81727065 551 return conv.cWC2MB(wx_str(), length() + 1 /* size, not length */, NULL);
e87b7833
MB
552}
553
81727065 554#elif wxUSE_UNICODE_UTF8
e87b7833 555
81727065
VS
556const wxWCharBuffer wxString::wc_str() const
557{
4fdfe2f3
VZ
558 return wxMBConvStrictUTF8().cMB2WC
559 (
560 m_impl.c_str(),
561 m_impl.length() + 1, // size, not length
562 NULL
563 );
81727065
VS
564}
565
566const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
567{
111d9948
VS
568 if ( conv.IsUTF8() )
569 return wxCharBuffer::CreateNonOwned(m_impl.c_str());
570
81727065
VS
571 // FIXME-UTF8: use wc_str() here once we have buffers with length
572
573 size_t wcLen;
4fdfe2f3
VZ
574 wxWCharBuffer wcBuf(wxMBConvStrictUTF8().cMB2WC
575 (
576 m_impl.c_str(),
577 m_impl.length() + 1, // size
578 &wcLen
579 ));
81727065
VS
580 if ( !wcLen )
581 return wxCharBuffer("");
582
4f696af8 583 return conv.cWC2MB(wcBuf, wcLen+1, NULL);
81727065
VS
584}
585
586#else // ANSI
eec47cc6 587
7663d0d4 588//Converts this string to a wide character string if unicode
06386448 589//mode is not enabled and wxUSE_WCHAR_T is enabled
830f8f11 590const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
265d5cce 591{
81727065 592 return conv.cMB2WC(wx_str(), length() + 1 /* size, not length */, NULL);
265d5cce 593}
7663d0d4 594
e87b7833
MB
595#endif // Unicode/ANSI
596
597// shrink to minimal size (releasing extra memory)
598bool wxString::Shrink()
599{
600 wxString tmp(begin(), end());
601 swap(tmp);
602 return tmp.length() == length();
603}
604
// Deprecated compatibility functions: they simply forward to the Do-prefixed
// implementations.
#if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8

wxStringCharType *wxString::GetWriteBuf(size_t nLen)
{
    wxStringCharType * const buf = DoGetWriteBuf(nLen);
    return buf;
}

void wxString::UngetWriteBuf()
{
    DoUngetWriteBuf();
}

void wxString::UngetWriteBuf(size_t nLen)
{
    DoUngetWriteBuf(nLen);
}

#endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
e87b7833 622
d8a4b666 623
e87b7833
MB
624// ---------------------------------------------------------------------------
625// data access
626// ---------------------------------------------------------------------------
627
628// all functions are inline in string.h
629
630// ---------------------------------------------------------------------------
e8f59039 631// concatenation operators
e87b7833
MB
632// ---------------------------------------------------------------------------
633
c801d85f 634/*
c801d85f
KB
635 * concatenation functions come in 5 flavours:
636 * string + string
637 * char + string and string + char
638 * C str + string and string + C str
639 */
640
b1801e0e 641wxString operator+(const wxString& str1, const wxString& str2)
c801d85f 642{
992527a5 643#if !wxUSE_STL_BASED_WXSTRING
8f93a29f
VS
644 wxASSERT( str1.IsValid() );
645 wxASSERT( str2.IsValid() );
e87b7833 646#endif
097c080b 647
3458e408
WS
648 wxString s = str1;
649 s += str2;
3168a13f 650
3458e408 651 return s;
c801d85f
KB
652}
653
c9f78968 654wxString operator+(const wxString& str, wxUniChar ch)
c801d85f 655{
992527a5 656#if !wxUSE_STL_BASED_WXSTRING
8f93a29f 657 wxASSERT( str.IsValid() );
e87b7833 658#endif
3168a13f 659
3458e408
WS
660 wxString s = str;
661 s += ch;
097c080b 662
3458e408 663 return s;
c801d85f
KB
664}
665
c9f78968 666wxString operator+(wxUniChar ch, const wxString& str)
c801d85f 667{
992527a5 668#if !wxUSE_STL_BASED_WXSTRING
8f93a29f 669 wxASSERT( str.IsValid() );
e87b7833 670#endif
097c080b 671
3458e408
WS
672 wxString s = ch;
673 s += str;
3168a13f 674
3458e408 675 return s;
c801d85f
KB
676}
677
8f93a29f 678wxString operator+(const wxString& str, const char *psz)
c801d85f 679{
992527a5 680#if !wxUSE_STL_BASED_WXSTRING
8f93a29f 681 wxASSERT( str.IsValid() );
e87b7833 682#endif
097c080b 683
3458e408 684 wxString s;
8f93a29f 685 if ( !s.Alloc(strlen(psz) + str.length()) ) {
3458e408
WS
686 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
687 }
688 s += str;
689 s += psz;
3168a13f 690
3458e408 691 return s;
c801d85f
KB
692}
693
8f93a29f 694wxString operator+(const wxString& str, const wchar_t *pwz)
c801d85f 695{
992527a5 696#if !wxUSE_STL_BASED_WXSTRING
8f93a29f
VS
697 wxASSERT( str.IsValid() );
698#endif
699
700 wxString s;
701 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
702 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
703 }
704 s += str;
705 s += pwz;
706
707 return s;
708}
709
710wxString operator+(const char *psz, const wxString& str)
711{
a7ea63e2
VS
712#if !wxUSE_STL_BASED_WXSTRING
713 wxASSERT( str.IsValid() );
714#endif
715
716 wxString s;
717 if ( !s.Alloc(strlen(psz) + str.length()) ) {
718 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
719 }
720 s = psz;
721 s += str;
722
723 return s;
724}
725
726wxString operator+(const wchar_t *pwz, const wxString& str)
727{
728#if !wxUSE_STL_BASED_WXSTRING
729 wxASSERT( str.IsValid() );
730#endif
731
732 wxString s;
733 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
734 wxFAIL_MSG( _T("out of memory in wxString::operator+") );
735 }
736 s = pwz;
737 s += str;
738
739 return s;
740}
741
742// ---------------------------------------------------------------------------
743// string comparison
744// ---------------------------------------------------------------------------
745
52de37c7
VS
746bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
747{
748 return (length() == 1) && (compareWithCase ? GetChar(0u) == c
749 : wxToupper(GetChar(0u)) == wxToupper(c));
750}
751
a7ea63e2
VS
752#ifdef HAVE_STD_STRING_COMPARE
753
754// NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
755// UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
756// sort strings in characters code point order by sorting the byte sequence
757// in byte values order (i.e. what strcmp() and memcmp() do).
758
759int wxString::compare(const wxString& str) const
760{
761 return m_impl.compare(str.m_impl);
762}
763
764int wxString::compare(size_t nStart, size_t nLen,
765 const wxString& str) const
766{
767 size_t pos, len;
768 PosLenToImpl(nStart, nLen, &pos, &len);
769 return m_impl.compare(pos, len, str.m_impl);
770}
771
772int wxString::compare(size_t nStart, size_t nLen,
773 const wxString& str,
774 size_t nStart2, size_t nLen2) const
775{
776 size_t pos, len;
777 PosLenToImpl(nStart, nLen, &pos, &len);
778
779 size_t pos2, len2;
780 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
781
782 return m_impl.compare(pos, len, str.m_impl, pos2, len2);
783}
784
785int wxString::compare(const char* sz) const
786{
787 return m_impl.compare(ImplStr(sz));
788}
789
790int wxString::compare(const wchar_t* sz) const
791{
792 return m_impl.compare(ImplStr(sz));
793}
794
795int wxString::compare(size_t nStart, size_t nLen,
796 const char* sz, size_t nCount) const
797{
798 size_t pos, len;
799 PosLenToImpl(nStart, nLen, &pos, &len);
800
801 SubstrBufFromMB str(ImplStr(sz, nCount));
802
803 return m_impl.compare(pos, len, str.data, str.len);
804}
805
806int wxString::compare(size_t nStart, size_t nLen,
807 const wchar_t* sz, size_t nCount) const
808{
809 size_t pos, len;
810 PosLenToImpl(nStart, nLen, &pos, &len);
811
812 SubstrBufFromWC str(ImplStr(sz, nCount));
813
814 return m_impl.compare(pos, len, str.data, str.len);
815}
816
817#else // !HAVE_STD_STRING_COMPARE
818
819static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
820 const wxStringCharType* s2, size_t l2)
821{
822 if( l1 == l2 )
823 return wxStringMemcmp(s1, s2, l1);
824 else if( l1 < l2 )
825 {
826 int ret = wxStringMemcmp(s1, s2, l1);
827 return ret == 0 ? -1 : ret;
828 }
829 else
830 {
831 int ret = wxStringMemcmp(s1, s2, l2);
832 return ret == 0 ? +1 : ret;
833 }
834}
835
836int wxString::compare(const wxString& str) const
837{
838 return ::wxDoCmp(m_impl.data(), m_impl.length(),
839 str.m_impl.data(), str.m_impl.length());
840}
841
842int wxString::compare(size_t nStart, size_t nLen,
843 const wxString& str) const
844{
845 wxASSERT(nStart <= length());
846 size_type strLen = length() - nStart;
847 nLen = strLen < nLen ? strLen : nLen;
848
849 size_t pos, len;
850 PosLenToImpl(nStart, nLen, &pos, &len);
851
852 return ::wxDoCmp(m_impl.data() + pos, len,
853 str.m_impl.data(), str.m_impl.length());
854}
855
856int wxString::compare(size_t nStart, size_t nLen,
857 const wxString& str,
858 size_t nStart2, size_t nLen2) const
859{
860 wxASSERT(nStart <= length());
861 wxASSERT(nStart2 <= str.length());
862 size_type strLen = length() - nStart,
863 strLen2 = str.length() - nStart2;
864 nLen = strLen < nLen ? strLen : nLen;
865 nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
866
867 size_t pos, len;
868 PosLenToImpl(nStart, nLen, &pos, &len);
869 size_t pos2, len2;
870 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
871
872 return ::wxDoCmp(m_impl.data() + pos, len,
873 str.m_impl.data() + pos2, len2);
874}
875
876int wxString::compare(const char* sz) const
877{
878 SubstrBufFromMB str(ImplStr(sz, npos));
879 if ( str.len == npos )
880 str.len = wxStringStrlen(str.data);
881 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
882}
883
884int wxString::compare(const wchar_t* sz) const
885{
886 SubstrBufFromWC str(ImplStr(sz, npos));
887 if ( str.len == npos )
888 str.len = wxStringStrlen(str.data);
889 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
890}
891
892int wxString::compare(size_t nStart, size_t nLen,
893 const char* sz, size_t nCount) const
894{
895 wxASSERT(nStart <= length());
896 size_type strLen = length() - nStart;
897 nLen = strLen < nLen ? strLen : nLen;
097c080b 898
a7ea63e2
VS
899 size_t pos, len;
900 PosLenToImpl(nStart, nLen, &pos, &len);
3168a13f 901
a7ea63e2
VS
902 SubstrBufFromMB str(ImplStr(sz, nCount));
903 if ( str.len == npos )
904 str.len = wxStringStrlen(str.data);
905
906 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
c801d85f
KB
907}
908
a7ea63e2
VS
909int wxString::compare(size_t nStart, size_t nLen,
910 const wchar_t* sz, size_t nCount) const
8f93a29f 911{
a7ea63e2
VS
912 wxASSERT(nStart <= length());
913 size_type strLen = length() - nStart;
914 nLen = strLen < nLen ? strLen : nLen;
8f93a29f 915
a7ea63e2
VS
916 size_t pos, len;
917 PosLenToImpl(nStart, nLen, &pos, &len);
8f93a29f 918
a7ea63e2
VS
919 SubstrBufFromWC str(ImplStr(sz, nCount));
920 if ( str.len == npos )
921 str.len = wxStringStrlen(str.data);
922
923 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
8f93a29f
VS
924}
925
a7ea63e2
VS
926#endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
927
928
8f93a29f
VS
929// ---------------------------------------------------------------------------
930// find_{first,last}_[not]_of functions
931// ---------------------------------------------------------------------------
932
933#if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
c801d85f 934
8f93a29f
VS
935// NB: All these functions are implemented with the argument being wxChar*,
936// i.e. widechar string in any Unicode build, even though native string
937// representation is char* in the UTF-8 build. This is because we couldn't
938// use memchr() to determine if a character is in a set encoded as UTF-8.
939
940size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
dcb68102 941{
8f93a29f 942 return find_first_of(sz, nStart, wxStrlen(sz));
dcb68102
RN
943}
944
8f93a29f 945size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
dcb68102 946{
8f93a29f 947 return find_first_not_of(sz, nStart, wxStrlen(sz));
dcb68102
RN
948}
949
8f93a29f 950size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
dcb68102 951{
8f93a29f 952 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
dcb68102 953
8f93a29f
VS
954 size_t idx = nStart;
955 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
dcb68102 956 {
8f93a29f
VS
957 if ( wxTmemchr(sz, *i, n) )
958 return idx;
dcb68102 959 }
8f93a29f
VS
960
961 return npos;
962}
963
964size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
965{
966 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
967
968 size_t idx = nStart;
969 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
dcb68102 970 {
8f93a29f
VS
971 if ( !wxTmemchr(sz, *i, n) )
972 return idx;
973 }
974
975 return npos;
976}
977
978
979size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
980{
981 return find_last_of(sz, nStart, wxStrlen(sz));
982}
983
984size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
985{
986 return find_last_not_of(sz, nStart, wxStrlen(sz));
987}
988
989size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
990{
991 size_t len = length();
992
993 if ( nStart == npos )
994 {
995 nStart = len - 1;
dcb68102 996 }
2c09fb3b 997 else
dcb68102 998 {
8f93a29f 999 wxASSERT_MSG( nStart <= len, _T("invalid index") );
dcb68102 1000 }
8f93a29f
VS
1001
1002 size_t idx = nStart;
1003 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1004 i != rend(); --idx, ++i )
1005 {
1006 if ( wxTmemchr(sz, *i, n) )
1007 return idx;
1008 }
1009
1010 return npos;
dcb68102
RN
1011}
1012
8f93a29f 1013size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
dcb68102 1014{
8f93a29f
VS
1015 size_t len = length();
1016
1017 if ( nStart == npos )
1018 {
1019 nStart = len - 1;
1020 }
1021 else
1022 {
1023 wxASSERT_MSG( nStart <= len, _T("invalid index") );
1024 }
1025
1026 size_t idx = nStart;
1027 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1028 i != rend(); --idx, ++i )
1029 {
1030 if ( !wxTmemchr(sz, *i, n) )
1031 return idx;
1032 }
1033
1034 return npos;
dcb68102
RN
1035}
1036
8f93a29f 1037size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
dcb68102 1038{
8f93a29f
VS
1039 wxASSERT_MSG( nStart <= length(), _T("invalid index") );
1040
1041 size_t idx = nStart;
1042 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
1043 {
1044 if ( *i != ch )
1045 return idx;
1046 }
1047
1048 return npos;
1049}
1050
1051size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
1052{
1053 size_t len = length();
1054
1055 if ( nStart == npos )
1056 {
1057 nStart = len - 1;
1058 }
1059 else
1060 {
1061 wxASSERT_MSG( nStart <= len, _T("invalid index") );
1062 }
1063
1064 size_t idx = nStart;
1065 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1066 i != rend(); --idx, ++i )
1067 {
1068 if ( *i != ch )
1069 return idx;
1070 }
1071
1072 return npos;
1073}
1074
1075// the functions above were implemented for wchar_t* arguments in Unicode
1076// build and char* in ANSI build; below are implementations for the other
1077// version:
1078#if wxUSE_UNICODE
1079 #define wxOtherCharType char
1080 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
1081#else
1082 #define wxOtherCharType wchar_t
1083 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
1084#endif
1085
1086size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
1087 { return find_first_of(STRCONV(sz), nStart); }
1088
1089size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
1090 size_t n) const
1091 { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
1092size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
1093 { return find_last_of(STRCONV(sz), nStart); }
1094size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
1095 size_t n) const
1096 { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
1097size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
1098 { return find_first_not_of(STRCONV(sz), nStart); }
1099size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
1100 size_t n) const
1101 { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
1102size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
1103 { return find_last_not_of(STRCONV(sz), nStart); }
1104size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
1105 size_t n) const
1106 { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
1107
1108#undef wxOtherCharType
1109#undef STRCONV
1110
1111#endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1112
1113// ===========================================================================
1114// other common string functions
1115// ===========================================================================
1116
1117int wxString::CmpNoCase(const wxString& s) const
1118{
6689960c 1119#if wxUSE_UNICODE_UTF8
8f93a29f
VS
1120 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
1121
8f93a29f
VS
1122 const_iterator i1 = begin();
1123 const_iterator end1 = end();
1124 const_iterator i2 = s.begin();
1125 const_iterator end2 = s.end();
1126
0d8b0f94 1127 for ( ; i1 != end1 && i2 != end2; ++i1, ++i2 )
8f93a29f
VS
1128 {
1129 wxUniChar lower1 = (wxChar)wxTolower(*i1);
1130 wxUniChar lower2 = (wxChar)wxTolower(*i2);
1131 if ( lower1 != lower2 )
1132 return lower1 < lower2 ? -1 : 1;
1133 }
1134
1135 size_t len1 = length();
1136 size_t len2 = s.length();
dcb68102 1137
8f93a29f
VS
1138 if ( len1 < len2 )
1139 return -1;
1140 else if ( len1 > len2 )
1141 return 1;
1142 return 0;
6689960c
VZ
1143#else // wxUSE_UNICODE_WCHAR or ANSI
1144 return wxStricmp(m_impl.c_str(), s.m_impl.c_str());
1145#endif
dcb68102
RN
1146}
1147
1148
b1ac3b56 1149#if wxUSE_UNICODE
e015c2a3 1150
cf6bedce
SC
1151#ifdef __MWERKS__
1152#ifndef __SCHAR_MAX__
1153#define __SCHAR_MAX__ 127
1154#endif
1155#endif
1156
e6310bbc 1157wxString wxString::FromAscii(const char *ascii, size_t len)
b1ac3b56 1158{
e6310bbc 1159 if (!ascii || len == 0)
b1ac3b56 1160 return wxEmptyString;
e015c2a3 1161
b1ac3b56 1162 wxString res;
e015c2a3 1163
e6310bbc 1164 {
6798451b 1165 wxStringInternalBuffer buf(res, len);
602a857b 1166 wxStringCharType *dest = buf;
c1eada83 1167
602a857b
VS
1168 for ( ; len > 0; --len )
1169 {
1170 unsigned char c = (unsigned char)*ascii++;
1171 wxASSERT_MSG( c < 0x80,
1172 _T("Non-ASCII value passed to FromAscii().") );
c1eada83 1173
602a857b
VS
1174 *dest++ = (wchar_t)c;
1175 }
e015c2a3
VZ
1176 }
1177
b1ac3b56
RR
1178 return res;
1179}
1180
e6310bbc
VS
1181wxString wxString::FromAscii(const char *ascii)
1182{
0081dd72 1183 return FromAscii(ascii, wxStrlen(ascii));
e6310bbc
VS
1184}
1185
c5288c5c 1186wxString wxString::FromAscii(char ascii)
2b5f62a0
VZ
1187{
1188 // What do we do with '\0' ?
1189
c1eada83 1190 unsigned char c = (unsigned char)ascii;
8760bc65 1191
c1eada83
VS
1192 wxASSERT_MSG( c < 0x80, _T("Non-ASCII value passed to FromAscii().") );
1193
1194 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1195 return wxString(wxUniChar((wchar_t)c));
2b5f62a0
VZ
1196}
1197
b1ac3b56
RR
1198const wxCharBuffer wxString::ToAscii() const
1199{
e015c2a3
VZ
1200 // this will allocate enough space for the terminating NUL too
1201 wxCharBuffer buffer(length());
6e394fc6 1202 char *dest = buffer.data();
e015c2a3 1203
c1eada83 1204 for ( const_iterator i = begin(); i != end(); ++i )
b1ac3b56 1205 {
c1eada83
VS
1206 wxUniChar c(*i);
1207 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1208 *dest++ = c.IsAscii() ? (char)c : '_';
e015c2a3
VZ
1209
1210 // the output string can't have embedded NULs anyhow, so we can safely
1211 // stop at first of them even if we do have any
c1eada83 1212 if ( !c )
e015c2a3 1213 break;
b1ac3b56 1214 }
e015c2a3 1215
b1ac3b56
RR
1216 return buffer;
1217}
e015c2a3 1218
c1eada83 1219#endif // wxUSE_UNICODE
b1ac3b56 1220
c801d85f 1221// extract string of length nCount starting at nFirst
c801d85f
KB
1222wxString wxString::Mid(size_t nFirst, size_t nCount) const
1223{
73f507f5 1224 size_t nLen = length();
30d9011f 1225
73f507f5
WS
1226 // default value of nCount is npos and means "till the end"
1227 if ( nCount == npos )
1228 {
1229 nCount = nLen - nFirst;
1230 }
30d9011f 1231
73f507f5
WS
1232 // out-of-bounds requests return sensible things
1233 if ( nFirst + nCount > nLen )
1234 {
1235 nCount = nLen - nFirst;
1236 }
c801d85f 1237
73f507f5
WS
1238 if ( nFirst > nLen )
1239 {
1240 // AllocCopy() will return empty string
1241 return wxEmptyString;
1242 }
c801d85f 1243
73f507f5
WS
1244 wxString dest(*this, nFirst, nCount);
1245 if ( dest.length() != nCount )
1246 {
1247 wxFAIL_MSG( _T("out of memory in wxString::Mid") );
1248 }
30d9011f 1249
73f507f5 1250 return dest;
c801d85f
KB
1251}
1252
e87b7833 1253// check that the string starts with prefix and return the rest of the string
d775fa82 1254// in the provided pointer if it is not NULL, otherwise return false
c5e7a7d7 1255bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
f6bcfd97 1256{
c5e7a7d7
VS
1257 if ( compare(0, prefix.length(), prefix) != 0 )
1258 return false;
f6bcfd97
BP
1259
1260 if ( rest )
1261 {
1262 // put the rest of the string into provided pointer
c5e7a7d7 1263 rest->assign(*this, prefix.length(), npos);
f6bcfd97
BP
1264 }
1265
d775fa82 1266 return true;
f6bcfd97
BP
1267}
1268
3affcd07
VZ
1269
1270// check that the string ends with suffix and return the rest of it in the
1271// provided pointer if it is not NULL, otherwise return false
c5e7a7d7 1272bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
3affcd07 1273{
c5e7a7d7 1274 int start = length() - suffix.length();
81727065
VS
1275
1276 if ( start < 0 || compare(start, npos, suffix) != 0 )
3affcd07
VZ
1277 return false;
1278
1279 if ( rest )
1280 {
1281 // put the rest of the string into provided pointer
1282 rest->assign(*this, 0, start);
1283 }
1284
1285 return true;
1286}
1287
1288
c801d85f
KB
1289// extract nCount last (rightmost) characters
1290wxString wxString::Right(size_t nCount) const
1291{
e87b7833
MB
1292 if ( nCount > length() )
1293 nCount = length();
c801d85f 1294
e87b7833
MB
1295 wxString dest(*this, length() - nCount, nCount);
1296 if ( dest.length() != nCount ) {
b1801e0e
GD
1297 wxFAIL_MSG( _T("out of memory in wxString::Right") );
1298 }
c801d85f
KB
1299 return dest;
1300}
1301
7929902d 1302// get all characters after the last occurrence of ch
c801d85f 1303// (returns the whole string if ch not found)
c9f78968 1304wxString wxString::AfterLast(wxUniChar ch) const
c801d85f
KB
1305{
1306 wxString str;
d775fa82 1307 int iPos = Find(ch, true);
3c67202d 1308 if ( iPos == wxNOT_FOUND )
c801d85f
KB
1309 str = *this;
1310 else
c565abe1 1311 str.assign(*this, iPos + 1, npos);
c801d85f
KB
1312
1313 return str;
1314}
1315
1316// extract nCount first (leftmost) characters
1317wxString wxString::Left(size_t nCount) const
1318{
e87b7833
MB
1319 if ( nCount > length() )
1320 nCount = length();
c801d85f 1321
e87b7833
MB
1322 wxString dest(*this, 0, nCount);
1323 if ( dest.length() != nCount ) {
b1801e0e
GD
1324 wxFAIL_MSG( _T("out of memory in wxString::Left") );
1325 }
c801d85f
KB
1326 return dest;
1327}
1328
7929902d 1329// get all characters before the first occurrence of ch
c801d85f 1330// (returns the whole string if ch not found)
c9f78968 1331wxString wxString::BeforeFirst(wxUniChar ch) const
c801d85f 1332{
e87b7833 1333 int iPos = Find(ch);
c565abe1
VZ
1334 if ( iPos == wxNOT_FOUND )
1335 iPos = length();
e87b7833 1336 return wxString(*this, 0, iPos);
c801d85f
KB
1337}
1338
7929902d 1339/// get all characters before the last occurrence of ch
c801d85f 1340/// (returns empty string if ch not found)
c9f78968 1341wxString wxString::BeforeLast(wxUniChar ch) const
c801d85f
KB
1342{
1343 wxString str;
d775fa82 1344 int iPos = Find(ch, true);
3c67202d 1345 if ( iPos != wxNOT_FOUND && iPos != 0 )
d1c9bbf6 1346 str = wxString(c_str(), iPos);
c801d85f
KB
1347
1348 return str;
1349}
1350
7929902d 1351/// get all characters after the first occurrence of ch
c801d85f 1352/// (returns empty string if ch not found)
c9f78968 1353wxString wxString::AfterFirst(wxUniChar ch) const
c801d85f
KB
1354{
1355 wxString str;
1356 int iPos = Find(ch);
3c67202d 1357 if ( iPos != wxNOT_FOUND )
c565abe1 1358 str.assign(*this, iPos + 1, npos);
c801d85f
KB
1359
1360 return str;
1361}
1362
7929902d 1363// replace first (or all) occurrences of some substring with another one
8a540c88
VS
1364size_t wxString::Replace(const wxString& strOld,
1365 const wxString& strNew, bool bReplaceAll)
c801d85f 1366{
a8f1f1b2 1367 // if we tried to replace an empty string we'd enter an infinite loop below
8a540c88 1368 wxCHECK_MSG( !strOld.empty(), 0,
a8f1f1b2
VZ
1369 _T("wxString::Replace(): invalid parameter") );
1370
68482dc5
VZ
1371 wxSTRING_INVALIDATE_CACHE();
1372
510bb748 1373 size_t uiCount = 0; // count of replacements made
c801d85f 1374
8a627032
VZ
1375 // optimize the special common case: replacement of one character by
1376 // another one (in UTF-8 case we can only do this for ASCII characters)
1377 //
1378 // benchmarks show that this special version is around 3 times faster
1379 // (depending on the proportion of matching characters and UTF-8/wchar_t
1380 // build)
1381 if ( strOld.m_impl.length() == 1 && strNew.m_impl.length() == 1 )
1382 {
1383 const wxStringCharType chOld = strOld.m_impl[0],
1384 chNew = strNew.m_impl[0];
1385
1386 // this loop is the simplified version of the one below
1387 for ( size_t pos = 0; ; )
1388 {
1389 pos = m_impl.find(chOld, pos);
1390 if ( pos == npos )
1391 break;
c801d85f 1392
8a627032
VZ
1393 m_impl[pos++] = chNew;
1394
1395 uiCount++;
1396
1397 if ( !bReplaceAll )
1398 break;
1399 }
1400 }
072682ce
VZ
1401 else if ( !bReplaceAll)
1402 {
1403 size_t pos = m_impl.find(strOld, 0);
1404 if ( pos != npos )
1405 {
1406 m_impl.replace(pos, strOld.m_impl.length(), strNew.m_impl);
1407 uiCount = 1;
1408 }
1409 }
1410 else // replace all occurrences
510bb748 1411 {
8a627032
VZ
1412 const size_t uiOldLen = strOld.m_impl.length();
1413 const size_t uiNewLen = strNew.m_impl.length();
1414
072682ce
VZ
1415 // first scan the string to find all positions at which the replacement
1416 // should be made
1417 wxVector<size_t> replacePositions;
1418
1419 size_t pos;
1420 for ( pos = m_impl.find(strOld.m_impl, 0);
1421 pos != npos;
1422 pos = m_impl.find(strOld.m_impl, pos + uiOldLen))
8a627032 1423 {
072682ce
VZ
1424 replacePositions.push_back(pos);
1425 ++uiCount;
1426 }
510bb748 1427
072682ce
VZ
1428 if ( !uiCount )
1429 return 0;
510bb748 1430
072682ce
VZ
1431 // allocate enough memory for the whole new string
1432 wxString tmp;
1433 tmp.m_impl.reserve(m_impl.length() + uiCount*(uiNewLen - uiOldLen));
ad5bb7d6 1434
072682ce
VZ
1435 // copy this string to tmp doing replacements on the fly
1436 size_t replNum = 0;
1437 for ( pos = 0; replNum < uiCount; replNum++ )
1438 {
1439 const size_t nextReplPos = replacePositions[replNum];
394b2900 1440
072682ce
VZ
1441 if ( pos != nextReplPos )
1442 {
1443 tmp.m_impl.append(m_impl, pos, nextReplPos - pos);
1444 }
1445
1446 tmp.m_impl.append(strNew.m_impl);
1447 pos = nextReplPos + uiOldLen;
8a627032 1448 }
072682ce
VZ
1449
1450 if ( pos != m_impl.length() )
1451 {
1452 // append the rest of the string unchanged
1453 tmp.m_impl.append(m_impl, pos, m_impl.length() - pos);
1454 }
1455
1456 swap(tmp);
c801d85f 1457 }
c801d85f 1458
510bb748 1459 return uiCount;
c801d85f
KB
1460}
1461
1462bool wxString::IsAscii() const
1463{
a4a44612
VS
1464 for ( const_iterator i = begin(); i != end(); ++i )
1465 {
1466 if ( !(*i).IsAscii() )
1467 return false;
1468 }
1469
1470 return true;
c801d85f 1471}
dd1eaa89 1472
c801d85f
KB
1473bool wxString::IsWord() const
1474{
a4a44612
VS
1475 for ( const_iterator i = begin(); i != end(); ++i )
1476 {
1477 if ( !wxIsalpha(*i) )
1478 return false;
1479 }
1480
1481 return true;
c801d85f 1482}
dd1eaa89 1483
c801d85f
KB
1484bool wxString::IsNumber() const
1485{
a4a44612
VS
1486 if ( empty() )
1487 return true;
1488
1489 const_iterator i = begin();
1490
1491 if ( *i == _T('-') || *i == _T('+') )
1492 ++i;
1493
1494 for ( ; i != end(); ++i )
1495 {
1496 if ( !wxIsdigit(*i) )
1497 return false;
1498 }
1499
1500 return true;
c801d85f
KB
1501}
1502
c801d85f
KB
1503wxString wxString::Strip(stripType w) const
1504{
1505 wxString s = *this;
d775fa82
WS
1506 if ( w & leading ) s.Trim(false);
1507 if ( w & trailing ) s.Trim(true);
c801d85f
KB
1508 return s;
1509}
1510
c801d85f
KB
1511// ---------------------------------------------------------------------------
1512// case conversion
1513// ---------------------------------------------------------------------------
1514
1515wxString& wxString::MakeUpper()
1516{
e87b7833
MB
1517 for ( iterator it = begin(), en = end(); it != en; ++it )
1518 *it = (wxChar)wxToupper(*it);
c801d85f
KB
1519
1520 return *this;
1521}
1522
1523wxString& wxString::MakeLower()
1524{
e87b7833
MB
1525 for ( iterator it = begin(), en = end(); it != en; ++it )
1526 *it = (wxChar)wxTolower(*it);
c801d85f
KB
1527
1528 return *this;
1529}
1530
0c7db140
VZ
1531wxString& wxString::MakeCapitalized()
1532{
1533 const iterator en = end();
1534 iterator it = begin();
1535 if ( it != en )
1536 {
1537 *it = (wxChar)wxToupper(*it);
1538 for ( ++it; it != en; ++it )
1539 *it = (wxChar)wxTolower(*it);
1540 }
1541
1542 return *this;
1543}
1544
c801d85f
KB
1545// ---------------------------------------------------------------------------
1546// trimming and padding
1547// ---------------------------------------------------------------------------
1548
d775fa82 1549// some compilers (VC++ 6.0 not to name them) return true for a call to
9d55bfef 1550// isspace('\xEA') in the C locale which seems to be broken to me, but we have
c95e653c 1551// to live with this by checking that the character is a 7 bit one - even if
9d55bfef 1552// this may fail to detect some spaces (I don't know if Unicode doesn't have
576c608d
VZ
1553// space-like symbols somewhere except in the first 128 chars), it is arguably
1554// still better than trimming away accented letters
1555inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1556
c801d85f
KB
1557// trims spaces (in the sense of isspace) from left or right side
1558wxString& wxString::Trim(bool bFromRight)
1559{
3458e408
WS
1560 // first check if we're going to modify the string at all
1561 if ( !empty() &&
1562 (
1563 (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1564 (!bFromRight && wxSafeIsspace(GetChar(0u)))
1565 )
2c3b684c 1566 )
2c3b684c 1567 {
3458e408
WS
1568 if ( bFromRight )
1569 {
1570 // find last non-space character
d4d02bd5 1571 reverse_iterator psz = rbegin();
32c62191 1572 while ( (psz != rend()) && wxSafeIsspace(*psz) )
0d8b0f94 1573 ++psz;
92df97b8 1574
3458e408 1575 // truncate at trailing space start
d4d02bd5 1576 erase(psz.base(), end());
3458e408
WS
1577 }
1578 else
1579 {
1580 // find first non-space character
1581 iterator psz = begin();
32c62191 1582 while ( (psz != end()) && wxSafeIsspace(*psz) )
0d8b0f94 1583 ++psz;
2c3b684c 1584
3458e408
WS
1585 // fix up data and length
1586 erase(begin(), psz);
1587 }
2c3b684c 1588 }
c801d85f 1589
3458e408 1590 return *this;
c801d85f
KB
1591}
1592
1593// adds nCount characters chPad to the string from either side
c9f78968 1594wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
c801d85f 1595{
3458e408 1596 wxString s(chPad, nCount);
c801d85f 1597
3458e408
WS
1598 if ( bFromRight )
1599 *this += s;
1600 else
1601 {
1602 s += *this;
1603 swap(s);
1604 }
c801d85f 1605
3458e408 1606 return *this;
c801d85f
KB
1607}
1608
1609// truncate the string
1610wxString& wxString::Truncate(size_t uiLen)
1611{
3458e408
WS
1612 if ( uiLen < length() )
1613 {
1614 erase(begin() + uiLen, end());
1615 }
1616 //else: nothing to do, string is already short enough
c801d85f 1617
3458e408 1618 return *this;
c801d85f
KB
1619}
1620
1621// ---------------------------------------------------------------------------
3c67202d 1622// finding (return wxNOT_FOUND if not found and index otherwise)
c801d85f
KB
1623// ---------------------------------------------------------------------------
1624
1625// find a character
c9f78968 1626int wxString::Find(wxUniChar ch, bool bFromEnd) const
c801d85f 1627{
3458e408 1628 size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
c801d85f 1629
3458e408 1630 return (idx == npos) ? wxNOT_FOUND : (int)idx;
c801d85f
KB
1631}
1632
cd0b1709
VZ
1633// ----------------------------------------------------------------------------
1634// conversion to numbers
1635// ----------------------------------------------------------------------------
1636
52de37c7
VS
1637// The implementation of all the functions below is exactly the same so factor
1638// it out. Note that number extraction works correctly on UTF-8 strings, so
1639// we can use wxStringCharType and wx_str() for maximum efficiency.
122f3c5d 1640
// DO_IF_NOT_WINCE(x) expands to x everywhere except under Windows CE where
// it expands to nothing: it is used for the code dealing with errno.
#ifdef __WXWINCE__
    #define DO_IF_NOT_WINCE(x)
#else
    #define DO_IF_NOT_WINCE(x) x
#endif

// Body of the functions converting the string to an integer of type T:
// "out" is the output pointer, "base" the numeric base and "func" the
// strtol()-like function to use. The expansion returns true only if the
// entire non-empty string was parsed and no under/overflow occurred.
#define WX_STRING_TO_INT_TYPE(out, base, func, T)                           \
    wxCHECK_MSG( out, false, _T("NULL output pointer") );                   \
    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );  \
                                                                            \
    DO_IF_NOT_WINCE( errno = 0; )                                           \
                                                                            \
    const wxStringCharType *strBegin = wx_str();                            \
    wxStringCharType *strStop;                                              \
    T parsed = func(strBegin, &strStop, base);                              \
                                                                            \
    /* fail if nothing was parsed, if the scan stopped before the */        \
    /* terminating NUL or if an under/overflow occurred */                  \
    if ( strStop == strBegin || *strStop                                    \
            DO_IF_NOT_WINCE(|| errno == ERANGE) )                           \
        return false;                                                       \
    *out = parsed;                                                          \
    return true
cd0b1709 1664
// Converts the string to a long using wxStrtol() in the given base and
// stores the result in *pVal. Returns false, leaving *pVal unchanged, if
// pVal is NULL, the string is empty or not entirely numeric, or (except
// under Windows CE) if errno was set to ERANGE by the conversion.
c95e653c 1665bool wxString::ToLong(long *pVal, int base) const
cd0b1709 1666{
c95e653c 1667 WX_STRING_TO_INT_TYPE(pVal, base, wxStrtol, long);
619dcb09 1668}
cd0b1709 1669
// Converts the string to an unsigned long using wxStrtoul() in the given
// base and stores the result in *pVal. Returns false, leaving *pVal
// unchanged, if pVal is NULL, the string is empty or not entirely numeric,
// or (except under Windows CE) if errno was set to ERANGE by the conversion.
c95e653c 1670bool wxString::ToULong(unsigned long *pVal, int base) const
619dcb09 1671{
c95e653c 1672 WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoul, unsigned long);
cd0b1709
VZ
1673}
1674
// Converts the string to a wxLongLong_t using wxStrtoll() in the given base
// and stores the result in *pVal. Returns false, leaving *pVal unchanged, if
// pVal is NULL, the string is empty or not entirely numeric, or (except
// under Windows CE) if errno was set to ERANGE by the conversion.
c95e653c 1675bool wxString::ToLongLong(wxLongLong_t *pVal, int base) const
d6718dd1 1676{
c95e653c 1677 WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoll, wxLongLong_t);
d6718dd1
VZ
1678}
1679
// Converts the string to a wxULongLong_t using wxStrtoull() in the given
// base and stores the result in *pVal. Returns false, leaving *pVal
// unchanged, if pVal is NULL, the string is empty or not entirely numeric,
// or (except under Windows CE) if errno was set to ERANGE by the conversion.
c95e653c 1680bool wxString::ToULongLong(wxULongLong_t *pVal, int base) const
d6718dd1 1681{
c95e653c 1682 WX_STRING_TO_INT_TYPE(pVal, base, wxStrtoull, wxULongLong_t);
d6718dd1
VZ
1683}
1684
c95e653c 1685bool wxString::ToDouble(double *pVal) const
cd0b1709 1686{
c95e653c 1687 wxCHECK_MSG( pVal, false, _T("NULL output pointer") );
cd0b1709 1688
c95e653c 1689 DO_IF_NOT_WINCE( errno = 0; )
e71e5b37 1690
cd0b1709
VZ
1691 const wxChar *start = c_str();
1692 wxChar *end;
c95e653c 1693 double val = wxStrtod(start, &end);
cd0b1709 1694
d775fa82 1695 // return true only if scan was stopped by the terminating NUL and if the
bda041e5 1696 // string was not empty to start with and no under/overflow occurred
c95e653c
VZ
1697 if ( *end || end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )
1698 return false;
1699
1700 *pVal = val;
1701
1702 return true;
cd0b1709
VZ
1703}
1704
c801d85f 1705// ---------------------------------------------------------------------------
9efd3367 1706// formatted output
c801d85f 1707// ---------------------------------------------------------------------------
378b05f7 1708
d1f6e2cf 1709#if !wxUSE_UTF8_LOCALE_ONLY
341e7d28 1710/* static */
c9f78968 1711#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1528e0b8 1712wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
c9f78968 1713#else
d1f6e2cf 1714wxString wxString::DoFormatWchar(const wxChar *format, ...)
c9f78968 1715#endif
341e7d28 1716{
77c3e48a 1717 va_list argptr;
c9f78968 1718 va_start(argptr, format);
341e7d28 1719
77c3e48a 1720 wxString s;
c9f78968 1721 s.PrintfV(format, argptr);
341e7d28 1722
77c3e48a 1723 va_end(argptr);
341e7d28 1724
77c3e48a 1725 return s;
341e7d28 1726}
d1f6e2cf
VS
1727#endif // !wxUSE_UTF8_LOCALE_ONLY
1728
#if wxUSE_UNICODE_UTF8
// Returns a new string built from the printf()-like char format and the
// variable arguments following it.
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
341e7d28
VZ
1744
1745/* static */
c9f78968 1746wxString wxString::FormatV(const wxString& format, va_list argptr)
341e7d28
VZ
1747{
1748 wxString s;
c9f78968 1749 s.PrintfV(format, argptr);
341e7d28
VZ
1750 return s;
1751}
1752
d1f6e2cf 1753#if !wxUSE_UTF8_LOCALE_ONLY
c9f78968 1754#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
d1f6e2cf 1755int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
c9f78968 1756#else
d1f6e2cf 1757int wxString::DoPrintfWchar(const wxChar *format, ...)
c9f78968 1758#endif
c801d85f 1759{
ba9bbf13 1760 va_list argptr;
c9f78968 1761 va_start(argptr, format);
c801d85f 1762
c9f78968
VS
1763#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1764 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1765 // because it's the only cast that works safely for downcasting when
1766 // multiple inheritance is used:
1767 wxString *str = static_cast<wxString*>(this);
1768#else
1769 wxString *str = this;
1770#endif
1771
1772 int iLen = str->PrintfV(format, argptr);
c801d85f 1773
ba9bbf13 1774 va_end(argptr);
c801d85f 1775
ba9bbf13 1776 return iLen;
c801d85f 1777}
d1f6e2cf
VS
1778#endif // !wxUSE_UTF8_LOCALE_ONLY
1779
#if wxUSE_UNICODE_UTF8
// Replaces the contents of the string with the result of formatting the
// variable arguments according to the printf()-like char format. Returns
// the value returned by PrintfV().
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);

    const int iLen = PrintfV(format, args);

    va_end(args);

    return iLen;
}
#endif // wxUSE_UNICODE_UTF8
c801d85f 1793
67612ff1
DE
1794/*
1795 Uses wxVsnprintf and places the result into this string.
1796
1797 In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1798 it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in
1799 the ISO C99 (and thus SUSv3) standard the return value for the case of
1800 an undersized buffer is inconsistent. For conforming vsnprintf
1801 implementations the function must return the number of characters that
1802 would have been printed had the buffer been large enough. For conforming
1803 vswprintf implementations the function must return a negative number
1804 and set errno.
1805
1806 What vswprintf sets errno to is undefined but Darwin seems to set it to
a9a854d7
DE
1807 EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of
1808 those are defined in the standard and backed up by several conformance
1809 statements. Note that ENOMEM mentioned in the manual page does not
1810 apply to swprintf, only wprintf and fwprintf.
1811
1812 Official manual page:
1813 http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1814
1815 Some conformance statements (AIX, Solaris):
1816 http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1817 http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1818
1819 Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1820 EILSEQ and EINVAL are specifically defined to mean the error is other than
1821 an undersized buffer and no other errno are defined we treat those two
1822 as meaning hard errors and everything else gets the old behavior which
1823 is to keep looping and increasing buffer size until the function succeeds.
c95e653c 1824
67612ff1
DE
1825 In practice it's impossible to determine before compilation which behavior
1826 may be used. The vswprintf function may have vsnprintf-like behavior or
1827 vice-versa. Behavior detected on one release can theoretically change
1828 with an updated release. Not to mention that configure testing for it
1829 would require the test to be run on the host system, not the build system
1830 which makes cross compilation difficult. Therefore, we make no assumptions
1831 about behavior and try our best to handle every known case, including the
1832 case where wxVsnprintf returns a negative number and fails to set errno.
1833
1834 There is yet one more non-standard implementation and that is our own.
1835 Fortunately, that can be detected at compile-time.
1836
1837 On top of all that, ISO C99 explicitly defines snprintf to write a null
1838 character to the last position of the specified buffer. That would be at
1839 the given buffer size minus 1. It is supposed to do this even if it
1840 turns out that the buffer is sized too small.
1841
1842 Darwin (tested on 10.5) follows the C99 behavior exactly.
1843
1844 Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1845 errno even when it fails. However, it only seems to ever fail due
1846 to an undersized buffer.
1847*/
2523e9b7
VS
1848#if wxUSE_UNICODE_UTF8
1849template<typename BufferType>
1850#else
1851// we only need one version in non-UTF8 builds and at least two Windows
1852// compilers have problems with this function template, so use just one
1853// normal function here
1854#endif
1855static int DoStringPrintfV(wxString& str,
1856 const wxString& format, va_list argptr)
c801d85f 1857{
f6f5941b 1858 int size = 1024;
e87b7833 1859
f6f5941b
VZ
1860 for ( ;; )
1861 {
2523e9b7
VS
1862#if wxUSE_UNICODE_UTF8
1863 BufferType tmp(str, size + 1);
1864 typename BufferType::CharType *buf = tmp;
1865#else
1866 wxStringBuffer tmp(str, size + 1);
de2589be 1867 wxChar *buf = tmp;
2523e9b7 1868#endif
2bb67b80 1869
ba9bbf13
WS
1870 if ( !buf )
1871 {
1872 // out of memory
a33c7045
VS
1873
1874 // in UTF-8 build, leaving uninitialized junk in the buffer
1875 // could result in invalid non-empty UTF-8 string, so just
1876 // reset the string to empty on failure:
1877 buf[0] = '\0';
ba9bbf13 1878 return -1;
e87b7833 1879 }
f6f5941b 1880
ba9bbf13
WS
1881 // wxVsnprintf() may modify the original arg pointer, so pass it
1882 // only a copy
1883 va_list argptrcopy;
1884 wxVaCopy(argptrcopy, argptr);
67612ff1
DE
1885
1886#ifndef __WXWINCE__
1887 // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1888 errno = 0;
1889#endif
2523e9b7 1890 int len = wxVsnprintf(buf, size, format, argptrcopy);
ba9bbf13
WS
1891 va_end(argptrcopy);
1892
1893 // some implementations of vsnprintf() don't NUL terminate
1894 // the string if there is not enough space for it so
1895 // always do it manually
67612ff1
DE
1896 // FIXME: This really seems to be the wrong and would be an off-by-one
1897 // bug except the code above allocates an extra character.
ba9bbf13
WS
1898 buf[size] = _T('\0');
1899
caff62f2
VZ
1900 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1901 // total number of characters which would have been written if the
b1727cfe 1902 // buffer were large enough (newer standards such as Unix98)
de2589be
VZ
1903 if ( len < 0 )
1904 {
52de37c7
VS
1905 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1906 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1907 // is true if *both* of them use our own implementation,
1908 // otherwise we can't be sure
f2bbe5b6
VZ
1909#if wxUSE_WXVSNPRINTF
1910 // we know that our own implementation of wxVsnprintf() returns -1
1911 // only for a format error - thus there's something wrong with
1912 // the user's format string
a33c7045 1913 buf[0] = '\0';
f2bbe5b6 1914 return -1;
52de37c7
VS
1915#else // possibly using system version
1916 // assume it only returns error if there is not enough space, but
1917 // as we don't know how much we need, double the current size of
1918 // the buffer
67612ff1 1919#ifndef __WXWINCE__
a9a854d7
DE
1920 if( (errno == EILSEQ) || (errno == EINVAL) )
1921 // If errno was set to one of the two well-known hard errors
1922 // then fail immediately to avoid an infinite loop.
1923 return -1;
1924 else
1925#endif // __WXWINCE__
67612ff1
DE
1926 // still not enough, as we don't know how much we need, double the
1927 // current size of the buffer
1928 size *= 2;
f2bbe5b6 1929#endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
de2589be 1930 }
64f8f94c 1931 else if ( len >= size )
de2589be 1932 {
f2bbe5b6 1933#if wxUSE_WXVSNPRINTF
c95e653c 1934 // we know that our own implementation of wxVsnprintf() returns
f2bbe5b6
VZ
1935 // size+1 when there's not enough space but that's not the size
1936 // of the required buffer!
1937 size *= 2; // so we just double the current size of the buffer
1938#else
64f8f94c
VZ
1939 // some vsnprintf() implementations NUL-terminate the buffer and
1940 // some don't in len == size case, to be safe always add 1
67612ff1
DE
1941 // FIXME: I don't quite understand this comment. The vsnprintf
1942 // function is specifically defined to return the number of
1943 // characters printed not including the null terminator.
1944 // So OF COURSE you need to add 1 to get the right buffer size.
1945 // The following line is definitely correct, no question.
64f8f94c 1946 size = len + 1;
f2bbe5b6 1947#endif
de2589be
VZ
1948 }
1949 else // ok, there was enough space
f6f5941b 1950 {
f6f5941b
VZ
1951 break;
1952 }
f6f5941b
VZ
1953 }
1954
1955 // we could have overshot
2523e9b7
VS
1956 str.Shrink();
1957
1958 return str.length();
1959}
c801d85f 1960
2523e9b7
VS
1961int wxString::PrintfV(const wxString& format, va_list argptr)
1962{
2523e9b7
VS
1963#if wxUSE_UNICODE_UTF8
1964 #if wxUSE_STL_BASED_WXSTRING
1965 typedef wxStringTypeBuffer<char> Utf8Buffer;
1966 #else
6798451b 1967 typedef wxStringInternalBuffer Utf8Buffer;
2523e9b7
VS
1968 #endif
1969#endif
1970
1971#if wxUSE_UTF8_LOCALE_ONLY
c6255a6e 1972 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
2523e9b7
VS
1973#else
1974 #if wxUSE_UNICODE_UTF8
1975 if ( wxLocaleIsUtf8 )
c6255a6e 1976 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
2523e9b7
VS
1977 else
1978 // wxChar* version
c6255a6e 1979 return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
2523e9b7 1980 #else
c6255a6e 1981 return DoStringPrintfV(*this, format, argptr);
2523e9b7
VS
1982 #endif // UTF8/WCHAR
1983#endif
c801d85f
KB
1984}
1985
097c080b
VZ
1986// ----------------------------------------------------------------------------
1987// misc other operations
1988// ----------------------------------------------------------------------------
0c5d3e1c 1989
d775fa82 1990// returns true if the string matches the pattern which may contain '*' and
0c5d3e1c
VZ
1991// '?' metacharacters (as usual, '?' matches any character and '*' any number
1992// of them)
8a540c88 1993bool wxString::Matches(const wxString& mask) const
097c080b 1994{
d6044f58
VZ
1995 // I disable this code as it doesn't seem to be faster (in fact, it seems
1996 // to be much slower) than the old, hand-written code below and using it
1997 // here requires always linking with libregex even if the user code doesn't
1998 // use it
1999#if 0 // wxUSE_REGEX
706c2ac9
VZ
2000 // first translate the shell-like mask into a regex
2001 wxString pattern;
2002 pattern.reserve(wxStrlen(pszMask));
2003
2004 pattern += _T('^');
2005 while ( *pszMask )
2006 {
2007 switch ( *pszMask )
2008 {
2009 case _T('?'):
2010 pattern += _T('.');
2011 break;
2012
2013 case _T('*'):
2014 pattern += _T(".*");
2015 break;
2016
2017 case _T('^'):
2018 case _T('.'):
2019 case _T('$'):
2020 case _T('('):
2021 case _T(')'):
2022 case _T('|'):
2023 case _T('+'):
2024 case _T('\\'):
2025 // these characters are special in a RE, quote them
2026 // (however note that we don't quote '[' and ']' to allow
2027 // using them for Unix shell like matching)
2028 pattern += _T('\\');
2029 // fall through
2030
2031 default:
2032 pattern += *pszMask;
2033 }
2034
2035 pszMask++;
2036 }
2037 pattern += _T('$');
2038
2039 // and now use it
2040 return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
2041#else // !wxUSE_REGEX
9a4232dc
VZ
2042 // TODO: this is, of course, awfully inefficient...
2043
8a540c88
VS
2044 // FIXME-UTF8: implement using iterators, remove #if
2045#if wxUSE_UNICODE_UTF8
2046 wxWCharBuffer maskBuf = mask.wc_str();
2047 wxWCharBuffer txtBuf = wc_str();
2048 const wxChar *pszMask = maskBuf.data();
2049 const wxChar *pszTxt = txtBuf.data();
2050#else
2051 const wxChar *pszMask = mask.wx_str();
9a4232dc 2052 // the char currently being checked
8a540c88
VS
2053 const wxChar *pszTxt = wx_str();
2054#endif
9a4232dc
VZ
2055
2056 // the last location where '*' matched
2057 const wxChar *pszLastStarInText = NULL;
2058 const wxChar *pszLastStarInMask = NULL;
2059
2060match:
2061 for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
097c080b 2062 switch ( *pszMask ) {
223d09f6
KB
2063 case wxT('?'):
2064 if ( *pszTxt == wxT('\0') )
d775fa82 2065 return false;
097c080b 2066
9a4232dc 2067 // pszTxt and pszMask will be incremented in the loop statement
0c5d3e1c 2068
097c080b
VZ
2069 break;
2070
223d09f6 2071 case wxT('*'):
097c080b 2072 {
9a4232dc
VZ
2073 // remember where we started to be able to backtrack later
2074 pszLastStarInText = pszTxt;
2075 pszLastStarInMask = pszMask;
2076
097c080b 2077 // ignore special chars immediately following this one
9a4232dc 2078 // (should this be an error?)
223d09f6 2079 while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
097c080b
VZ
2080 pszMask++;
2081
2082 // if there is nothing more, match
223d09f6 2083 if ( *pszMask == wxT('\0') )
d775fa82 2084 return true;
097c080b
VZ
2085
2086 // are there any other metacharacters in the mask?
c86f1403 2087 size_t uiLenMask;
223d09f6 2088 const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));
097c080b
VZ
2089
2090 if ( pEndMask != NULL ) {
2091 // we have to match the string between two metachars
2092 uiLenMask = pEndMask - pszMask;
2093 }
2094 else {
2095 // we have to match the remainder of the string
2bb67b80 2096 uiLenMask = wxStrlen(pszMask);
097c080b
VZ
2097 }
2098
2099 wxString strToMatch(pszMask, uiLenMask);
2bb67b80 2100 const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
097c080b 2101 if ( pMatch == NULL )
d775fa82 2102 return false;
097c080b
VZ
2103
2104 // -1 to compensate "++" in the loop
2105 pszTxt = pMatch + uiLenMask - 1;
2106 pszMask += uiLenMask - 1;
2107 }
2108 break;
2109
2110 default:
2111 if ( *pszMask != *pszTxt )
d775fa82 2112 return false;
097c080b
VZ
2113 break;
2114 }
2115 }
2116
2117 // match only if nothing left
9a4232dc 2118 if ( *pszTxt == wxT('\0') )
d775fa82 2119 return true;
9a4232dc
VZ
2120
2121 // if we failed to match, backtrack if we can
2122 if ( pszLastStarInText ) {
2123 pszTxt = pszLastStarInText + 1;
2124 pszMask = pszLastStarInMask;
2125
2126 pszLastStarInText = NULL;
2127
2128 // don't bother resetting pszLastStarInMask, it's unnecessary
2129
2130 goto match;
2131 }
2132
d775fa82 2133 return false;
706c2ac9 2134#endif // wxUSE_REGEX/!wxUSE_REGEX
097c080b
VZ
2135}
2136
1fc5dd6f 2137// Count the number of chars
c9f78968 2138int wxString::Freq(wxUniChar ch) const
1fc5dd6f
JS
2139{
2140 int count = 0;
8f93a29f 2141 for ( const_iterator i = begin(); i != end(); ++i )
1fc5dd6f 2142 {
8f93a29f 2143 if ( *i == ch )
1fc5dd6f
JS
2144 count ++;
2145 }
2146 return count;
2147}
4e79262f 2148