]> git.saurik.com Git - wxWidgets.git/blame - src/common/string.cpp
Fix assert during separator items creation introduced by r64226.
[wxWidgets.git] / src / common / string.cpp
CommitLineData
c801d85f 1/////////////////////////////////////////////////////////////////////////////
8898456d 2// Name: src/common/string.cpp
c801d85f 3// Purpose: wxString class
59059feb 4// Author: Vadim Zeitlin, Ryan Norton
c801d85f
KB
5// Modified by:
6// Created: 29/01/98
7// RCS-ID: $Id$
8// Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
59059feb 9// (c) 2004 Ryan Norton <wxprojects@comcast.net>
65571936 10// Licence: wxWindows licence
c801d85f
KB
11/////////////////////////////////////////////////////////////////////////////
12
c801d85f
KB
13// ===========================================================================
14// headers, declarations, constants
15// ===========================================================================
16
17// For compilers that support precompilation, includes "wx.h".
18#include "wx/wxprec.h"
19
20#ifdef __BORLANDC__
8898456d 21 #pragma hdrstop
c801d85f
KB
22#endif
23
24#ifndef WX_PRECOMP
8898456d 25 #include "wx/string.h"
2523e9b7 26 #include "wx/wxcrtvararg.h"
ba7e7253 27 #include "wx/log.h"
6b769f3d 28#endif
c801d85f
KB
29
30#include <ctype.h>
92df97b8
WS
31
32#ifndef __WXWINCE__
33 #include <errno.h>
34#endif
35
c801d85f
KB
36#include <string.h>
37#include <stdlib.h>
9a08c20e 38
8116a0c5 39#include "wx/hashmap.h"
072682ce 40#include "wx/vector.h"
529e491c 41#include "wx/xlocale.h"
8f93a29f 42
825d69c1
VZ
43#ifdef __WXMSW__
44 #include "wx/msw/wrapwin.h"
45#endif // __WXMSW__
46
8f93a29f
VS
47// string handling functions used by wxString:
48#if wxUSE_UNICODE_UTF8
49 #define wxStringMemcpy memcpy
50 #define wxStringMemcmp memcmp
51 #define wxStringMemchr memchr
52 #define wxStringStrlen strlen
53#else
54 #define wxStringMemcpy wxTmemcpy
55 #define wxStringMemcmp wxTmemcmp
a7ea63e2
VS
56 #define wxStringMemchr wxTmemchr
57 #define wxStringStrlen wxStrlen
58#endif
8f93a29f 59
b96a56e6
VZ
60// define a function declared in wx/buffer.h here as we don't have buffer.cpp
61// and don't want to add it just because of this simple function
4e79262f
VZ
62namespace wxPrivate
63{
64
b96a56e6
VZ
65// wxXXXBuffer classes can be (implicitly) used during global statics
// initialization so wrap the static UntypedBufferData variable in a function
67// to make it safe to access it even before all global statics are initialized
68UntypedBufferData *GetUntypedNullData()
69{
70 static UntypedBufferData s_untypedNullData(NULL, 0);
4e79262f 71
b96a56e6
VZ
72 return &s_untypedNullData;
73}
4e79262f
VZ
74
75} // namespace wxPrivate
e87b7833 76
a7ea63e2
VS
77// ---------------------------------------------------------------------------
78// static class variables definition
79// ---------------------------------------------------------------------------
e87b7833 80
a7ea63e2
VS
81//According to STL _must_ be a -1 size_t
82const size_t wxString::npos = (size_t) -1;
8f93a29f 83
68482dc5 84#if wxUSE_STRING_POS_CACHE
68482dc5 85
e810df36
VZ
86#ifdef wxHAS_COMPILER_TLS
87
88wxTLS_TYPE(wxString::Cache) wxString::ms_cache;
89
90#else // !wxHAS_COMPILER_TLS
91
ad8ae788
VZ
92struct wxStrCacheInitializer
93{
94 wxStrCacheInitializer()
95 {
96 // calling this function triggers s_cache initialization in it, and
97 // from now on it becomes safe to call from multiple threads
98 wxString::GetCache();
99 }
100};
101
e317bd3f
SC
102/*
103wxString::Cache& wxString::GetCache()
104{
105 static wxTLS_TYPE(Cache) s_cache;
106
107 return wxTLS_VALUE(s_cache);
108}
109*/
110
ad8ae788
VZ
111static wxStrCacheInitializer gs_stringCacheInit;
112
e810df36
VZ
113#endif // wxHAS_COMPILER_TLS/!wxHAS_COMPILER_TLS
114
68482dc5
VZ
115// gdb seems to be unable to display thread-local variables correctly, at least
116// not my 6.4.98 version under amd64, so provide this debugging helper to do it
4b6a582b 117#if wxDEBUG_LEVEL >= 2
68482dc5
VZ
118
119struct wxStrCacheDumper
120{
121 static void ShowAll()
122 {
123 puts("*** wxString cache dump:");
124 for ( unsigned n = 0; n < wxString::Cache::SIZE; n++ )
125 {
126 const wxString::Cache::Element&
8b73c531 127 c = wxString::GetCacheBegin()[n];
68482dc5
VZ
128
129 printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
130 n,
8b73c531 131 n == wxString::LastUsedCacheElement() ? " [*]" : "",
68482dc5
VZ
132 c.str,
133 (unsigned long)c.pos,
134 (unsigned long)c.impl,
135 (long)c.len);
136 }
137 }
138};
139
140void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
141
4b6a582b 142#endif // wxDEBUG_LEVEL >= 2
68482dc5
VZ
143
144#ifdef wxPROFILE_STRING_CACHE
145
146wxString::CacheStats wxString::ms_cacheStats;
147
8c3b65d9 148struct wxStrCacheStatsDumper
68482dc5 149{
8c3b65d9 150 ~wxStrCacheStatsDumper()
68482dc5
VZ
151 {
152 const wxString::CacheStats& stats = wxString::ms_cacheStats;
153
154 if ( stats.postot )
155 {
156 puts("*** wxString cache statistics:");
157 printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
158 stats.postot);
159 printf("\tHits %u (of which %u not used) or %.2f%%\n",
160 stats.poshits,
161 stats.mishits,
162 100.*float(stats.poshits - stats.mishits)/stats.postot);
163 printf("\tAverage position requested: %.2f\n",
164 float(stats.sumpos) / stats.postot);
165 printf("\tAverage offset after cached hint: %.2f\n",
166 float(stats.sumofs) / stats.postot);
167 }
168
169 if ( stats.lentot )
170 {
171 printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
172 stats.lentot, 100.*float(stats.lenhits)/stats.lentot);
173 }
174 }
8c3b65d9 175};
68482dc5 176
8c3b65d9 177static wxStrCacheStatsDumper s_showCacheStats;
68482dc5
VZ
178
179#endif // wxPROFILE_STRING_CACHE
180
181#endif // wxUSE_STRING_POS_CACHE
182
a7ea63e2
VS
183// ----------------------------------------------------------------------------
184// global functions
185// ----------------------------------------------------------------------------
e87b7833 186
a7ea63e2 187#if wxUSE_STD_IOSTREAM
8f93a29f 188
a7ea63e2 189#include <iostream>
8f93a29f 190
a7ea63e2 191wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
8f93a29f 192{
7a906e1a 193#if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
de4983f3 194 const wxScopedCharBuffer buf(str.AsCharBuf());
ddf01bdb
VZ
195 if ( !buf )
196 os.clear(wxSTD ios_base::failbit);
197 else
198 os << buf.data();
199
200 return os;
a7ea63e2 201#else
7a906e1a 202 return os << str.AsInternal();
a7ea63e2 203#endif
8f93a29f
VS
204}
205
04abe4bc
VS
206wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
207{
208 return os << str.c_str();
209}
210
de4983f3 211wxSTD ostream& operator<<(wxSTD ostream& os, const wxScopedCharBuffer& str)
04abe4bc
VS
212{
213 return os << str.data();
214}
215
216#ifndef __BORLANDC__
de4983f3 217wxSTD ostream& operator<<(wxSTD ostream& os, const wxScopedWCharBuffer& str)
04abe4bc
VS
218{
219 return os << str.data();
220}
221#endif
222
6a6ea041 223#if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
6b61b594
VZ
224
225wxSTD wostream& operator<<(wxSTD wostream& wos, const wxString& str)
226{
227 return wos << str.wc_str();
228}
229
230wxSTD wostream& operator<<(wxSTD wostream& wos, const wxCStrData& str)
231{
232 return wos << str.AsWChar();
233}
234
de4983f3 235wxSTD wostream& operator<<(wxSTD wostream& wos, const wxScopedWCharBuffer& str)
6b61b594
VZ
236{
237 return wos << str.data();
238}
239
6a6ea041 240#endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
6b61b594 241
a7ea63e2 242#endif // wxUSE_STD_IOSTREAM
e87b7833 243
81727065
VS
244// ===========================================================================
245// wxString class core
246// ===========================================================================
247
248#if wxUSE_UNICODE_UTF8
249
81727065
VS
250void wxString::PosLenToImpl(size_t pos, size_t len,
251 size_t *implPos, size_t *implLen) const
252{
253 if ( pos == npos )
68482dc5 254 {
81727065 255 *implPos = npos;
68482dc5
VZ
256 }
257 else // have valid start position
81727065 258 {
68482dc5
VZ
259 const const_iterator b = GetIterForNthChar(pos);
260 *implPos = wxStringImpl::const_iterator(b.impl()) - m_impl.begin();
81727065 261 if ( len == npos )
68482dc5 262 {
81727065 263 *implLen = npos;
68482dc5
VZ
264 }
265 else // have valid length too
81727065 266 {
68482dc5
VZ
267 // we need to handle the case of length specifying a substring
268 // going beyond the end of the string, just as std::string does
269 const const_iterator e(end());
270 const_iterator i(b);
271 while ( len && i <= e )
272 {
273 ++i;
274 --len;
275 }
276
277 *implLen = i.impl() - b.impl();
81727065
VS
278 }
279 }
280}
281
282#endif // wxUSE_UNICODE_UTF8
283
11aac4ba
VS
284// ----------------------------------------------------------------------------
285// wxCStrData converted strings caching
286// ----------------------------------------------------------------------------
287
132276cf
VS
288// FIXME-UTF8: temporarily disabled because it doesn't work with global
289// string objects; re-enable after fixing this bug and benchmarking
290// performance to see if using a hash is a good idea at all
291#if 0
292
11aac4ba
VS
293// For backward compatibility reasons, it must be possible to assign the value
294// returned by wxString::c_str() to a char* or wchar_t* variable and work with
295// it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
296// because the memory would be freed immediately, but it has to be valid as long
297// as the string is not modified, so that code like this still works:
298//
299// const wxChar *s = str.c_str();
300// while ( s ) { ... }
301
302// FIXME-UTF8: not thread safe!
303// FIXME-UTF8: we currently clear the cached conversion only when the string is
304// destroyed, but we should do it when the string is modified, to
305// keep memory usage down
306// FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
307// invalidated the cache on every change, we could keep the previous
308// conversion
309// FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
310// to use mb_str() or wc_str() instead of (const [w]char*)c_str()
311
312template<typename T>
313static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
314{
6c4ebcda 315 typename T::iterator i = hash.find(wxConstCast(s, wxString));
11aac4ba
VS
316 if ( i != hash.end() )
317 {
318 free(i->second);
319 hash.erase(i);
320 }
321}
322
323#if wxUSE_UNICODE
6c4ebcda
VS
324// NB: non-STL implementation doesn't compile with "const wxString*" key type,
325// so we have to use wxString* here and const-cast when used
11aac4ba
VS
326WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
327 wxStringCharConversionCache);
328static wxStringCharConversionCache gs_stringsCharCache;
329
330const char* wxCStrData::AsChar() const
331{
332 // remove previously cache value, if any (see FIXMEs above):
333 DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
334
335 // convert the string and keep it:
6c4ebcda
VS
336 const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
337 m_str->mb_str().release();
11aac4ba
VS
338
339 return s + m_offset;
340}
341#endif // wxUSE_UNICODE
342
343#if !wxUSE_UNICODE_WCHAR
344WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
345 wxStringWCharConversionCache);
346static wxStringWCharConversionCache gs_stringsWCharCache;
347
348const wchar_t* wxCStrData::AsWChar() const
349{
350 // remove previously cache value, if any (see FIXMEs above):
351 DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
352
353 // convert the string and keep it:
6c4ebcda
VS
354 const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
355 m_str->wc_str().release();
11aac4ba
VS
356
357 return s + m_offset;
358}
359#endif // !wxUSE_UNICODE_WCHAR
360
11aac4ba
VS
361wxString::~wxString()
362{
363#if wxUSE_UNICODE
364 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
365 DeleteStringFromConversionCache(gs_stringsCharCache, this);
366#endif
367#if !wxUSE_UNICODE_WCHAR
368 DeleteStringFromConversionCache(gs_stringsWCharCache, this);
369#endif
370}
132276cf
VS
371#endif
372
132276cf
VS
373// ===========================================================================
374// wxString class core
375// ===========================================================================
376
377// ---------------------------------------------------------------------------
378// construction and conversion
379// ---------------------------------------------------------------------------
11aac4ba 380
81727065 381#if wxUSE_UNICODE_WCHAR
8f93a29f
VS
382/* static */
383wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
04abe4bc 384 const wxMBConv& conv)
8f93a29f
VS
385{
386 // anything to do?
387 if ( !psz || nLength == 0 )
de4983f3 388 return SubstrBufFromMB(wxWCharBuffer(L""), 0);
8f93a29f
VS
389
390 if ( nLength == npos )
391 nLength = wxNO_LEN;
392
393 size_t wcLen;
de4983f3 394 wxScopedWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
8f93a29f 395 if ( !wcLen )
de4983f3 396 return SubstrBufFromMB(wxWCharBuffer(L""), 0);
8f93a29f
VS
397 else
398 return SubstrBufFromMB(wcBuf, wcLen);
399}
81727065
VS
400#endif // wxUSE_UNICODE_WCHAR
401
402#if wxUSE_UNICODE_UTF8
403/* static */
404wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
405 const wxMBConv& conv)
406{
81727065
VS
407 // anything to do?
408 if ( !psz || nLength == 0 )
de4983f3 409 return SubstrBufFromMB(wxCharBuffer(""), 0);
81727065 410
111d9948
VS
411 // if psz is already in UTF-8, we don't have to do the roundtrip to
412 // wchar_t* and back:
413 if ( conv.IsUTF8() )
414 {
415 // we need to validate the input because UTF8 iterators assume valid
416 // UTF-8 sequence and psz may be invalid:
417 if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
418 {
9ef1ad0d
VZ
419 // we must pass the real string length to SubstrBufFromMB ctor
420 if ( nLength == npos )
421 nLength = psz ? strlen(psz) : 0;
38d26d60 422 return SubstrBufFromMB(wxScopedCharBuffer::CreateNonOwned(psz, nLength),
6df09f32 423 nLength);
111d9948
VS
424 }
425 // else: do the roundtrip through wchar_t*
426 }
427
81727065
VS
428 if ( nLength == npos )
429 nLength = wxNO_LEN;
430
431 // first convert to wide string:
432 size_t wcLen;
de4983f3 433 wxScopedWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
81727065 434 if ( !wcLen )
de4983f3 435 return SubstrBufFromMB(wxCharBuffer(""), 0);
81727065
VS
436
437 // and then to UTF-8:
4fdfe2f3 438 SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
81727065 439 // widechar -> UTF-8 conversion isn't supposed to ever fail:
9a83f860 440 wxASSERT_MSG( buf.data, wxT("conversion to UTF-8 failed") );
81727065
VS
441
442 return buf;
443}
444#endif // wxUSE_UNICODE_UTF8
445
446#if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
8f93a29f
VS
447/* static */
448wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
04abe4bc 449 const wxMBConv& conv)
8f93a29f
VS
450{
451 // anything to do?
452 if ( !pwz || nLength == 0 )
de4983f3 453 return SubstrBufFromWC(wxCharBuffer(""), 0);
8f93a29f
VS
454
455 if ( nLength == npos )
456 nLength = wxNO_LEN;
457
458 size_t mbLen;
de4983f3 459 wxScopedCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
8f93a29f 460 if ( !mbLen )
de4983f3 461 return SubstrBufFromWC(wxCharBuffer(""), 0);
8f93a29f
VS
462 else
463 return SubstrBufFromWC(mbBuf, mbLen);
464}
81727065 465#endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
8f93a29f 466
f54cb154
VZ
467// This std::string::c_str()-like method returns a wide char pointer to string
468// contents. In wxUSE_UNICODE_WCHAR case it is trivial as it can simply return
469// a pointer to the internal representation. Otherwise a conversion is required
470// and it returns a temporary buffer.
471//
472// However for compatibility with c_str() and to avoid breaking existing code
473// doing
474//
475// for ( const wchar_t *p = s.wc_str(); *p; p++ )
476// ... use *p...
477//
478// we actually need to ensure that the returned buffer is _not_ temporary and
479// so we use wxString::m_convertedToWChar to store the returned data
480#if !wxUSE_UNICODE_WCHAR
8f93a29f 481
f54cb154 482const wchar_t *wxString::AsWChar(const wxMBConv& conv) const
265d5cce 483{
f54cb154
VZ
484 const char * const strMB = m_impl.c_str();
485 const size_t lenMB = m_impl.length();
486
487 // find out the size of the buffer needed
488 const size_t lenWC = conv.ToWChar(NULL, 0, strMB, lenMB);
489 if ( lenWC == wxCONV_FAILED )
490 return NULL;
491
492 // keep the same buffer if the string size didn't change: this is not only
493 // an optimization but also ensure that code which modifies string
494 // character by character (without changing its length) can continue to use
495 // the pointer returned by a previous wc_str() call even after changing the
496 // string
497
498 // TODO-UTF8: we could check for ">" instead of "!=" here as this would
499 // allow to save on buffer reallocations but at the cost of
500 // consuming (even) more memory, we should benchmark this to
501 // determine if it's worth doing
502 if ( !m_convertedToWChar.m_str || lenWC != m_convertedToWChar.m_len )
503 {
504 if ( !const_cast<wxString *>(this)->m_convertedToWChar.Extend(lenWC) )
505 return NULL;
506 }
e87b7833 507
f54cb154
VZ
508 // finally do convert
509 m_convertedToWChar.m_str[lenWC] = L'\0';
510 if ( conv.ToWChar(m_convertedToWChar.m_str, lenWC,
511 strMB, lenMB) == wxCONV_FAILED )
512 return NULL;
e87b7833 513
f54cb154 514 return m_convertedToWChar.m_str;
81727065
VS
515}
516
f54cb154
VZ
517#endif // !wxUSE_UNICODE_WCHAR
518
519
520// Same thing for mb_str() which returns a normal char pointer to string
521// contents: this always requires converting it to the specified encoding in
522// non-ANSI build except if we need to convert to UTF-8 and this is what we
523// already use internally.
524#if wxUSE_UNICODE
525
526const char *wxString::AsChar(const wxMBConv& conv) const
81727065 527{
f54cb154 528#if wxUSE_UNICODE_UTF8
111d9948 529 if ( conv.IsUTF8() )
f54cb154 530 return m_impl.c_str();
111d9948 531
f54cb154
VZ
532 const wchar_t * const strWC = AsWChar(wxMBConvStrictUTF8());
533 const size_t lenWC = m_convertedToWChar.m_len;
534#else // wxUSE_UNICODE_WCHAR
535 const wchar_t * const strWC = m_impl.c_str();
536 const size_t lenWC = m_impl.length();
537#endif // wxUSE_UNICODE_UTF8/wxUSE_UNICODE_WCHAR
81727065 538
f54cb154
VZ
539 const size_t lenMB = conv.FromWChar(NULL, 0, strWC, lenWC);
540 if ( lenMB == wxCONV_FAILED )
541 return NULL;
542
543 if ( !m_convertedToChar.m_str || lenMB != m_convertedToChar.m_len )
544 {
545 if ( !const_cast<wxString *>(this)->m_convertedToChar.Extend(lenMB) )
546 return NULL;
547 }
81727065 548
f54cb154
VZ
549 m_convertedToChar.m_str[lenMB] = '\0';
550 if ( conv.FromWChar(m_convertedToChar.m_str, lenMB,
551 strWC, lenWC) == wxCONV_FAILED )
552 return NULL;
eec47cc6 553
f54cb154 554 return m_convertedToChar.m_str;
265d5cce 555}
7663d0d4 556
f54cb154 557#endif // wxUSE_UNICODE
e87b7833
MB
558
559// shrink to minimal size (releasing extra memory)
560bool wxString::Shrink()
561{
562 wxString tmp(begin(), end());
563 swap(tmp);
564 return tmp.length() == length();
565}
566
d8a4b666 567// deprecated compatibility code:
a7ea63e2 568#if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
c87a0bc8 569wxStringCharType *wxString::GetWriteBuf(size_t nLen)
d8a4b666
VS
570{
571 return DoGetWriteBuf(nLen);
572}
573
574void wxString::UngetWriteBuf()
575{
576 DoUngetWriteBuf();
577}
578
579void wxString::UngetWriteBuf(size_t nLen)
580{
581 DoUngetWriteBuf(nLen);
582}
a7ea63e2 583#endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
e87b7833 584
d8a4b666 585
e87b7833
MB
586// ---------------------------------------------------------------------------
587// data access
588// ---------------------------------------------------------------------------
589
590// all functions are inline in string.h
591
592// ---------------------------------------------------------------------------
e8f59039 593// concatenation operators
e87b7833
MB
594// ---------------------------------------------------------------------------
595
c801d85f 596/*
c801d85f
KB
597 * concatenation functions come in 5 flavours:
598 * string + string
599 * char + string and string + char
600 * C str + string and string + C str
601 */
602
b1801e0e 603wxString operator+(const wxString& str1, const wxString& str2)
c801d85f 604{
992527a5 605#if !wxUSE_STL_BASED_WXSTRING
8f93a29f
VS
606 wxASSERT( str1.IsValid() );
607 wxASSERT( str2.IsValid() );
e87b7833 608#endif
097c080b 609
3458e408
WS
610 wxString s = str1;
611 s += str2;
3168a13f 612
3458e408 613 return s;
c801d85f
KB
614}
615
c9f78968 616wxString operator+(const wxString& str, wxUniChar ch)
c801d85f 617{
992527a5 618#if !wxUSE_STL_BASED_WXSTRING
8f93a29f 619 wxASSERT( str.IsValid() );
e87b7833 620#endif
3168a13f 621
3458e408
WS
622 wxString s = str;
623 s += ch;
097c080b 624
3458e408 625 return s;
c801d85f
KB
626}
627
c9f78968 628wxString operator+(wxUniChar ch, const wxString& str)
c801d85f 629{
992527a5 630#if !wxUSE_STL_BASED_WXSTRING
8f93a29f 631 wxASSERT( str.IsValid() );
e87b7833 632#endif
097c080b 633
3458e408
WS
634 wxString s = ch;
635 s += str;
3168a13f 636
3458e408 637 return s;
c801d85f
KB
638}
639
8f93a29f 640wxString operator+(const wxString& str, const char *psz)
c801d85f 641{
992527a5 642#if !wxUSE_STL_BASED_WXSTRING
8f93a29f 643 wxASSERT( str.IsValid() );
e87b7833 644#endif
097c080b 645
3458e408 646 wxString s;
8f93a29f 647 if ( !s.Alloc(strlen(psz) + str.length()) ) {
9a83f860 648 wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
3458e408
WS
649 }
650 s += str;
651 s += psz;
3168a13f 652
3458e408 653 return s;
c801d85f
KB
654}
655
8f93a29f 656wxString operator+(const wxString& str, const wchar_t *pwz)
c801d85f 657{
992527a5 658#if !wxUSE_STL_BASED_WXSTRING
8f93a29f
VS
659 wxASSERT( str.IsValid() );
660#endif
661
662 wxString s;
663 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
9a83f860 664 wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
8f93a29f
VS
665 }
666 s += str;
667 s += pwz;
668
669 return s;
670}
671
672wxString operator+(const char *psz, const wxString& str)
673{
a7ea63e2
VS
674#if !wxUSE_STL_BASED_WXSTRING
675 wxASSERT( str.IsValid() );
676#endif
677
678 wxString s;
679 if ( !s.Alloc(strlen(psz) + str.length()) ) {
9a83f860 680 wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
a7ea63e2
VS
681 }
682 s = psz;
683 s += str;
684
685 return s;
686}
687
688wxString operator+(const wchar_t *pwz, const wxString& str)
689{
690#if !wxUSE_STL_BASED_WXSTRING
691 wxASSERT( str.IsValid() );
692#endif
693
694 wxString s;
695 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
9a83f860 696 wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
a7ea63e2
VS
697 }
698 s = pwz;
699 s += str;
700
701 return s;
702}
703
704// ---------------------------------------------------------------------------
705// string comparison
706// ---------------------------------------------------------------------------
707
52de37c7
VS
708bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
709{
710 return (length() == 1) && (compareWithCase ? GetChar(0u) == c
711 : wxToupper(GetChar(0u)) == wxToupper(c));
712}
713
a7ea63e2
VS
714#ifdef HAVE_STD_STRING_COMPARE
715
716// NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
717// UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
718// sort strings in characters code point order by sorting the byte sequence
719// in byte values order (i.e. what strcmp() and memcmp() do).
720
721int wxString::compare(const wxString& str) const
722{
723 return m_impl.compare(str.m_impl);
724}
725
726int wxString::compare(size_t nStart, size_t nLen,
727 const wxString& str) const
728{
729 size_t pos, len;
730 PosLenToImpl(nStart, nLen, &pos, &len);
731 return m_impl.compare(pos, len, str.m_impl);
732}
733
734int wxString::compare(size_t nStart, size_t nLen,
735 const wxString& str,
736 size_t nStart2, size_t nLen2) const
737{
738 size_t pos, len;
739 PosLenToImpl(nStart, nLen, &pos, &len);
740
741 size_t pos2, len2;
742 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
743
744 return m_impl.compare(pos, len, str.m_impl, pos2, len2);
745}
746
747int wxString::compare(const char* sz) const
748{
749 return m_impl.compare(ImplStr(sz));
750}
751
752int wxString::compare(const wchar_t* sz) const
753{
754 return m_impl.compare(ImplStr(sz));
755}
756
757int wxString::compare(size_t nStart, size_t nLen,
758 const char* sz, size_t nCount) const
759{
760 size_t pos, len;
761 PosLenToImpl(nStart, nLen, &pos, &len);
762
763 SubstrBufFromMB str(ImplStr(sz, nCount));
764
765 return m_impl.compare(pos, len, str.data, str.len);
766}
767
768int wxString::compare(size_t nStart, size_t nLen,
769 const wchar_t* sz, size_t nCount) const
770{
771 size_t pos, len;
772 PosLenToImpl(nStart, nLen, &pos, &len);
773
774 SubstrBufFromWC str(ImplStr(sz, nCount));
775
776 return m_impl.compare(pos, len, str.data, str.len);
777}
778
779#else // !HAVE_STD_STRING_COMPARE
780
781static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
782 const wxStringCharType* s2, size_t l2)
783{
784 if( l1 == l2 )
785 return wxStringMemcmp(s1, s2, l1);
786 else if( l1 < l2 )
787 {
788 int ret = wxStringMemcmp(s1, s2, l1);
789 return ret == 0 ? -1 : ret;
790 }
791 else
792 {
793 int ret = wxStringMemcmp(s1, s2, l2);
794 return ret == 0 ? +1 : ret;
795 }
796}
797
798int wxString::compare(const wxString& str) const
799{
800 return ::wxDoCmp(m_impl.data(), m_impl.length(),
801 str.m_impl.data(), str.m_impl.length());
802}
803
804int wxString::compare(size_t nStart, size_t nLen,
805 const wxString& str) const
806{
807 wxASSERT(nStart <= length());
808 size_type strLen = length() - nStart;
809 nLen = strLen < nLen ? strLen : nLen;
810
811 size_t pos, len;
812 PosLenToImpl(nStart, nLen, &pos, &len);
813
814 return ::wxDoCmp(m_impl.data() + pos, len,
815 str.m_impl.data(), str.m_impl.length());
816}
817
818int wxString::compare(size_t nStart, size_t nLen,
819 const wxString& str,
820 size_t nStart2, size_t nLen2) const
821{
822 wxASSERT(nStart <= length());
823 wxASSERT(nStart2 <= str.length());
824 size_type strLen = length() - nStart,
825 strLen2 = str.length() - nStart2;
826 nLen = strLen < nLen ? strLen : nLen;
827 nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
828
829 size_t pos, len;
830 PosLenToImpl(nStart, nLen, &pos, &len);
831 size_t pos2, len2;
832 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
833
834 return ::wxDoCmp(m_impl.data() + pos, len,
835 str.m_impl.data() + pos2, len2);
836}
837
838int wxString::compare(const char* sz) const
839{
840 SubstrBufFromMB str(ImplStr(sz, npos));
841 if ( str.len == npos )
842 str.len = wxStringStrlen(str.data);
843 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
844}
845
846int wxString::compare(const wchar_t* sz) const
847{
848 SubstrBufFromWC str(ImplStr(sz, npos));
849 if ( str.len == npos )
850 str.len = wxStringStrlen(str.data);
851 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
852}
853
854int wxString::compare(size_t nStart, size_t nLen,
855 const char* sz, size_t nCount) const
856{
857 wxASSERT(nStart <= length());
858 size_type strLen = length() - nStart;
859 nLen = strLen < nLen ? strLen : nLen;
097c080b 860
a7ea63e2
VS
861 size_t pos, len;
862 PosLenToImpl(nStart, nLen, &pos, &len);
3168a13f 863
a7ea63e2
VS
864 SubstrBufFromMB str(ImplStr(sz, nCount));
865 if ( str.len == npos )
866 str.len = wxStringStrlen(str.data);
867
868 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
c801d85f
KB
869}
870
a7ea63e2
VS
871int wxString::compare(size_t nStart, size_t nLen,
872 const wchar_t* sz, size_t nCount) const
8f93a29f 873{
a7ea63e2
VS
874 wxASSERT(nStart <= length());
875 size_type strLen = length() - nStart;
876 nLen = strLen < nLen ? strLen : nLen;
8f93a29f 877
a7ea63e2
VS
878 size_t pos, len;
879 PosLenToImpl(nStart, nLen, &pos, &len);
8f93a29f 880
a7ea63e2
VS
881 SubstrBufFromWC str(ImplStr(sz, nCount));
882 if ( str.len == npos )
883 str.len = wxStringStrlen(str.data);
884
885 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
8f93a29f
VS
886}
887
a7ea63e2
VS
888#endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
889
890
8f93a29f
VS
891// ---------------------------------------------------------------------------
892// find_{first,last}_[not]_of functions
893// ---------------------------------------------------------------------------
894
895#if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
c801d85f 896
8f93a29f
VS
897// NB: All these functions are implemented with the argument being wxChar*,
898// i.e. widechar string in any Unicode build, even though native string
899// representation is char* in the UTF-8 build. This is because we couldn't
900// use memchr() to determine if a character is in a set encoded as UTF-8.
901
902size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
dcb68102 903{
8f93a29f 904 return find_first_of(sz, nStart, wxStrlen(sz));
dcb68102
RN
905}
906
8f93a29f 907size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
dcb68102 908{
8f93a29f 909 return find_first_not_of(sz, nStart, wxStrlen(sz));
dcb68102
RN
910}
911
8f93a29f 912size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
dcb68102 913{
9a83f860 914 wxASSERT_MSG( nStart <= length(), wxT("invalid index") );
dcb68102 915
8f93a29f
VS
916 size_t idx = nStart;
917 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
dcb68102 918 {
8f93a29f
VS
919 if ( wxTmemchr(sz, *i, n) )
920 return idx;
dcb68102 921 }
8f93a29f
VS
922
923 return npos;
924}
925
926size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
927{
9a83f860 928 wxASSERT_MSG( nStart <= length(), wxT("invalid index") );
8f93a29f
VS
929
930 size_t idx = nStart;
931 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
dcb68102 932 {
8f93a29f
VS
933 if ( !wxTmemchr(sz, *i, n) )
934 return idx;
935 }
936
937 return npos;
938}
939
940
941size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
942{
943 return find_last_of(sz, nStart, wxStrlen(sz));
944}
945
946size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
947{
948 return find_last_not_of(sz, nStart, wxStrlen(sz));
949}
950
951size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
952{
953 size_t len = length();
954
955 if ( nStart == npos )
956 {
957 nStart = len - 1;
dcb68102 958 }
2c09fb3b 959 else
dcb68102 960 {
9a83f860 961 wxASSERT_MSG( nStart <= len, wxT("invalid index") );
dcb68102 962 }
8f93a29f
VS
963
964 size_t idx = nStart;
965 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
966 i != rend(); --idx, ++i )
967 {
968 if ( wxTmemchr(sz, *i, n) )
969 return idx;
970 }
971
972 return npos;
dcb68102
RN
973}
974
8f93a29f 975size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
dcb68102 976{
8f93a29f
VS
977 size_t len = length();
978
979 if ( nStart == npos )
980 {
981 nStart = len - 1;
982 }
983 else
984 {
9a83f860 985 wxASSERT_MSG( nStart <= len, wxT("invalid index") );
8f93a29f
VS
986 }
987
988 size_t idx = nStart;
989 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
990 i != rend(); --idx, ++i )
991 {
992 if ( !wxTmemchr(sz, *i, n) )
993 return idx;
994 }
995
996 return npos;
dcb68102
RN
997}
998
8f93a29f 999size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
dcb68102 1000{
9a83f860 1001 wxASSERT_MSG( nStart <= length(), wxT("invalid index") );
8f93a29f
VS
1002
1003 size_t idx = nStart;
1004 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
1005 {
1006 if ( *i != ch )
1007 return idx;
1008 }
1009
1010 return npos;
1011}
1012
1013size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
1014{
1015 size_t len = length();
1016
1017 if ( nStart == npos )
1018 {
1019 nStart = len - 1;
1020 }
1021 else
1022 {
9a83f860 1023 wxASSERT_MSG( nStart <= len, wxT("invalid index") );
8f93a29f
VS
1024 }
1025
1026 size_t idx = nStart;
1027 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1028 i != rend(); --idx, ++i )
1029 {
1030 if ( *i != ch )
1031 return idx;
1032 }
1033
1034 return npos;
1035}
1036
1037// the functions above were implemented for wchar_t* arguments in Unicode
1038// build and char* in ANSI build; below are implementations for the other
1039// version:
1040#if wxUSE_UNICODE
1041 #define wxOtherCharType char
1042 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
1043#else
1044 #define wxOtherCharType wchar_t
1045 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
1046#endif
1047
1048size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
1049 { return find_first_of(STRCONV(sz), nStart); }
1050
1051size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
1052 size_t n) const
1053 { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
1054size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
1055 { return find_last_of(STRCONV(sz), nStart); }
1056size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
1057 size_t n) const
1058 { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
1059size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
1060 { return find_first_not_of(STRCONV(sz), nStart); }
1061size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
1062 size_t n) const
1063 { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
1064size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
1065 { return find_last_not_of(STRCONV(sz), nStart); }
1066size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
1067 size_t n) const
1068 { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
1069
1070#undef wxOtherCharType
1071#undef STRCONV
1072
1073#endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1074
1075// ===========================================================================
1076// other common string functions
1077// ===========================================================================
1078
1079int wxString::CmpNoCase(const wxString& s) const
1080{
825d69c1 1081#if defined(__WXMSW__) && !wxUSE_UNICODE_UTF8
1935acd7
VZ
1082 // Prefer to use CompareString() if available as it's more efficient than
1083 // doing it manually or even using wxStricmp() (see #10375)
1084 //
1085 // Also note that not using NORM_STRINGSORT may result in not having a
1086 // strict weak ordering (e.g. s1 < s2 and s2 < s3 but s3 < s1) and so break
1087 // algorithms such as std::sort that rely on it. It's also more consistent
1088 // with the fall back version below.
1089 switch ( ::CompareString(LOCALE_USER_DEFAULT,
1090 NORM_IGNORECASE | SORT_STRINGSORT,
825d69c1
VZ
1091 m_impl.c_str(), m_impl.length(),
1092 s.m_impl.c_str(), s.m_impl.length()) )
1093 {
1094 case CSTR_LESS_THAN:
1095 return -1;
1096
1097 case CSTR_EQUAL:
1098 return 0;
1099
1100 case CSTR_GREATER_THAN:
1101 return 1;
8f93a29f 1102
825d69c1
VZ
1103 default:
1104 wxFAIL_MSG( "unexpected CompareString() return value" );
1105 // fall through
1106
1107 case 0:
1108 wxLogLastError("CompareString");
1109 // use generic code below
1110 }
1111#endif // __WXMSW__ && !wxUSE_UNICODE_UTF8
1112
1113 // do the comparison manually: notice that we can't use wxStricmp() as it
1114 // doesn't handle embedded NULs
1115
1116 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
8f93a29f
VS
1117 const_iterator i1 = begin();
1118 const_iterator end1 = end();
1119 const_iterator i2 = s.begin();
1120 const_iterator end2 = s.end();
1121
0d8b0f94 1122 for ( ; i1 != end1 && i2 != end2; ++i1, ++i2 )
8f93a29f
VS
1123 {
1124 wxUniChar lower1 = (wxChar)wxTolower(*i1);
1125 wxUniChar lower2 = (wxChar)wxTolower(*i2);
1126 if ( lower1 != lower2 )
1127 return lower1 < lower2 ? -1 : 1;
1128 }
1129
1130 size_t len1 = length();
1131 size_t len2 = s.length();
dcb68102 1132
8f93a29f
VS
1133 if ( len1 < len2 )
1134 return -1;
1135 else if ( len1 > len2 )
1136 return 1;
1137 return 0;
dcb68102
RN
1138}
1139
1140
b1ac3b56 1141#if wxUSE_UNICODE
e015c2a3 1142
cf6bedce
SC
1143#ifdef __MWERKS__
1144#ifndef __SCHAR_MAX__
1145#define __SCHAR_MAX__ 127
1146#endif
1147#endif
1148
e6310bbc 1149wxString wxString::FromAscii(const char *ascii, size_t len)
b1ac3b56 1150{
e6310bbc 1151 if (!ascii || len == 0)
b1ac3b56 1152 return wxEmptyString;
e015c2a3 1153
b1ac3b56 1154 wxString res;
e015c2a3 1155
e6310bbc 1156 {
6798451b 1157 wxStringInternalBuffer buf(res, len);
602a857b 1158 wxStringCharType *dest = buf;
c1eada83 1159
602a857b
VS
1160 for ( ; len > 0; --len )
1161 {
1162 unsigned char c = (unsigned char)*ascii++;
1163 wxASSERT_MSG( c < 0x80,
9a83f860 1164 wxT("Non-ASCII value passed to FromAscii().") );
c1eada83 1165
602a857b
VS
1166 *dest++ = (wchar_t)c;
1167 }
e015c2a3
VZ
1168 }
1169
b1ac3b56
RR
1170 return res;
1171}
1172
e6310bbc
VS
1173wxString wxString::FromAscii(const char *ascii)
1174{
0081dd72 1175 return FromAscii(ascii, wxStrlen(ascii));
e6310bbc
VS
1176}
1177
c5288c5c 1178wxString wxString::FromAscii(char ascii)
2b5f62a0
VZ
1179{
1180 // What do we do with '\0' ?
1181
c1eada83 1182 unsigned char c = (unsigned char)ascii;
8760bc65 1183
9a83f860 1184 wxASSERT_MSG( c < 0x80, wxT("Non-ASCII value passed to FromAscii().") );
c1eada83
VS
1185
1186 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1187 return wxString(wxUniChar((wchar_t)c));
2b5f62a0
VZ
1188}
1189
de4983f3 1190const wxScopedCharBuffer wxString::ToAscii() const
b1ac3b56 1191{
e015c2a3
VZ
1192 // this will allocate enough space for the terminating NUL too
1193 wxCharBuffer buffer(length());
6e394fc6 1194 char *dest = buffer.data();
e015c2a3 1195
c1eada83 1196 for ( const_iterator i = begin(); i != end(); ++i )
b1ac3b56 1197 {
c1eada83
VS
1198 wxUniChar c(*i);
1199 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1200 *dest++ = c.IsAscii() ? (char)c : '_';
e015c2a3
VZ
1201
1202 // the output string can't have embedded NULs anyhow, so we can safely
1203 // stop at first of them even if we do have any
c1eada83 1204 if ( !c )
e015c2a3 1205 break;
b1ac3b56 1206 }
e015c2a3 1207
b1ac3b56
RR
1208 return buffer;
1209}
e015c2a3 1210
c1eada83 1211#endif // wxUSE_UNICODE
b1ac3b56 1212
c801d85f 1213// extract string of length nCount starting at nFirst
c801d85f
KB
1214wxString wxString::Mid(size_t nFirst, size_t nCount) const
1215{
73f507f5 1216 size_t nLen = length();
30d9011f 1217
73f507f5
WS
1218 // default value of nCount is npos and means "till the end"
1219 if ( nCount == npos )
1220 {
1221 nCount = nLen - nFirst;
1222 }
30d9011f 1223
73f507f5
WS
1224 // out-of-bounds requests return sensible things
1225 if ( nFirst + nCount > nLen )
1226 {
1227 nCount = nLen - nFirst;
1228 }
c801d85f 1229
73f507f5
WS
1230 if ( nFirst > nLen )
1231 {
1232 // AllocCopy() will return empty string
1233 return wxEmptyString;
1234 }
c801d85f 1235
73f507f5
WS
1236 wxString dest(*this, nFirst, nCount);
1237 if ( dest.length() != nCount )
1238 {
9a83f860 1239 wxFAIL_MSG( wxT("out of memory in wxString::Mid") );
73f507f5 1240 }
30d9011f 1241
73f507f5 1242 return dest;
c801d85f
KB
1243}
1244
e87b7833 1245// check that the string starts with prefix and return the rest of the string
d775fa82 1246// in the provided pointer if it is not NULL, otherwise return false
c5e7a7d7 1247bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
f6bcfd97 1248{
c5e7a7d7
VS
1249 if ( compare(0, prefix.length(), prefix) != 0 )
1250 return false;
f6bcfd97
BP
1251
1252 if ( rest )
1253 {
1254 // put the rest of the string into provided pointer
c5e7a7d7 1255 rest->assign(*this, prefix.length(), npos);
f6bcfd97
BP
1256 }
1257
d775fa82 1258 return true;
f6bcfd97
BP
1259}
1260
3affcd07
VZ
1261
1262// check that the string ends with suffix and return the rest of it in the
1263// provided pointer if it is not NULL, otherwise return false
c5e7a7d7 1264bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
3affcd07 1265{
c5e7a7d7 1266 int start = length() - suffix.length();
81727065
VS
1267
1268 if ( start < 0 || compare(start, npos, suffix) != 0 )
3affcd07
VZ
1269 return false;
1270
1271 if ( rest )
1272 {
1273 // put the rest of the string into provided pointer
1274 rest->assign(*this, 0, start);
1275 }
1276
1277 return true;
1278}
1279
1280
c801d85f
KB
1281// extract nCount last (rightmost) characters
1282wxString wxString::Right(size_t nCount) const
1283{
e87b7833
MB
1284 if ( nCount > length() )
1285 nCount = length();
c801d85f 1286
e87b7833
MB
1287 wxString dest(*this, length() - nCount, nCount);
1288 if ( dest.length() != nCount ) {
9a83f860 1289 wxFAIL_MSG( wxT("out of memory in wxString::Right") );
b1801e0e 1290 }
c801d85f
KB
1291 return dest;
1292}
1293
7929902d 1294// get all characters after the last occurrence of ch
c801d85f 1295// (returns the whole string if ch not found)
c9f78968 1296wxString wxString::AfterLast(wxUniChar ch) const
c801d85f
KB
1297{
1298 wxString str;
d775fa82 1299 int iPos = Find(ch, true);
3c67202d 1300 if ( iPos == wxNOT_FOUND )
c801d85f
KB
1301 str = *this;
1302 else
c565abe1 1303 str.assign(*this, iPos + 1, npos);
c801d85f
KB
1304
1305 return str;
1306}
1307
1308// extract nCount first (leftmost) characters
1309wxString wxString::Left(size_t nCount) const
1310{
e87b7833
MB
1311 if ( nCount > length() )
1312 nCount = length();
c801d85f 1313
e87b7833
MB
1314 wxString dest(*this, 0, nCount);
1315 if ( dest.length() != nCount ) {
9a83f860 1316 wxFAIL_MSG( wxT("out of memory in wxString::Left") );
b1801e0e 1317 }
c801d85f
KB
1318 return dest;
1319}
1320
7929902d 1321// get all characters before the first occurrence of ch
c801d85f 1322// (returns the whole string if ch not found)
c9f78968 1323wxString wxString::BeforeFirst(wxUniChar ch) const
c801d85f 1324{
e87b7833 1325 int iPos = Find(ch);
c565abe1
VZ
1326 if ( iPos == wxNOT_FOUND )
1327 iPos = length();
e87b7833 1328 return wxString(*this, 0, iPos);
c801d85f
KB
1329}
1330
7929902d 1331/// get all characters before the last occurrence of ch
c801d85f 1332/// (returns empty string if ch not found)
c9f78968 1333wxString wxString::BeforeLast(wxUniChar ch) const
c801d85f
KB
1334{
1335 wxString str;
d775fa82 1336 int iPos = Find(ch, true);
3c67202d 1337 if ( iPos != wxNOT_FOUND && iPos != 0 )
d1c9bbf6 1338 str = wxString(c_str(), iPos);
c801d85f
KB
1339
1340 return str;
1341}
1342
7929902d 1343/// get all characters after the first occurrence of ch
c801d85f 1344/// (returns empty string if ch not found)
c9f78968 1345wxString wxString::AfterFirst(wxUniChar ch) const
c801d85f
KB
1346{
1347 wxString str;
1348 int iPos = Find(ch);
3c67202d 1349 if ( iPos != wxNOT_FOUND )
c565abe1 1350 str.assign(*this, iPos + 1, npos);
c801d85f
KB
1351
1352 return str;
1353}
1354
7929902d 1355// replace first (or all) occurrences of some substring with another one
8a540c88
VS
1356size_t wxString::Replace(const wxString& strOld,
1357 const wxString& strNew, bool bReplaceAll)
c801d85f 1358{
a8f1f1b2 1359 // if we tried to replace an empty string we'd enter an infinite loop below
8a540c88 1360 wxCHECK_MSG( !strOld.empty(), 0,
9a83f860 1361 wxT("wxString::Replace(): invalid parameter") );
a8f1f1b2 1362
68482dc5
VZ
1363 wxSTRING_INVALIDATE_CACHE();
1364
510bb748 1365 size_t uiCount = 0; // count of replacements made
c801d85f 1366
8a627032
VZ
1367 // optimize the special common case: replacement of one character by
1368 // another one (in UTF-8 case we can only do this for ASCII characters)
1369 //
1370 // benchmarks show that this special version is around 3 times faster
1371 // (depending on the proportion of matching characters and UTF-8/wchar_t
1372 // build)
1373 if ( strOld.m_impl.length() == 1 && strNew.m_impl.length() == 1 )
1374 {
1375 const wxStringCharType chOld = strOld.m_impl[0],
1376 chNew = strNew.m_impl[0];
1377
1378 // this loop is the simplified version of the one below
1379 for ( size_t pos = 0; ; )
1380 {
1381 pos = m_impl.find(chOld, pos);
1382 if ( pos == npos )
1383 break;
c801d85f 1384
8a627032
VZ
1385 m_impl[pos++] = chNew;
1386
1387 uiCount++;
1388
1389 if ( !bReplaceAll )
1390 break;
1391 }
1392 }
072682ce
VZ
1393 else if ( !bReplaceAll)
1394 {
1395 size_t pos = m_impl.find(strOld, 0);
1396 if ( pos != npos )
1397 {
1398 m_impl.replace(pos, strOld.m_impl.length(), strNew.m_impl);
1399 uiCount = 1;
1400 }
1401 }
1402 else // replace all occurrences
510bb748 1403 {
8a627032
VZ
1404 const size_t uiOldLen = strOld.m_impl.length();
1405 const size_t uiNewLen = strNew.m_impl.length();
1406
072682ce
VZ
1407 // first scan the string to find all positions at which the replacement
1408 // should be made
1409 wxVector<size_t> replacePositions;
1410
1411 size_t pos;
1412 for ( pos = m_impl.find(strOld.m_impl, 0);
1413 pos != npos;
1414 pos = m_impl.find(strOld.m_impl, pos + uiOldLen))
8a627032 1415 {
072682ce
VZ
1416 replacePositions.push_back(pos);
1417 ++uiCount;
1418 }
510bb748 1419
072682ce
VZ
1420 if ( !uiCount )
1421 return 0;
510bb748 1422
072682ce
VZ
1423 // allocate enough memory for the whole new string
1424 wxString tmp;
1425 tmp.m_impl.reserve(m_impl.length() + uiCount*(uiNewLen - uiOldLen));
ad5bb7d6 1426
072682ce
VZ
1427 // copy this string to tmp doing replacements on the fly
1428 size_t replNum = 0;
1429 for ( pos = 0; replNum < uiCount; replNum++ )
1430 {
1431 const size_t nextReplPos = replacePositions[replNum];
394b2900 1432
072682ce
VZ
1433 if ( pos != nextReplPos )
1434 {
1435 tmp.m_impl.append(m_impl, pos, nextReplPos - pos);
1436 }
1437
1438 tmp.m_impl.append(strNew.m_impl);
1439 pos = nextReplPos + uiOldLen;
8a627032 1440 }
072682ce
VZ
1441
1442 if ( pos != m_impl.length() )
1443 {
1444 // append the rest of the string unchanged
1445 tmp.m_impl.append(m_impl, pos, m_impl.length() - pos);
1446 }
1447
1448 swap(tmp);
c801d85f 1449 }
c801d85f 1450
510bb748 1451 return uiCount;
c801d85f
KB
1452}
1453
1454bool wxString::IsAscii() const
1455{
a4a44612
VS
1456 for ( const_iterator i = begin(); i != end(); ++i )
1457 {
1458 if ( !(*i).IsAscii() )
1459 return false;
1460 }
1461
1462 return true;
c801d85f 1463}
dd1eaa89 1464
c801d85f
KB
1465bool wxString::IsWord() const
1466{
a4a44612
VS
1467 for ( const_iterator i = begin(); i != end(); ++i )
1468 {
1469 if ( !wxIsalpha(*i) )
1470 return false;
1471 }
1472
1473 return true;
c801d85f 1474}
dd1eaa89 1475
c801d85f
KB
1476bool wxString::IsNumber() const
1477{
a4a44612
VS
1478 if ( empty() )
1479 return true;
1480
1481 const_iterator i = begin();
1482
9a83f860 1483 if ( *i == wxT('-') || *i == wxT('+') )
a4a44612
VS
1484 ++i;
1485
1486 for ( ; i != end(); ++i )
1487 {
1488 if ( !wxIsdigit(*i) )
1489 return false;
1490 }
1491
1492 return true;
c801d85f
KB
1493}
1494
c801d85f
KB
1495wxString wxString::Strip(stripType w) const
1496{
1497 wxString s = *this;
d775fa82
WS
1498 if ( w & leading ) s.Trim(false);
1499 if ( w & trailing ) s.Trim(true);
c801d85f
KB
1500 return s;
1501}
1502
c801d85f
KB
1503// ---------------------------------------------------------------------------
1504// case conversion
1505// ---------------------------------------------------------------------------
1506
1507wxString& wxString::MakeUpper()
1508{
e87b7833
MB
1509 for ( iterator it = begin(), en = end(); it != en; ++it )
1510 *it = (wxChar)wxToupper(*it);
c801d85f
KB
1511
1512 return *this;
1513}
1514
1515wxString& wxString::MakeLower()
1516{
e87b7833
MB
1517 for ( iterator it = begin(), en = end(); it != en; ++it )
1518 *it = (wxChar)wxTolower(*it);
c801d85f
KB
1519
1520 return *this;
1521}
1522
0c7db140
VZ
1523wxString& wxString::MakeCapitalized()
1524{
1525 const iterator en = end();
1526 iterator it = begin();
1527 if ( it != en )
1528 {
1529 *it = (wxChar)wxToupper(*it);
1530 for ( ++it; it != en; ++it )
1531 *it = (wxChar)wxTolower(*it);
1532 }
1533
1534 return *this;
1535}
1536
c801d85f
KB
1537// ---------------------------------------------------------------------------
1538// trimming and padding
1539// ---------------------------------------------------------------------------
1540
d775fa82 1541// some compilers (VC++ 6.0 not to name them) return true for a call to
9d55bfef 1542// isspace('\xEA') in the C locale which seems to be broken to me, but we have
c95e653c 1543// to live with this by checking that the character is a 7 bit one - even if
9d55bfef 1544// this may fail to detect some spaces (I don't know if Unicode doesn't have
576c608d
VZ
1545// space-like symbols somewhere except in the first 128 chars), it is arguably
1546// still better than trimming away accented letters
1547inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1548
c801d85f
KB
1549// trims spaces (in the sense of isspace) from left or right side
1550wxString& wxString::Trim(bool bFromRight)
1551{
3458e408
WS
1552 // first check if we're going to modify the string at all
1553 if ( !empty() &&
1554 (
1555 (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1556 (!bFromRight && wxSafeIsspace(GetChar(0u)))
1557 )
2c3b684c 1558 )
2c3b684c 1559 {
3458e408
WS
1560 if ( bFromRight )
1561 {
1562 // find last non-space character
d4d02bd5 1563 reverse_iterator psz = rbegin();
32c62191 1564 while ( (psz != rend()) && wxSafeIsspace(*psz) )
0d8b0f94 1565 ++psz;
92df97b8 1566
3458e408 1567 // truncate at trailing space start
d4d02bd5 1568 erase(psz.base(), end());
3458e408
WS
1569 }
1570 else
1571 {
1572 // find first non-space character
1573 iterator psz = begin();
32c62191 1574 while ( (psz != end()) && wxSafeIsspace(*psz) )
0d8b0f94 1575 ++psz;
2c3b684c 1576
3458e408
WS
1577 // fix up data and length
1578 erase(begin(), psz);
1579 }
2c3b684c 1580 }
c801d85f 1581
3458e408 1582 return *this;
c801d85f
KB
1583}
1584
1585// adds nCount characters chPad to the string from either side
c9f78968 1586wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
c801d85f 1587{
3458e408 1588 wxString s(chPad, nCount);
c801d85f 1589
3458e408
WS
1590 if ( bFromRight )
1591 *this += s;
1592 else
1593 {
1594 s += *this;
1595 swap(s);
1596 }
c801d85f 1597
3458e408 1598 return *this;
c801d85f
KB
1599}
1600
1601// truncate the string
1602wxString& wxString::Truncate(size_t uiLen)
1603{
3458e408
WS
1604 if ( uiLen < length() )
1605 {
1606 erase(begin() + uiLen, end());
1607 }
1608 //else: nothing to do, string is already short enough
c801d85f 1609
3458e408 1610 return *this;
c801d85f
KB
1611}
1612
1613// ---------------------------------------------------------------------------
3c67202d 1614// finding (return wxNOT_FOUND if not found and index otherwise)
c801d85f
KB
1615// ---------------------------------------------------------------------------
1616
1617// find a character
c9f78968 1618int wxString::Find(wxUniChar ch, bool bFromEnd) const
c801d85f 1619{
3458e408 1620 size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
c801d85f 1621
3458e408 1622 return (idx == npos) ? wxNOT_FOUND : (int)idx;
c801d85f
KB
1623}
1624
cd0b1709
VZ
1625// ----------------------------------------------------------------------------
1626// conversion to numbers
1627// ----------------------------------------------------------------------------
1628
52de37c7
VS
// All the conversion functions below are implemented in exactly the same
// way, so the common parts are factored out in the macros defined here. Note
// that number extraction works correctly on UTF-8 strings, so we can use
// wxStringCharType and wx_str() for maximum efficiency.

// expands to its argument except when building for Windows CE, where it
// expands to nothing (errno.h is not included there)
#ifdef __WXWINCE__
    #define DO_IF_NOT_WINCE(x)
#else
    #define DO_IF_NOT_WINCE(x) x
#endif

// Prologue: checks the output pointer pVal of the enclosing function, resets
// errno and declares the locals "start" (beginning of the string data) and
// "end" (to be filled in by the conversion function).
#define WX_STRING_TO_X_TYPE_START                                             \
    wxCHECK_MSG( pVal, false, wxT("NULL output pointer") );                   \
    DO_IF_NOT_WINCE( errno = 0; )                                             \
    const wxStringCharType *start = wx_str();                                 \
    wxStringCharType *end;

// Epilogue: expects the result of the conversion in the local "val".
//
// notice that we return false without modifying the output parameter at all if
// nothing could be parsed but we do modify it and return false then if we did
// parse something successfully but not the entire string
#define WX_STRING_TO_X_TYPE_END                                               \
    if ( end == start )                                                       \
        return false;                                                         \
    DO_IF_NOT_WINCE( if ( errno == ERANGE ) return false; )                   \
    *pVal = val;                                                              \
    return !*end;
cd0b1709 1653
c95e653c 1654bool wxString::ToLong(long *pVal, int base) const
cd0b1709 1655{
9a83f860 1656 wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );
529e491c
FM
1657
1658 WX_STRING_TO_X_TYPE_START
1659 long val = wxStrtol(start, &end, base);
1660 WX_STRING_TO_X_TYPE_END
619dcb09 1661}
cd0b1709 1662
c95e653c 1663bool wxString::ToULong(unsigned long *pVal, int base) const
619dcb09 1664{
9a83f860 1665 wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );
529e491c
FM
1666
1667 WX_STRING_TO_X_TYPE_START
1668 unsigned long val = wxStrtoul(start, &end, base);
1669 WX_STRING_TO_X_TYPE_END
cd0b1709
VZ
1670}
1671
c95e653c 1672bool wxString::ToLongLong(wxLongLong_t *pVal, int base) const
d6718dd1 1673{
9a83f860 1674 wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );
529e491c
FM
1675
1676 WX_STRING_TO_X_TYPE_START
1677 wxLongLong_t val = wxStrtoll(start, &end, base);
1678 WX_STRING_TO_X_TYPE_END
d6718dd1
VZ
1679}
1680
c95e653c 1681bool wxString::ToULongLong(wxULongLong_t *pVal, int base) const
d6718dd1 1682{
9a83f860 1683 wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );
529e491c
FM
1684
1685 WX_STRING_TO_X_TYPE_START
1686 wxULongLong_t val = wxStrtoull(start, &end, base);
1687 WX_STRING_TO_X_TYPE_END
d6718dd1
VZ
1688}
1689
c95e653c 1690bool wxString::ToDouble(double *pVal) const
cd0b1709 1691{
529e491c
FM
1692 WX_STRING_TO_X_TYPE_START
1693 double val = wxStrtod(start, &end);
1694 WX_STRING_TO_X_TYPE_END
1695}
cd0b1709 1696
529e491c 1697#if wxUSE_XLOCALE
e71e5b37 1698
529e491c
FM
1699bool wxString::ToCLong(long *pVal, int base) const
1700{
9a83f860 1701 wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );
cd0b1709 1702
529e491c 1703 WX_STRING_TO_X_TYPE_START
a51fdf81 1704#if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT)
529e491c
FM
1705 long val = wxStrtol_lA(start, &end, base, wxCLocale);
1706#else
1707 long val = wxStrtol_l(start, &end, base, wxCLocale);
1708#endif
1709 WX_STRING_TO_X_TYPE_END
1710}
c95e653c 1711
529e491c
FM
1712bool wxString::ToCULong(unsigned long *pVal, int base) const
1713{
9a83f860 1714 wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );
c95e653c 1715
529e491c 1716 WX_STRING_TO_X_TYPE_START
a51fdf81 1717#if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT)
529e491c
FM
1718 unsigned long val = wxStrtoul_lA(start, &end, base, wxCLocale);
1719#else
1720 unsigned long val = wxStrtoul_l(start, &end, base, wxCLocale);
1721#endif
1722 WX_STRING_TO_X_TYPE_END
cd0b1709
VZ
1723}
1724
529e491c
FM
1725bool wxString::ToCDouble(double *pVal) const
1726{
1727 WX_STRING_TO_X_TYPE_START
a51fdf81 1728#if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT)
529e491c
FM
1729 double val = wxStrtod_lA(start, &end, wxCLocale);
1730#else
1731 double val = wxStrtod_l(start, &end, wxCLocale);
1732#endif
1733 WX_STRING_TO_X_TYPE_END
1734}
1735
1736#endif // wxUSE_XLOCALE
1737
c801d85f 1738// ---------------------------------------------------------------------------
9efd3367 1739// formatted output
c801d85f 1740// ---------------------------------------------------------------------------
378b05f7 1741
d1f6e2cf 1742#if !wxUSE_UTF8_LOCALE_ONLY
341e7d28 1743/* static */
c9f78968 1744#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1528e0b8 1745wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
c9f78968 1746#else
d1f6e2cf 1747wxString wxString::DoFormatWchar(const wxChar *format, ...)
c9f78968 1748#endif
341e7d28 1749{
77c3e48a 1750 va_list argptr;
c9f78968 1751 va_start(argptr, format);
341e7d28 1752
77c3e48a 1753 wxString s;
c9f78968 1754 s.PrintfV(format, argptr);
341e7d28 1755
77c3e48a 1756 va_end(argptr);
341e7d28 1757
77c3e48a 1758 return s;
341e7d28 1759}
d1f6e2cf
VS
1760#endif // !wxUSE_UTF8_LOCALE_ONLY
1761
#if wxUSE_UNICODE_UTF8
// Returns a new string formatted by PrintfV() from the char format string
// and the variable arguments following it.
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString result;

    va_list args;
    va_start(args, format);
    result.PrintfV(format, args);
    va_end(args);

    return result;
}
#endif // wxUSE_UNICODE_UTF8
341e7d28
VZ
1777
1778/* static */
c9f78968 1779wxString wxString::FormatV(const wxString& format, va_list argptr)
341e7d28
VZ
1780{
1781 wxString s;
c9f78968 1782 s.PrintfV(format, argptr);
341e7d28
VZ
1783 return s;
1784}
1785
d1f6e2cf 1786#if !wxUSE_UTF8_LOCALE_ONLY
c9f78968 1787#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
d1f6e2cf 1788int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
c9f78968 1789#else
d1f6e2cf 1790int wxString::DoPrintfWchar(const wxChar *format, ...)
c9f78968 1791#endif
c801d85f 1792{
ba9bbf13 1793 va_list argptr;
c9f78968 1794 va_start(argptr, format);
c801d85f 1795
c9f78968
VS
1796#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1797 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1798 // because it's the only cast that works safely for downcasting when
1799 // multiple inheritance is used:
1800 wxString *str = static_cast<wxString*>(this);
1801#else
1802 wxString *str = this;
1803#endif
1804
1805 int iLen = str->PrintfV(format, argptr);
c801d85f 1806
ba9bbf13 1807 va_end(argptr);
c801d85f 1808
ba9bbf13 1809 return iLen;
c801d85f 1810}
d1f6e2cf
VS
1811#endif // !wxUSE_UTF8_LOCALE_ONLY
1812
#if wxUSE_UNICODE_UTF8
// Replaces the contents of the string with the result of formatting the
// variable arguments according to the char format string and returns the
// value returned by PrintfV().
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list args;
    va_start(args, format);

    const int iLen = PrintfV(format, args);

    va_end(args);

    return iLen;
}
#endif // wxUSE_UNICODE_UTF8
c801d85f 1826
67612ff1
DE
1827/*
1828 Uses wxVsnprintf and places the result into this string.
1829
1830 In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1831 it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in
1832 the ISO C99 (and thus SUSv3) standard the return value for the case of
1833 an undersized buffer is inconsistent. For conforming vsnprintf
1834 implementations the function must return the number of characters that
1835 would have been printed had the buffer been large enough. For conforming
1836 vswprintf implementations the function must return a negative number
1837 and set errno.
1838
1839 What vswprintf sets errno to is undefined but Darwin seems to set it to
a9a854d7
DE
1840 EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of
1841 those are defined in the standard and backed up by several conformance
1842 statements. Note that ENOMEM mentioned in the manual page does not
1843 apply to swprintf, only wprintf and fwprintf.
1844
1845 Official manual page:
1846 http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1847
1848 Some conformance statements (AIX, Solaris):
1849 http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1850 http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1851
1852 Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1853 EILSEQ and EINVAL are specifically defined to mean the error is other than
1854 an undersized buffer and no other errno are defined we treat those two
1855 as meaning hard errors and everything else gets the old behavior which
1856 is to keep looping and increasing buffer size until the function succeeds.
c95e653c 1857
67612ff1
DE
1858 In practice it's impossible to determine before compilation which behavior
1859 may be used. The vswprintf function may have vsnprintf-like behavior or
1860 vice-versa. Behavior detected on one release can theoretically change
1861 with an updated release. Not to mention that configure testing for it
1862 would require the test to be run on the host system, not the build system
1863 which makes cross compilation difficult. Therefore, we make no assumptions
1864 about behavior and try our best to handle every known case, including the
1865 case where wxVsnprintf returns a negative number and fails to set errno.
1866
1867 There is yet one more non-standard implementation and that is our own.
1868 Fortunately, that can be detected at compile-time.
1869
1870 On top of all that, ISO C99 explicitly defines snprintf to write a null
1871 character to the last position of the specified buffer. That would be at
1872 the given buffer size minus 1. It is supposed to do this even if it
1873 turns out that the buffer is sized too small.
1874
1875 Darwin (tested on 10.5) follows the C99 behavior exactly.
1876
1877 Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1878 errno even when it fails. However, it only seems to ever fail due
1879 to an undersized buffer.
1880*/
2523e9b7
VS
1881#if wxUSE_UNICODE_UTF8
1882template<typename BufferType>
1883#else
1884// we only need one version in non-UTF8 builds and at least two Windows
1885// compilers have problems with this function template, so use just one
1886// normal function here
1887#endif
1888static int DoStringPrintfV(wxString& str,
1889 const wxString& format, va_list argptr)
c801d85f 1890{
f6f5941b 1891 int size = 1024;
e87b7833 1892
f6f5941b
VZ
1893 for ( ;; )
1894 {
2523e9b7
VS
1895#if wxUSE_UNICODE_UTF8
1896 BufferType tmp(str, size + 1);
1897 typename BufferType::CharType *buf = tmp;
1898#else
1899 wxStringBuffer tmp(str, size + 1);
de2589be 1900 wxChar *buf = tmp;
2523e9b7 1901#endif
2bb67b80 1902
ba9bbf13
WS
1903 if ( !buf )
1904 {
1905 // out of memory
a33c7045
VS
1906
1907 // in UTF-8 build, leaving uninitialized junk in the buffer
1908 // could result in invalid non-empty UTF-8 string, so just
1909 // reset the string to empty on failure:
1910 buf[0] = '\0';
ba9bbf13 1911 return -1;
e87b7833 1912 }
f6f5941b 1913
ba9bbf13
WS
1914 // wxVsnprintf() may modify the original arg pointer, so pass it
1915 // only a copy
1916 va_list argptrcopy;
1917 wxVaCopy(argptrcopy, argptr);
67612ff1
DE
1918
1919#ifndef __WXWINCE__
1920 // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1921 errno = 0;
1922#endif
2523e9b7 1923 int len = wxVsnprintf(buf, size, format, argptrcopy);
ba9bbf13
WS
1924 va_end(argptrcopy);
1925
1926 // some implementations of vsnprintf() don't NUL terminate
1927 // the string if there is not enough space for it so
1928 // always do it manually
67612ff1
DE
1929 // FIXME: This really seems to be the wrong and would be an off-by-one
1930 // bug except the code above allocates an extra character.
9a83f860 1931 buf[size] = wxT('\0');
ba9bbf13 1932
caff62f2
VZ
1933 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1934 // total number of characters which would have been written if the
b1727cfe 1935 // buffer were large enough (newer standards such as Unix98)
de2589be
VZ
1936 if ( len < 0 )
1937 {
52de37c7
VS
1938 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1939 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1940 // is true if *both* of them use our own implementation,
1941 // otherwise we can't be sure
f2bbe5b6
VZ
1942#if wxUSE_WXVSNPRINTF
1943 // we know that our own implementation of wxVsnprintf() returns -1
1944 // only for a format error - thus there's something wrong with
1945 // the user's format string
a33c7045 1946 buf[0] = '\0';
f2bbe5b6 1947 return -1;
52de37c7
VS
1948#else // possibly using system version
1949 // assume it only returns error if there is not enough space, but
1950 // as we don't know how much we need, double the current size of
1951 // the buffer
67612ff1 1952#ifndef __WXWINCE__
a9a854d7
DE
1953 if( (errno == EILSEQ) || (errno == EINVAL) )
1954 // If errno was set to one of the two well-known hard errors
1955 // then fail immediately to avoid an infinite loop.
1956 return -1;
1957 else
1958#endif // __WXWINCE__
67612ff1
DE
1959 // still not enough, as we don't know how much we need, double the
1960 // current size of the buffer
1961 size *= 2;
f2bbe5b6 1962#endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
de2589be 1963 }
64f8f94c 1964 else if ( len >= size )
de2589be 1965 {
f2bbe5b6 1966#if wxUSE_WXVSNPRINTF
c95e653c 1967 // we know that our own implementation of wxVsnprintf() returns
f2bbe5b6
VZ
1968 // size+1 when there's not enough space but that's not the size
1969 // of the required buffer!
1970 size *= 2; // so we just double the current size of the buffer
1971#else
64f8f94c
VZ
1972 // some vsnprintf() implementations NUL-terminate the buffer and
1973 // some don't in len == size case, to be safe always add 1
67612ff1
DE
1974 // FIXME: I don't quite understand this comment. The vsnprintf
1975 // function is specifically defined to return the number of
1976 // characters printed not including the null terminator.
1977 // So OF COURSE you need to add 1 to get the right buffer size.
1978 // The following line is definitely correct, no question.
64f8f94c 1979 size = len + 1;
f2bbe5b6 1980#endif
de2589be
VZ
1981 }
1982 else // ok, there was enough space
f6f5941b 1983 {
f6f5941b
VZ
1984 break;
1985 }
f6f5941b
VZ
1986 }
1987
1988 // we could have overshot
2523e9b7
VS
1989 str.Shrink();
1990
1991 return str.length();
1992}
c801d85f 1993
2523e9b7
VS
1994int wxString::PrintfV(const wxString& format, va_list argptr)
1995{
2523e9b7
VS
1996#if wxUSE_UNICODE_UTF8
1997 #if wxUSE_STL_BASED_WXSTRING
1998 typedef wxStringTypeBuffer<char> Utf8Buffer;
1999 #else
6798451b 2000 typedef wxStringInternalBuffer Utf8Buffer;
2523e9b7
VS
2001 #endif
2002#endif
2003
2004#if wxUSE_UTF8_LOCALE_ONLY
c6255a6e 2005 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
2523e9b7
VS
2006#else
2007 #if wxUSE_UNICODE_UTF8
2008 if ( wxLocaleIsUtf8 )
c6255a6e 2009 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
2523e9b7
VS
2010 else
2011 // wxChar* version
c6255a6e 2012 return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
2523e9b7 2013 #else
c6255a6e 2014 return DoStringPrintfV(*this, format, argptr);
2523e9b7
VS
2015 #endif // UTF8/WCHAR
2016#endif
c801d85f
KB
2017}
2018
097c080b
VZ
2019// ----------------------------------------------------------------------------
2020// misc other operations
2021// ----------------------------------------------------------------------------
0c5d3e1c 2022
d775fa82 2023// returns true if the string matches the pattern which may contain '*' and
0c5d3e1c
VZ
2024// '?' metacharacters (as usual, '?' matches any character and '*' any number
2025// of them)
8a540c88 2026bool wxString::Matches(const wxString& mask) const
097c080b 2027{
d6044f58
VZ
2028 // I disable this code as it doesn't seem to be faster (in fact, it seems
2029 // to be much slower) than the old, hand-written code below and using it
2030 // here requires always linking with libregex even if the user code doesn't
2031 // use it
2032#if 0 // wxUSE_REGEX
706c2ac9
VZ
2033 // first translate the shell-like mask into a regex
2034 wxString pattern;
2035 pattern.reserve(wxStrlen(pszMask));
2036
9a83f860 2037 pattern += wxT('^');
706c2ac9
VZ
2038 while ( *pszMask )
2039 {
2040 switch ( *pszMask )
2041 {
9a83f860
VZ
2042 case wxT('?'):
2043 pattern += wxT('.');
706c2ac9
VZ
2044 break;
2045
9a83f860
VZ
2046 case wxT('*'):
2047 pattern += wxT(".*");
706c2ac9
VZ
2048 break;
2049
9a83f860
VZ
2050 case wxT('^'):
2051 case wxT('.'):
2052 case wxT('$'):
2053 case wxT('('):
2054 case wxT(')'):
2055 case wxT('|'):
2056 case wxT('+'):
2057 case wxT('\\'):
706c2ac9
VZ
2058 // these characters are special in a RE, quote them
2059 // (however note that we don't quote '[' and ']' to allow
2060 // using them for Unix shell like matching)
9a83f860 2061 pattern += wxT('\\');
706c2ac9
VZ
2062 // fall through
2063
2064 default:
2065 pattern += *pszMask;
2066 }
2067
2068 pszMask++;
2069 }
9a83f860 2070 pattern += wxT('$');
706c2ac9
VZ
2071
2072 // and now use it
2073 return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
2074#else // !wxUSE_REGEX
9a4232dc
VZ
2075 // TODO: this is, of course, awfully inefficient...
2076
8a540c88
VS
2077 // FIXME-UTF8: implement using iterators, remove #if
2078#if wxUSE_UNICODE_UTF8
de4983f3
VS
2079 const wxScopedWCharBuffer maskBuf = mask.wc_str();
2080 const wxScopedWCharBuffer txtBuf = wc_str();
8a540c88
VS
2081 const wxChar *pszMask = maskBuf.data();
2082 const wxChar *pszTxt = txtBuf.data();
2083#else
2084 const wxChar *pszMask = mask.wx_str();
9a4232dc 2085 // the char currently being checked
8a540c88
VS
2086 const wxChar *pszTxt = wx_str();
2087#endif
9a4232dc
VZ
2088
2089 // the last location where '*' matched
2090 const wxChar *pszLastStarInText = NULL;
2091 const wxChar *pszLastStarInMask = NULL;
2092
2093match:
2094 for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
097c080b 2095 switch ( *pszMask ) {
223d09f6
KB
2096 case wxT('?'):
2097 if ( *pszTxt == wxT('\0') )
d775fa82 2098 return false;
097c080b 2099
9a4232dc 2100 // pszTxt and pszMask will be incremented in the loop statement
0c5d3e1c 2101
097c080b
VZ
2102 break;
2103
223d09f6 2104 case wxT('*'):
097c080b 2105 {
9a4232dc
VZ
2106 // remember where we started to be able to backtrack later
2107 pszLastStarInText = pszTxt;
2108 pszLastStarInMask = pszMask;
2109
097c080b 2110 // ignore special chars immediately following this one
9a4232dc 2111 // (should this be an error?)
223d09f6 2112 while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
097c080b
VZ
2113 pszMask++;
2114
2115 // if there is nothing more, match
223d09f6 2116 if ( *pszMask == wxT('\0') )
d775fa82 2117 return true;
097c080b
VZ
2118
2119 // are there any other metacharacters in the mask?
c86f1403 2120 size_t uiLenMask;
223d09f6 2121 const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));
097c080b
VZ
2122
2123 if ( pEndMask != NULL ) {
2124 // we have to match the string between two metachars
2125 uiLenMask = pEndMask - pszMask;
2126 }
2127 else {
2128 // we have to match the remainder of the string
2bb67b80 2129 uiLenMask = wxStrlen(pszMask);
097c080b
VZ
2130 }
2131
2132 wxString strToMatch(pszMask, uiLenMask);
2bb67b80 2133 const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
097c080b 2134 if ( pMatch == NULL )
d775fa82 2135 return false;
097c080b
VZ
2136
2137 // -1 to compensate "++" in the loop
2138 pszTxt = pMatch + uiLenMask - 1;
2139 pszMask += uiLenMask - 1;
2140 }
2141 break;
2142
2143 default:
2144 if ( *pszMask != *pszTxt )
d775fa82 2145 return false;
097c080b
VZ
2146 break;
2147 }
2148 }
2149
2150 // match only if nothing left
9a4232dc 2151 if ( *pszTxt == wxT('\0') )
d775fa82 2152 return true;
9a4232dc
VZ
2153
2154 // if we failed to match, backtrack if we can
2155 if ( pszLastStarInText ) {
2156 pszTxt = pszLastStarInText + 1;
2157 pszMask = pszLastStarInMask;
2158
2159 pszLastStarInText = NULL;
2160
2161 // don't bother resetting pszLastStarInMask, it's unnecessary
2162
2163 goto match;
2164 }
2165
d775fa82 2166 return false;
706c2ac9 2167#endif // wxUSE_REGEX/!wxUSE_REGEX
097c080b
VZ
2168}
2169
1fc5dd6f 2170// Count the number of chars
c9f78968 2171int wxString::Freq(wxUniChar ch) const
1fc5dd6f
JS
2172{
2173 int count = 0;
8f93a29f 2174 for ( const_iterator i = begin(); i != end(); ++i )
1fc5dd6f 2175 {
8f93a29f 2176 if ( *i == ch )
1fc5dd6f
JS
2177 count ++;
2178 }
2179 return count;
2180}
4e79262f 2181