]> git.saurik.com Git - wxWidgets.git/blame - src/common/string.cpp
Don't change file access time implicitly when setting it explicitly.
[wxWidgets.git] / src / common / string.cpp
CommitLineData
c801d85f 1/////////////////////////////////////////////////////////////////////////////
8898456d 2// Name: src/common/string.cpp
c801d85f 3// Purpose: wxString class
59059feb 4// Author: Vadim Zeitlin, Ryan Norton
c801d85f
KB
5// Modified by:
6// Created: 29/01/98
7// RCS-ID: $Id$
8// Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
59059feb 9// (c) 2004 Ryan Norton <wxprojects@comcast.net>
65571936 10// Licence: wxWindows licence
c801d85f
KB
11/////////////////////////////////////////////////////////////////////////////
12
c801d85f
KB
13// ===========================================================================
14// headers, declarations, constants
15// ===========================================================================
16
17// For compilers that support precompilation, includes "wx.h".
18#include "wx/wxprec.h"
19
20#ifdef __BORLANDC__
8898456d 21 #pragma hdrstop
c801d85f
KB
22#endif
23
24#ifndef WX_PRECOMP
8898456d 25 #include "wx/string.h"
2523e9b7 26 #include "wx/wxcrtvararg.h"
ba7e7253 27 #include "wx/log.h"
6b769f3d 28#endif
c801d85f
KB
29
30#include <ctype.h>
92df97b8
WS
31
32#ifndef __WXWINCE__
33 #include <errno.h>
34#endif
35
c801d85f
KB
36#include <string.h>
37#include <stdlib.h>
9a08c20e 38
8116a0c5 39#include "wx/hashmap.h"
072682ce 40#include "wx/vector.h"
529e491c 41#include "wx/xlocale.h"
8f93a29f 42
825d69c1
VZ
43#ifdef __WXMSW__
44 #include "wx/msw/wrapwin.h"
45#endif // __WXMSW__
46
8f93a29f
VS
47// string handling functions used by wxString:
48#if wxUSE_UNICODE_UTF8
49 #define wxStringMemcpy memcpy
50 #define wxStringMemcmp memcmp
51 #define wxStringMemchr memchr
52 #define wxStringStrlen strlen
53#else
54 #define wxStringMemcpy wxTmemcpy
55 #define wxStringMemcmp wxTmemcmp
a7ea63e2
VS
56 #define wxStringMemchr wxTmemchr
57 #define wxStringStrlen wxStrlen
58#endif
8f93a29f 59
4e79262f
VZ
60// ----------------------------------------------------------------------------
61// global variables
62// ----------------------------------------------------------------------------
63
64namespace wxPrivate
65{
66
6df09f32 67static UntypedBufferData s_untypedNullData(NULL, 0);
4e79262f
VZ
68
69UntypedBufferData * const untypedNullDataPtr = &s_untypedNullData;
70
71} // namespace wxPrivate
e87b7833 72
a7ea63e2
VS
73// ---------------------------------------------------------------------------
74// static class variables definition
75// ---------------------------------------------------------------------------
e87b7833 76
a7ea63e2
VS
77//According to STL _must_ be a -1 size_t
78const size_t wxString::npos = (size_t) -1;
8f93a29f 79
68482dc5 80#if wxUSE_STRING_POS_CACHE
68482dc5 81
e810df36
VZ
82#ifdef wxHAS_COMPILER_TLS
83
84wxTLS_TYPE(wxString::Cache) wxString::ms_cache;
85
86#else // !wxHAS_COMPILER_TLS
87
ad8ae788
VZ
88struct wxStrCacheInitializer
89{
90 wxStrCacheInitializer()
91 {
92 // calling this function triggers s_cache initialization in it, and
93 // from now on it becomes safe to call from multiple threads
94 wxString::GetCache();
95 }
96};
97
e317bd3f
SC
98/*
99wxString::Cache& wxString::GetCache()
100{
101 static wxTLS_TYPE(Cache) s_cache;
102
103 return wxTLS_VALUE(s_cache);
104}
105*/
106
ad8ae788
VZ
107static wxStrCacheInitializer gs_stringCacheInit;
108
e810df36
VZ
109#endif // wxHAS_COMPILER_TLS/!wxHAS_COMPILER_TLS
110
68482dc5
VZ
111// gdb seems to be unable to display thread-local variables correctly, at least
112// not my 6.4.98 version under amd64, so provide this debugging helper to do it
4b6a582b 113#if wxDEBUG_LEVEL >= 2
68482dc5
VZ
114
115struct wxStrCacheDumper
116{
117 static void ShowAll()
118 {
119 puts("*** wxString cache dump:");
120 for ( unsigned n = 0; n < wxString::Cache::SIZE; n++ )
121 {
122 const wxString::Cache::Element&
8b73c531 123 c = wxString::GetCacheBegin()[n];
68482dc5
VZ
124
125 printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
126 n,
8b73c531 127 n == wxString::LastUsedCacheElement() ? " [*]" : "",
68482dc5
VZ
128 c.str,
129 (unsigned long)c.pos,
130 (unsigned long)c.impl,
131 (long)c.len);
132 }
133 }
134};
135
136void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
137
4b6a582b 138#endif // wxDEBUG_LEVEL >= 2
68482dc5
VZ
139
140#ifdef wxPROFILE_STRING_CACHE
141
142wxString::CacheStats wxString::ms_cacheStats;
143
8c3b65d9 144struct wxStrCacheStatsDumper
68482dc5 145{
8c3b65d9 146 ~wxStrCacheStatsDumper()
68482dc5
VZ
147 {
148 const wxString::CacheStats& stats = wxString::ms_cacheStats;
149
150 if ( stats.postot )
151 {
152 puts("*** wxString cache statistics:");
153 printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
154 stats.postot);
155 printf("\tHits %u (of which %u not used) or %.2f%%\n",
156 stats.poshits,
157 stats.mishits,
158 100.*float(stats.poshits - stats.mishits)/stats.postot);
159 printf("\tAverage position requested: %.2f\n",
160 float(stats.sumpos) / stats.postot);
161 printf("\tAverage offset after cached hint: %.2f\n",
162 float(stats.sumofs) / stats.postot);
163 }
164
165 if ( stats.lentot )
166 {
167 printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
168 stats.lentot, 100.*float(stats.lenhits)/stats.lentot);
169 }
170 }
8c3b65d9 171};
68482dc5 172
8c3b65d9 173static wxStrCacheStatsDumper s_showCacheStats;
68482dc5
VZ
174
175#endif // wxPROFILE_STRING_CACHE
176
177#endif // wxUSE_STRING_POS_CACHE
178
a7ea63e2
VS
179// ----------------------------------------------------------------------------
180// global functions
181// ----------------------------------------------------------------------------
e87b7833 182
a7ea63e2 183#if wxUSE_STD_IOSTREAM
8f93a29f 184
a7ea63e2 185#include <iostream>
8f93a29f 186
a7ea63e2 187wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
8f93a29f 188{
7a906e1a 189#if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
de4983f3 190 const wxScopedCharBuffer buf(str.AsCharBuf());
ddf01bdb
VZ
191 if ( !buf )
192 os.clear(wxSTD ios_base::failbit);
193 else
194 os << buf.data();
195
196 return os;
a7ea63e2 197#else
7a906e1a 198 return os << str.AsInternal();
a7ea63e2 199#endif
8f93a29f
VS
200}
201
04abe4bc
VS
202wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
203{
204 return os << str.c_str();
205}
206
de4983f3 207wxSTD ostream& operator<<(wxSTD ostream& os, const wxScopedCharBuffer& str)
04abe4bc
VS
208{
209 return os << str.data();
210}
211
212#ifndef __BORLANDC__
de4983f3 213wxSTD ostream& operator<<(wxSTD ostream& os, const wxScopedWCharBuffer& str)
04abe4bc
VS
214{
215 return os << str.data();
216}
217#endif
218
6a6ea041 219#if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
6b61b594
VZ
220
221wxSTD wostream& operator<<(wxSTD wostream& wos, const wxString& str)
222{
223 return wos << str.wc_str();
224}
225
226wxSTD wostream& operator<<(wxSTD wostream& wos, const wxCStrData& str)
227{
228 return wos << str.AsWChar();
229}
230
de4983f3 231wxSTD wostream& operator<<(wxSTD wostream& wos, const wxScopedWCharBuffer& str)
6b61b594
VZ
232{
233 return wos << str.data();
234}
235
6a6ea041 236#endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
6b61b594 237
a7ea63e2 238#endif // wxUSE_STD_IOSTREAM
e87b7833 239
81727065
VS
240// ===========================================================================
241// wxString class core
242// ===========================================================================
243
244#if wxUSE_UNICODE_UTF8
245
81727065
VS
246void wxString::PosLenToImpl(size_t pos, size_t len,
247 size_t *implPos, size_t *implLen) const
248{
249 if ( pos == npos )
68482dc5 250 {
81727065 251 *implPos = npos;
68482dc5
VZ
252 }
253 else // have valid start position
81727065 254 {
68482dc5
VZ
255 const const_iterator b = GetIterForNthChar(pos);
256 *implPos = wxStringImpl::const_iterator(b.impl()) - m_impl.begin();
81727065 257 if ( len == npos )
68482dc5 258 {
81727065 259 *implLen = npos;
68482dc5
VZ
260 }
261 else // have valid length too
81727065 262 {
68482dc5
VZ
263 // we need to handle the case of length specifying a substring
264 // going beyond the end of the string, just as std::string does
265 const const_iterator e(end());
266 const_iterator i(b);
267 while ( len && i <= e )
268 {
269 ++i;
270 --len;
271 }
272
273 *implLen = i.impl() - b.impl();
81727065
VS
274 }
275 }
276}
277
278#endif // wxUSE_UNICODE_UTF8
279
11aac4ba
VS
280// ----------------------------------------------------------------------------
281// wxCStrData converted strings caching
282// ----------------------------------------------------------------------------
283
132276cf
VS
284// FIXME-UTF8: temporarily disabled because it doesn't work with global
285// string objects; re-enable after fixing this bug and benchmarking
286// performance to see if using a hash is a good idea at all
287#if 0
288
11aac4ba
VS
289// For backward compatibility reasons, it must be possible to assign the value
290// returned by wxString::c_str() to a char* or wchar_t* variable and work with
291// it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
292// because the memory would be freed immediately, but it has to be valid as long
293// as the string is not modified, so that code like this still works:
294//
295// const wxChar *s = str.c_str();
296// while ( s ) { ... }
297
298// FIXME-UTF8: not thread safe!
299// FIXME-UTF8: we currently clear the cached conversion only when the string is
300// destroyed, but we should do it when the string is modified, to
301// keep memory usage down
302// FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
303// invalidated the cache on every change, we could keep the previous
304// conversion
305// FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
306// to use mb_str() or wc_str() instead of (const [w]char*)c_str()
307
308template<typename T>
309static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
310{
6c4ebcda 311 typename T::iterator i = hash.find(wxConstCast(s, wxString));
11aac4ba
VS
312 if ( i != hash.end() )
313 {
314 free(i->second);
315 hash.erase(i);
316 }
317}
318
319#if wxUSE_UNICODE
6c4ebcda
VS
320// NB: non-STL implementation doesn't compile with "const wxString*" key type,
321// so we have to use wxString* here and const-cast when used
11aac4ba
VS
322WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
323 wxStringCharConversionCache);
324static wxStringCharConversionCache gs_stringsCharCache;
325
326const char* wxCStrData::AsChar() const
327{
328 // remove previously cache value, if any (see FIXMEs above):
329 DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
330
331 // convert the string and keep it:
6c4ebcda
VS
332 const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
333 m_str->mb_str().release();
11aac4ba
VS
334
335 return s + m_offset;
336}
337#endif // wxUSE_UNICODE
338
339#if !wxUSE_UNICODE_WCHAR
340WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
341 wxStringWCharConversionCache);
342static wxStringWCharConversionCache gs_stringsWCharCache;
343
344const wchar_t* wxCStrData::AsWChar() const
345{
346 // remove previously cache value, if any (see FIXMEs above):
347 DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
348
349 // convert the string and keep it:
6c4ebcda
VS
350 const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
351 m_str->wc_str().release();
11aac4ba
VS
352
353 return s + m_offset;
354}
355#endif // !wxUSE_UNICODE_WCHAR
356
11aac4ba
VS
357wxString::~wxString()
358{
359#if wxUSE_UNICODE
360 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
361 DeleteStringFromConversionCache(gs_stringsCharCache, this);
362#endif
363#if !wxUSE_UNICODE_WCHAR
364 DeleteStringFromConversionCache(gs_stringsWCharCache, this);
365#endif
366}
132276cf
VS
367#endif
368
132276cf
VS
369// ===========================================================================
370// wxString class core
371// ===========================================================================
372
373// ---------------------------------------------------------------------------
374// construction and conversion
375// ---------------------------------------------------------------------------
11aac4ba 376
81727065 377#if wxUSE_UNICODE_WCHAR
8f93a29f
VS
378/* static */
379wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
04abe4bc 380 const wxMBConv& conv)
8f93a29f
VS
381{
382 // anything to do?
383 if ( !psz || nLength == 0 )
de4983f3 384 return SubstrBufFromMB(wxWCharBuffer(L""), 0);
8f93a29f
VS
385
386 if ( nLength == npos )
387 nLength = wxNO_LEN;
388
389 size_t wcLen;
de4983f3 390 wxScopedWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
8f93a29f 391 if ( !wcLen )
de4983f3 392 return SubstrBufFromMB(wxWCharBuffer(L""), 0);
8f93a29f
VS
393 else
394 return SubstrBufFromMB(wcBuf, wcLen);
395}
81727065
VS
396#endif // wxUSE_UNICODE_WCHAR
397
398#if wxUSE_UNICODE_UTF8
399/* static */
400wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
401 const wxMBConv& conv)
402{
81727065
VS
403 // anything to do?
404 if ( !psz || nLength == 0 )
de4983f3 405 return SubstrBufFromMB(wxCharBuffer(""), 0);
81727065 406
111d9948
VS
407 // if psz is already in UTF-8, we don't have to do the roundtrip to
408 // wchar_t* and back:
409 if ( conv.IsUTF8() )
410 {
411 // we need to validate the input because UTF8 iterators assume valid
412 // UTF-8 sequence and psz may be invalid:
413 if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
414 {
9ef1ad0d
VZ
415 // we must pass the real string length to SubstrBufFromMB ctor
416 if ( nLength == npos )
417 nLength = psz ? strlen(psz) : 0;
38d26d60 418 return SubstrBufFromMB(wxScopedCharBuffer::CreateNonOwned(psz, nLength),
6df09f32 419 nLength);
111d9948
VS
420 }
421 // else: do the roundtrip through wchar_t*
422 }
423
81727065
VS
424 if ( nLength == npos )
425 nLength = wxNO_LEN;
426
427 // first convert to wide string:
428 size_t wcLen;
de4983f3 429 wxScopedWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
81727065 430 if ( !wcLen )
de4983f3 431 return SubstrBufFromMB(wxCharBuffer(""), 0);
81727065
VS
432
433 // and then to UTF-8:
4fdfe2f3 434 SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
81727065 435 // widechar -> UTF-8 conversion isn't supposed to ever fail:
9a83f860 436 wxASSERT_MSG( buf.data, wxT("conversion to UTF-8 failed") );
81727065
VS
437
438 return buf;
439}
440#endif // wxUSE_UNICODE_UTF8
441
442#if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
8f93a29f
VS
443/* static */
444wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
04abe4bc 445 const wxMBConv& conv)
8f93a29f
VS
446{
447 // anything to do?
448 if ( !pwz || nLength == 0 )
de4983f3 449 return SubstrBufFromWC(wxCharBuffer(""), 0);
8f93a29f
VS
450
451 if ( nLength == npos )
452 nLength = wxNO_LEN;
453
454 size_t mbLen;
de4983f3 455 wxScopedCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
8f93a29f 456 if ( !mbLen )
de4983f3 457 return SubstrBufFromWC(wxCharBuffer(""), 0);
8f93a29f
VS
458 else
459 return SubstrBufFromWC(mbBuf, mbLen);
460}
81727065 461#endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
8f93a29f 462
f54cb154
VZ
463// This std::string::c_str()-like method returns a wide char pointer to string
464// contents. In wxUSE_UNICODE_WCHAR case it is trivial as it can simply return
465// a pointer to the internal representation. Otherwise a conversion is required
466// and it returns a temporary buffer.
467//
468// However for compatibility with c_str() and to avoid breaking existing code
469// doing
470//
471// for ( const wchar_t *p = s.wc_str(); *p; p++ )
472// ... use *p...
473//
474// we actually need to ensure that the returned buffer is _not_ temporary and
475// so we use wxString::m_convertedToWChar to store the returned data
476#if !wxUSE_UNICODE_WCHAR
8f93a29f 477
f54cb154 478const wchar_t *wxString::AsWChar(const wxMBConv& conv) const
265d5cce 479{
f54cb154
VZ
480 const char * const strMB = m_impl.c_str();
481 const size_t lenMB = m_impl.length();
482
483 // find out the size of the buffer needed
484 const size_t lenWC = conv.ToWChar(NULL, 0, strMB, lenMB);
485 if ( lenWC == wxCONV_FAILED )
486 return NULL;
487
488 // keep the same buffer if the string size didn't change: this is not only
489 // an optimization but also ensure that code which modifies string
490 // character by character (without changing its length) can continue to use
491 // the pointer returned by a previous wc_str() call even after changing the
492 // string
493
494 // TODO-UTF8: we could check for ">" instead of "!=" here as this would
495 // allow to save on buffer reallocations but at the cost of
496 // consuming (even) more memory, we should benchmark this to
497 // determine if it's worth doing
498 if ( !m_convertedToWChar.m_str || lenWC != m_convertedToWChar.m_len )
499 {
500 if ( !const_cast<wxString *>(this)->m_convertedToWChar.Extend(lenWC) )
501 return NULL;
502 }
e87b7833 503
f54cb154
VZ
504 // finally do convert
505 m_convertedToWChar.m_str[lenWC] = L'\0';
506 if ( conv.ToWChar(m_convertedToWChar.m_str, lenWC,
507 strMB, lenMB) == wxCONV_FAILED )
508 return NULL;
e87b7833 509
f54cb154 510 return m_convertedToWChar.m_str;
81727065
VS
511}
512
f54cb154
VZ
513#endif // !wxUSE_UNICODE_WCHAR
514
515
516// Same thing for mb_str() which returns a normal char pointer to string
517// contents: this always requires converting it to the specified encoding in
518// non-ANSI build except if we need to convert to UTF-8 and this is what we
519// already use internally.
520#if wxUSE_UNICODE
521
522const char *wxString::AsChar(const wxMBConv& conv) const
81727065 523{
f54cb154 524#if wxUSE_UNICODE_UTF8
111d9948 525 if ( conv.IsUTF8() )
f54cb154 526 return m_impl.c_str();
111d9948 527
f54cb154
VZ
528 const wchar_t * const strWC = AsWChar(wxMBConvStrictUTF8());
529 const size_t lenWC = m_convertedToWChar.m_len;
530#else // wxUSE_UNICODE_WCHAR
531 const wchar_t * const strWC = m_impl.c_str();
532 const size_t lenWC = m_impl.length();
533#endif // wxUSE_UNICODE_UTF8/wxUSE_UNICODE_WCHAR
81727065 534
f54cb154
VZ
535 const size_t lenMB = conv.FromWChar(NULL, 0, strWC, lenWC);
536 if ( lenMB == wxCONV_FAILED )
537 return NULL;
538
539 if ( !m_convertedToChar.m_str || lenMB != m_convertedToChar.m_len )
540 {
541 if ( !const_cast<wxString *>(this)->m_convertedToChar.Extend(lenMB) )
542 return NULL;
543 }
81727065 544
f54cb154
VZ
545 m_convertedToChar.m_str[lenMB] = '\0';
546 if ( conv.FromWChar(m_convertedToChar.m_str, lenMB,
547 strWC, lenWC) == wxCONV_FAILED )
548 return NULL;
eec47cc6 549
f54cb154 550 return m_convertedToChar.m_str;
265d5cce 551}
7663d0d4 552
f54cb154 553#endif // wxUSE_UNICODE
e87b7833
MB
554
555// shrink to minimal size (releasing extra memory)
556bool wxString::Shrink()
557{
558 wxString tmp(begin(), end());
559 swap(tmp);
560 return tmp.length() == length();
561}
562
d8a4b666 563// deprecated compatibility code:
a7ea63e2 564#if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
c87a0bc8 565wxStringCharType *wxString::GetWriteBuf(size_t nLen)
d8a4b666
VS
566{
567 return DoGetWriteBuf(nLen);
568}
569
570void wxString::UngetWriteBuf()
571{
572 DoUngetWriteBuf();
573}
574
575void wxString::UngetWriteBuf(size_t nLen)
576{
577 DoUngetWriteBuf(nLen);
578}
a7ea63e2 579#endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
e87b7833 580
d8a4b666 581
e87b7833
MB
582// ---------------------------------------------------------------------------
583// data access
584// ---------------------------------------------------------------------------
585
586// all functions are inline in string.h
587
588// ---------------------------------------------------------------------------
e8f59039 589// concatenation operators
e87b7833
MB
590// ---------------------------------------------------------------------------
591
c801d85f 592/*
c801d85f
KB
593 * concatenation functions come in 5 flavours:
594 * string + string
595 * char + string and string + char
596 * C str + string and string + C str
597 */
598
b1801e0e 599wxString operator+(const wxString& str1, const wxString& str2)
c801d85f 600{
992527a5 601#if !wxUSE_STL_BASED_WXSTRING
8f93a29f
VS
602 wxASSERT( str1.IsValid() );
603 wxASSERT( str2.IsValid() );
e87b7833 604#endif
097c080b 605
3458e408
WS
606 wxString s = str1;
607 s += str2;
3168a13f 608
3458e408 609 return s;
c801d85f
KB
610}
611
c9f78968 612wxString operator+(const wxString& str, wxUniChar ch)
c801d85f 613{
992527a5 614#if !wxUSE_STL_BASED_WXSTRING
8f93a29f 615 wxASSERT( str.IsValid() );
e87b7833 616#endif
3168a13f 617
3458e408
WS
618 wxString s = str;
619 s += ch;
097c080b 620
3458e408 621 return s;
c801d85f
KB
622}
623
c9f78968 624wxString operator+(wxUniChar ch, const wxString& str)
c801d85f 625{
992527a5 626#if !wxUSE_STL_BASED_WXSTRING
8f93a29f 627 wxASSERT( str.IsValid() );
e87b7833 628#endif
097c080b 629
3458e408
WS
630 wxString s = ch;
631 s += str;
3168a13f 632
3458e408 633 return s;
c801d85f
KB
634}
635
8f93a29f 636wxString operator+(const wxString& str, const char *psz)
c801d85f 637{
992527a5 638#if !wxUSE_STL_BASED_WXSTRING
8f93a29f 639 wxASSERT( str.IsValid() );
e87b7833 640#endif
097c080b 641
3458e408 642 wxString s;
8f93a29f 643 if ( !s.Alloc(strlen(psz) + str.length()) ) {
9a83f860 644 wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
3458e408
WS
645 }
646 s += str;
647 s += psz;
3168a13f 648
3458e408 649 return s;
c801d85f
KB
650}
651
8f93a29f 652wxString operator+(const wxString& str, const wchar_t *pwz)
c801d85f 653{
992527a5 654#if !wxUSE_STL_BASED_WXSTRING
8f93a29f
VS
655 wxASSERT( str.IsValid() );
656#endif
657
658 wxString s;
659 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
9a83f860 660 wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
8f93a29f
VS
661 }
662 s += str;
663 s += pwz;
664
665 return s;
666}
667
668wxString operator+(const char *psz, const wxString& str)
669{
a7ea63e2
VS
670#if !wxUSE_STL_BASED_WXSTRING
671 wxASSERT( str.IsValid() );
672#endif
673
674 wxString s;
675 if ( !s.Alloc(strlen(psz) + str.length()) ) {
9a83f860 676 wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
a7ea63e2
VS
677 }
678 s = psz;
679 s += str;
680
681 return s;
682}
683
684wxString operator+(const wchar_t *pwz, const wxString& str)
685{
686#if !wxUSE_STL_BASED_WXSTRING
687 wxASSERT( str.IsValid() );
688#endif
689
690 wxString s;
691 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
9a83f860 692 wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
a7ea63e2
VS
693 }
694 s = pwz;
695 s += str;
696
697 return s;
698}
699
700// ---------------------------------------------------------------------------
701// string comparison
702// ---------------------------------------------------------------------------
703
52de37c7
VS
704bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
705{
706 return (length() == 1) && (compareWithCase ? GetChar(0u) == c
707 : wxToupper(GetChar(0u)) == wxToupper(c));
708}
709
a7ea63e2
VS
710#ifdef HAVE_STD_STRING_COMPARE
711
712// NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
713// UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
714// sort strings in characters code point order by sorting the byte sequence
715// in byte values order (i.e. what strcmp() and memcmp() do).
716
717int wxString::compare(const wxString& str) const
718{
719 return m_impl.compare(str.m_impl);
720}
721
722int wxString::compare(size_t nStart, size_t nLen,
723 const wxString& str) const
724{
725 size_t pos, len;
726 PosLenToImpl(nStart, nLen, &pos, &len);
727 return m_impl.compare(pos, len, str.m_impl);
728}
729
730int wxString::compare(size_t nStart, size_t nLen,
731 const wxString& str,
732 size_t nStart2, size_t nLen2) const
733{
734 size_t pos, len;
735 PosLenToImpl(nStart, nLen, &pos, &len);
736
737 size_t pos2, len2;
738 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
739
740 return m_impl.compare(pos, len, str.m_impl, pos2, len2);
741}
742
743int wxString::compare(const char* sz) const
744{
745 return m_impl.compare(ImplStr(sz));
746}
747
748int wxString::compare(const wchar_t* sz) const
749{
750 return m_impl.compare(ImplStr(sz));
751}
752
753int wxString::compare(size_t nStart, size_t nLen,
754 const char* sz, size_t nCount) const
755{
756 size_t pos, len;
757 PosLenToImpl(nStart, nLen, &pos, &len);
758
759 SubstrBufFromMB str(ImplStr(sz, nCount));
760
761 return m_impl.compare(pos, len, str.data, str.len);
762}
763
764int wxString::compare(size_t nStart, size_t nLen,
765 const wchar_t* sz, size_t nCount) const
766{
767 size_t pos, len;
768 PosLenToImpl(nStart, nLen, &pos, &len);
769
770 SubstrBufFromWC str(ImplStr(sz, nCount));
771
772 return m_impl.compare(pos, len, str.data, str.len);
773}
774
775#else // !HAVE_STD_STRING_COMPARE
776
777static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
778 const wxStringCharType* s2, size_t l2)
779{
780 if( l1 == l2 )
781 return wxStringMemcmp(s1, s2, l1);
782 else if( l1 < l2 )
783 {
784 int ret = wxStringMemcmp(s1, s2, l1);
785 return ret == 0 ? -1 : ret;
786 }
787 else
788 {
789 int ret = wxStringMemcmp(s1, s2, l2);
790 return ret == 0 ? +1 : ret;
791 }
792}
793
794int wxString::compare(const wxString& str) const
795{
796 return ::wxDoCmp(m_impl.data(), m_impl.length(),
797 str.m_impl.data(), str.m_impl.length());
798}
799
800int wxString::compare(size_t nStart, size_t nLen,
801 const wxString& str) const
802{
803 wxASSERT(nStart <= length());
804 size_type strLen = length() - nStart;
805 nLen = strLen < nLen ? strLen : nLen;
806
807 size_t pos, len;
808 PosLenToImpl(nStart, nLen, &pos, &len);
809
810 return ::wxDoCmp(m_impl.data() + pos, len,
811 str.m_impl.data(), str.m_impl.length());
812}
813
814int wxString::compare(size_t nStart, size_t nLen,
815 const wxString& str,
816 size_t nStart2, size_t nLen2) const
817{
818 wxASSERT(nStart <= length());
819 wxASSERT(nStart2 <= str.length());
820 size_type strLen = length() - nStart,
821 strLen2 = str.length() - nStart2;
822 nLen = strLen < nLen ? strLen : nLen;
823 nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
824
825 size_t pos, len;
826 PosLenToImpl(nStart, nLen, &pos, &len);
827 size_t pos2, len2;
828 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
829
830 return ::wxDoCmp(m_impl.data() + pos, len,
831 str.m_impl.data() + pos2, len2);
832}
833
834int wxString::compare(const char* sz) const
835{
836 SubstrBufFromMB str(ImplStr(sz, npos));
837 if ( str.len == npos )
838 str.len = wxStringStrlen(str.data);
839 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
840}
841
842int wxString::compare(const wchar_t* sz) const
843{
844 SubstrBufFromWC str(ImplStr(sz, npos));
845 if ( str.len == npos )
846 str.len = wxStringStrlen(str.data);
847 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
848}
849
850int wxString::compare(size_t nStart, size_t nLen,
851 const char* sz, size_t nCount) const
852{
853 wxASSERT(nStart <= length());
854 size_type strLen = length() - nStart;
855 nLen = strLen < nLen ? strLen : nLen;
097c080b 856
a7ea63e2
VS
857 size_t pos, len;
858 PosLenToImpl(nStart, nLen, &pos, &len);
3168a13f 859
a7ea63e2
VS
860 SubstrBufFromMB str(ImplStr(sz, nCount));
861 if ( str.len == npos )
862 str.len = wxStringStrlen(str.data);
863
864 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
c801d85f
KB
865}
866
a7ea63e2
VS
867int wxString::compare(size_t nStart, size_t nLen,
868 const wchar_t* sz, size_t nCount) const
8f93a29f 869{
a7ea63e2
VS
870 wxASSERT(nStart <= length());
871 size_type strLen = length() - nStart;
872 nLen = strLen < nLen ? strLen : nLen;
8f93a29f 873
a7ea63e2
VS
874 size_t pos, len;
875 PosLenToImpl(nStart, nLen, &pos, &len);
8f93a29f 876
a7ea63e2
VS
877 SubstrBufFromWC str(ImplStr(sz, nCount));
878 if ( str.len == npos )
879 str.len = wxStringStrlen(str.data);
880
881 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
8f93a29f
VS
882}
883
a7ea63e2
VS
884#endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
885
886
8f93a29f
VS
887// ---------------------------------------------------------------------------
888// find_{first,last}_[not]_of functions
889// ---------------------------------------------------------------------------
890
891#if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
c801d85f 892
8f93a29f
VS
893// NB: All these functions are implemented with the argument being wxChar*,
894// i.e. widechar string in any Unicode build, even though native string
895// representation is char* in the UTF-8 build. This is because we couldn't
896// use memchr() to determine if a character is in a set encoded as UTF-8.
897
898size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
dcb68102 899{
8f93a29f 900 return find_first_of(sz, nStart, wxStrlen(sz));
dcb68102
RN
901}
902
8f93a29f 903size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
dcb68102 904{
8f93a29f 905 return find_first_not_of(sz, nStart, wxStrlen(sz));
dcb68102
RN
906}
907
8f93a29f 908size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
dcb68102 909{
9a83f860 910 wxASSERT_MSG( nStart <= length(), wxT("invalid index") );
dcb68102 911
8f93a29f
VS
912 size_t idx = nStart;
913 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
dcb68102 914 {
8f93a29f
VS
915 if ( wxTmemchr(sz, *i, n) )
916 return idx;
dcb68102 917 }
8f93a29f
VS
918
919 return npos;
920}
921
922size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
923{
9a83f860 924 wxASSERT_MSG( nStart <= length(), wxT("invalid index") );
8f93a29f
VS
925
926 size_t idx = nStart;
927 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
dcb68102 928 {
8f93a29f
VS
929 if ( !wxTmemchr(sz, *i, n) )
930 return idx;
931 }
932
933 return npos;
934}
935
936
937size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
938{
939 return find_last_of(sz, nStart, wxStrlen(sz));
940}
941
942size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
943{
944 return find_last_not_of(sz, nStart, wxStrlen(sz));
945}
946
947size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
948{
949 size_t len = length();
950
951 if ( nStart == npos )
952 {
953 nStart = len - 1;
dcb68102 954 }
2c09fb3b 955 else
dcb68102 956 {
9a83f860 957 wxASSERT_MSG( nStart <= len, wxT("invalid index") );
dcb68102 958 }
8f93a29f
VS
959
960 size_t idx = nStart;
961 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
962 i != rend(); --idx, ++i )
963 {
964 if ( wxTmemchr(sz, *i, n) )
965 return idx;
966 }
967
968 return npos;
dcb68102
RN
969}
970
8f93a29f 971size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
dcb68102 972{
8f93a29f
VS
973 size_t len = length();
974
975 if ( nStart == npos )
976 {
977 nStart = len - 1;
978 }
979 else
980 {
9a83f860 981 wxASSERT_MSG( nStart <= len, wxT("invalid index") );
8f93a29f
VS
982 }
983
984 size_t idx = nStart;
985 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
986 i != rend(); --idx, ++i )
987 {
988 if ( !wxTmemchr(sz, *i, n) )
989 return idx;
990 }
991
992 return npos;
dcb68102
RN
993}
994
8f93a29f 995size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
dcb68102 996{
9a83f860 997 wxASSERT_MSG( nStart <= length(), wxT("invalid index") );
8f93a29f
VS
998
999 size_t idx = nStart;
1000 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
1001 {
1002 if ( *i != ch )
1003 return idx;
1004 }
1005
1006 return npos;
1007}
1008
1009size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
1010{
1011 size_t len = length();
1012
1013 if ( nStart == npos )
1014 {
1015 nStart = len - 1;
1016 }
1017 else
1018 {
9a83f860 1019 wxASSERT_MSG( nStart <= len, wxT("invalid index") );
8f93a29f
VS
1020 }
1021
1022 size_t idx = nStart;
1023 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1024 i != rend(); --idx, ++i )
1025 {
1026 if ( *i != ch )
1027 return idx;
1028 }
1029
1030 return npos;
1031}
1032
1033// the functions above were implemented for wchar_t* arguments in Unicode
1034// build and char* in ANSI build; below are implementations for the other
1035// version:
1036#if wxUSE_UNICODE
1037 #define wxOtherCharType char
1038 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
1039#else
1040 #define wxOtherCharType wchar_t
1041 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
1042#endif
1043
1044size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
1045 { return find_first_of(STRCONV(sz), nStart); }
1046
1047size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
1048 size_t n) const
1049 { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
1050size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
1051 { return find_last_of(STRCONV(sz), nStart); }
1052size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
1053 size_t n) const
1054 { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
1055size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
1056 { return find_first_not_of(STRCONV(sz), nStart); }
1057size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
1058 size_t n) const
1059 { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
1060size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
1061 { return find_last_not_of(STRCONV(sz), nStart); }
1062size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
1063 size_t n) const
1064 { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
1065
1066#undef wxOtherCharType
1067#undef STRCONV
1068
1069#endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1070
1071// ===========================================================================
1072// other common string functions
1073// ===========================================================================
1074
1075int wxString::CmpNoCase(const wxString& s) const
1076{
825d69c1
VZ
1077#if defined(__WXMSW__) && !wxUSE_UNICODE_UTF8
1078 // prefer to use CompareString() if available as it's more efficient than
1079 // doing it manual or even using wxStricmp() (see #10375)
1080 switch ( ::CompareString(LOCALE_USER_DEFAULT, NORM_IGNORECASE,
1081 m_impl.c_str(), m_impl.length(),
1082 s.m_impl.c_str(), s.m_impl.length()) )
1083 {
1084 case CSTR_LESS_THAN:
1085 return -1;
1086
1087 case CSTR_EQUAL:
1088 return 0;
1089
1090 case CSTR_GREATER_THAN:
1091 return 1;
8f93a29f 1092
825d69c1
VZ
1093 default:
1094 wxFAIL_MSG( "unexpected CompareString() return value" );
1095 // fall through
1096
1097 case 0:
1098 wxLogLastError("CompareString");
1099 // use generic code below
1100 }
1101#endif // __WXMSW__ && !wxUSE_UNICODE_UTF8
1102
1103 // do the comparison manually: notice that we can't use wxStricmp() as it
1104 // doesn't handle embedded NULs
1105
1106 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
8f93a29f
VS
1107 const_iterator i1 = begin();
1108 const_iterator end1 = end();
1109 const_iterator i2 = s.begin();
1110 const_iterator end2 = s.end();
1111
0d8b0f94 1112 for ( ; i1 != end1 && i2 != end2; ++i1, ++i2 )
8f93a29f
VS
1113 {
1114 wxUniChar lower1 = (wxChar)wxTolower(*i1);
1115 wxUniChar lower2 = (wxChar)wxTolower(*i2);
1116 if ( lower1 != lower2 )
1117 return lower1 < lower2 ? -1 : 1;
1118 }
1119
1120 size_t len1 = length();
1121 size_t len2 = s.length();
dcb68102 1122
8f93a29f
VS
1123 if ( len1 < len2 )
1124 return -1;
1125 else if ( len1 > len2 )
1126 return 1;
1127 return 0;
dcb68102
RN
1128}
1129
1130
b1ac3b56 1131#if wxUSE_UNICODE
e015c2a3 1132
cf6bedce
SC
1133#ifdef __MWERKS__
1134#ifndef __SCHAR_MAX__
1135#define __SCHAR_MAX__ 127
1136#endif
1137#endif
1138
e6310bbc 1139wxString wxString::FromAscii(const char *ascii, size_t len)
b1ac3b56 1140{
e6310bbc 1141 if (!ascii || len == 0)
b1ac3b56 1142 return wxEmptyString;
e015c2a3 1143
b1ac3b56 1144 wxString res;
e015c2a3 1145
e6310bbc 1146 {
6798451b 1147 wxStringInternalBuffer buf(res, len);
602a857b 1148 wxStringCharType *dest = buf;
c1eada83 1149
602a857b
VS
1150 for ( ; len > 0; --len )
1151 {
1152 unsigned char c = (unsigned char)*ascii++;
1153 wxASSERT_MSG( c < 0x80,
9a83f860 1154 wxT("Non-ASCII value passed to FromAscii().") );
c1eada83 1155
602a857b
VS
1156 *dest++ = (wchar_t)c;
1157 }
e015c2a3
VZ
1158 }
1159
b1ac3b56
RR
1160 return res;
1161}
1162
e6310bbc
VS
1163wxString wxString::FromAscii(const char *ascii)
1164{
0081dd72 1165 return FromAscii(ascii, wxStrlen(ascii));
e6310bbc
VS
1166}
1167
c5288c5c 1168wxString wxString::FromAscii(char ascii)
2b5f62a0
VZ
1169{
1170 // What do we do with '\0' ?
1171
c1eada83 1172 unsigned char c = (unsigned char)ascii;
8760bc65 1173
9a83f860 1174 wxASSERT_MSG( c < 0x80, wxT("Non-ASCII value passed to FromAscii().") );
c1eada83
VS
1175
1176 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1177 return wxString(wxUniChar((wchar_t)c));
2b5f62a0
VZ
1178}
1179
de4983f3 1180const wxScopedCharBuffer wxString::ToAscii() const
b1ac3b56 1181{
e015c2a3
VZ
1182 // this will allocate enough space for the terminating NUL too
1183 wxCharBuffer buffer(length());
6e394fc6 1184 char *dest = buffer.data();
e015c2a3 1185
c1eada83 1186 for ( const_iterator i = begin(); i != end(); ++i )
b1ac3b56 1187 {
c1eada83
VS
1188 wxUniChar c(*i);
1189 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1190 *dest++ = c.IsAscii() ? (char)c : '_';
e015c2a3
VZ
1191
1192 // the output string can't have embedded NULs anyhow, so we can safely
1193 // stop at first of them even if we do have any
c1eada83 1194 if ( !c )
e015c2a3 1195 break;
b1ac3b56 1196 }
e015c2a3 1197
b1ac3b56
RR
1198 return buffer;
1199}
e015c2a3 1200
c1eada83 1201#endif // wxUSE_UNICODE
b1ac3b56 1202
c801d85f 1203// extract string of length nCount starting at nFirst
c801d85f
KB
1204wxString wxString::Mid(size_t nFirst, size_t nCount) const
1205{
73f507f5 1206 size_t nLen = length();
30d9011f 1207
73f507f5
WS
1208 // default value of nCount is npos and means "till the end"
1209 if ( nCount == npos )
1210 {
1211 nCount = nLen - nFirst;
1212 }
30d9011f 1213
73f507f5
WS
1214 // out-of-bounds requests return sensible things
1215 if ( nFirst + nCount > nLen )
1216 {
1217 nCount = nLen - nFirst;
1218 }
c801d85f 1219
73f507f5
WS
1220 if ( nFirst > nLen )
1221 {
1222 // AllocCopy() will return empty string
1223 return wxEmptyString;
1224 }
c801d85f 1225
73f507f5
WS
1226 wxString dest(*this, nFirst, nCount);
1227 if ( dest.length() != nCount )
1228 {
9a83f860 1229 wxFAIL_MSG( wxT("out of memory in wxString::Mid") );
73f507f5 1230 }
30d9011f 1231
73f507f5 1232 return dest;
c801d85f
KB
1233}
1234
e87b7833 1235// check that the string starts with prefix and return the rest of the string
d775fa82 1236// in the provided pointer if it is not NULL, otherwise return false
c5e7a7d7 1237bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
f6bcfd97 1238{
c5e7a7d7
VS
1239 if ( compare(0, prefix.length(), prefix) != 0 )
1240 return false;
f6bcfd97
BP
1241
1242 if ( rest )
1243 {
1244 // put the rest of the string into provided pointer
c5e7a7d7 1245 rest->assign(*this, prefix.length(), npos);
f6bcfd97
BP
1246 }
1247
d775fa82 1248 return true;
f6bcfd97
BP
1249}
1250
3affcd07
VZ
1251
1252// check that the string ends with suffix and return the rest of it in the
1253// provided pointer if it is not NULL, otherwise return false
c5e7a7d7 1254bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
3affcd07 1255{
c5e7a7d7 1256 int start = length() - suffix.length();
81727065
VS
1257
1258 if ( start < 0 || compare(start, npos, suffix) != 0 )
3affcd07
VZ
1259 return false;
1260
1261 if ( rest )
1262 {
1263 // put the rest of the string into provided pointer
1264 rest->assign(*this, 0, start);
1265 }
1266
1267 return true;
1268}
1269
1270
c801d85f
KB
1271// extract nCount last (rightmost) characters
1272wxString wxString::Right(size_t nCount) const
1273{
e87b7833
MB
1274 if ( nCount > length() )
1275 nCount = length();
c801d85f 1276
e87b7833
MB
1277 wxString dest(*this, length() - nCount, nCount);
1278 if ( dest.length() != nCount ) {
9a83f860 1279 wxFAIL_MSG( wxT("out of memory in wxString::Right") );
b1801e0e 1280 }
c801d85f
KB
1281 return dest;
1282}
1283
7929902d 1284// get all characters after the last occurrence of ch
c801d85f 1285// (returns the whole string if ch not found)
c9f78968 1286wxString wxString::AfterLast(wxUniChar ch) const
c801d85f
KB
1287{
1288 wxString str;
d775fa82 1289 int iPos = Find(ch, true);
3c67202d 1290 if ( iPos == wxNOT_FOUND )
c801d85f
KB
1291 str = *this;
1292 else
c565abe1 1293 str.assign(*this, iPos + 1, npos);
c801d85f
KB
1294
1295 return str;
1296}
1297
1298// extract nCount first (leftmost) characters
1299wxString wxString::Left(size_t nCount) const
1300{
e87b7833
MB
1301 if ( nCount > length() )
1302 nCount = length();
c801d85f 1303
e87b7833
MB
1304 wxString dest(*this, 0, nCount);
1305 if ( dest.length() != nCount ) {
9a83f860 1306 wxFAIL_MSG( wxT("out of memory in wxString::Left") );
b1801e0e 1307 }
c801d85f
KB
1308 return dest;
1309}
1310
7929902d 1311// get all characters before the first occurrence of ch
c801d85f 1312// (returns the whole string if ch not found)
c9f78968 1313wxString wxString::BeforeFirst(wxUniChar ch) const
c801d85f 1314{
e87b7833 1315 int iPos = Find(ch);
c565abe1
VZ
1316 if ( iPos == wxNOT_FOUND )
1317 iPos = length();
e87b7833 1318 return wxString(*this, 0, iPos);
c801d85f
KB
1319}
1320
7929902d 1321/// get all characters before the last occurrence of ch
c801d85f 1322/// (returns empty string if ch not found)
c9f78968 1323wxString wxString::BeforeLast(wxUniChar ch) const
c801d85f
KB
1324{
1325 wxString str;
d775fa82 1326 int iPos = Find(ch, true);
3c67202d 1327 if ( iPos != wxNOT_FOUND && iPos != 0 )
d1c9bbf6 1328 str = wxString(c_str(), iPos);
c801d85f
KB
1329
1330 return str;
1331}
1332
7929902d 1333/// get all characters after the first occurrence of ch
c801d85f 1334/// (returns empty string if ch not found)
c9f78968 1335wxString wxString::AfterFirst(wxUniChar ch) const
c801d85f
KB
1336{
1337 wxString str;
1338 int iPos = Find(ch);
3c67202d 1339 if ( iPos != wxNOT_FOUND )
c565abe1 1340 str.assign(*this, iPos + 1, npos);
c801d85f
KB
1341
1342 return str;
1343}
1344
7929902d 1345// replace first (or all) occurrences of some substring with another one
8a540c88
VS
1346size_t wxString::Replace(const wxString& strOld,
1347 const wxString& strNew, bool bReplaceAll)
c801d85f 1348{
a8f1f1b2 1349 // if we tried to replace an empty string we'd enter an infinite loop below
8a540c88 1350 wxCHECK_MSG( !strOld.empty(), 0,
9a83f860 1351 wxT("wxString::Replace(): invalid parameter") );
a8f1f1b2 1352
68482dc5
VZ
1353 wxSTRING_INVALIDATE_CACHE();
1354
510bb748 1355 size_t uiCount = 0; // count of replacements made
c801d85f 1356
8a627032
VZ
1357 // optimize the special common case: replacement of one character by
1358 // another one (in UTF-8 case we can only do this for ASCII characters)
1359 //
1360 // benchmarks show that this special version is around 3 times faster
1361 // (depending on the proportion of matching characters and UTF-8/wchar_t
1362 // build)
1363 if ( strOld.m_impl.length() == 1 && strNew.m_impl.length() == 1 )
1364 {
1365 const wxStringCharType chOld = strOld.m_impl[0],
1366 chNew = strNew.m_impl[0];
1367
1368 // this loop is the simplified version of the one below
1369 for ( size_t pos = 0; ; )
1370 {
1371 pos = m_impl.find(chOld, pos);
1372 if ( pos == npos )
1373 break;
c801d85f 1374
8a627032
VZ
1375 m_impl[pos++] = chNew;
1376
1377 uiCount++;
1378
1379 if ( !bReplaceAll )
1380 break;
1381 }
1382 }
072682ce
VZ
1383 else if ( !bReplaceAll)
1384 {
1385 size_t pos = m_impl.find(strOld, 0);
1386 if ( pos != npos )
1387 {
1388 m_impl.replace(pos, strOld.m_impl.length(), strNew.m_impl);
1389 uiCount = 1;
1390 }
1391 }
1392 else // replace all occurrences
510bb748 1393 {
8a627032
VZ
1394 const size_t uiOldLen = strOld.m_impl.length();
1395 const size_t uiNewLen = strNew.m_impl.length();
1396
072682ce
VZ
1397 // first scan the string to find all positions at which the replacement
1398 // should be made
1399 wxVector<size_t> replacePositions;
1400
1401 size_t pos;
1402 for ( pos = m_impl.find(strOld.m_impl, 0);
1403 pos != npos;
1404 pos = m_impl.find(strOld.m_impl, pos + uiOldLen))
8a627032 1405 {
072682ce
VZ
1406 replacePositions.push_back(pos);
1407 ++uiCount;
1408 }
510bb748 1409
072682ce
VZ
1410 if ( !uiCount )
1411 return 0;
510bb748 1412
072682ce
VZ
1413 // allocate enough memory for the whole new string
1414 wxString tmp;
1415 tmp.m_impl.reserve(m_impl.length() + uiCount*(uiNewLen - uiOldLen));
ad5bb7d6 1416
072682ce
VZ
1417 // copy this string to tmp doing replacements on the fly
1418 size_t replNum = 0;
1419 for ( pos = 0; replNum < uiCount; replNum++ )
1420 {
1421 const size_t nextReplPos = replacePositions[replNum];
394b2900 1422
072682ce
VZ
1423 if ( pos != nextReplPos )
1424 {
1425 tmp.m_impl.append(m_impl, pos, nextReplPos - pos);
1426 }
1427
1428 tmp.m_impl.append(strNew.m_impl);
1429 pos = nextReplPos + uiOldLen;
8a627032 1430 }
072682ce
VZ
1431
1432 if ( pos != m_impl.length() )
1433 {
1434 // append the rest of the string unchanged
1435 tmp.m_impl.append(m_impl, pos, m_impl.length() - pos);
1436 }
1437
1438 swap(tmp);
c801d85f 1439 }
c801d85f 1440
510bb748 1441 return uiCount;
c801d85f
KB
1442}
1443
1444bool wxString::IsAscii() const
1445{
a4a44612
VS
1446 for ( const_iterator i = begin(); i != end(); ++i )
1447 {
1448 if ( !(*i).IsAscii() )
1449 return false;
1450 }
1451
1452 return true;
c801d85f 1453}
dd1eaa89 1454
c801d85f
KB
1455bool wxString::IsWord() const
1456{
a4a44612
VS
1457 for ( const_iterator i = begin(); i != end(); ++i )
1458 {
1459 if ( !wxIsalpha(*i) )
1460 return false;
1461 }
1462
1463 return true;
c801d85f 1464}
dd1eaa89 1465
c801d85f
KB
1466bool wxString::IsNumber() const
1467{
a4a44612
VS
1468 if ( empty() )
1469 return true;
1470
1471 const_iterator i = begin();
1472
9a83f860 1473 if ( *i == wxT('-') || *i == wxT('+') )
a4a44612
VS
1474 ++i;
1475
1476 for ( ; i != end(); ++i )
1477 {
1478 if ( !wxIsdigit(*i) )
1479 return false;
1480 }
1481
1482 return true;
c801d85f
KB
1483}
1484
c801d85f
KB
1485wxString wxString::Strip(stripType w) const
1486{
1487 wxString s = *this;
d775fa82
WS
1488 if ( w & leading ) s.Trim(false);
1489 if ( w & trailing ) s.Trim(true);
c801d85f
KB
1490 return s;
1491}
1492
c801d85f
KB
1493// ---------------------------------------------------------------------------
1494// case conversion
1495// ---------------------------------------------------------------------------
1496
1497wxString& wxString::MakeUpper()
1498{
e87b7833
MB
1499 for ( iterator it = begin(), en = end(); it != en; ++it )
1500 *it = (wxChar)wxToupper(*it);
c801d85f
KB
1501
1502 return *this;
1503}
1504
1505wxString& wxString::MakeLower()
1506{
e87b7833
MB
1507 for ( iterator it = begin(), en = end(); it != en; ++it )
1508 *it = (wxChar)wxTolower(*it);
c801d85f
KB
1509
1510 return *this;
1511}
1512
0c7db140
VZ
1513wxString& wxString::MakeCapitalized()
1514{
1515 const iterator en = end();
1516 iterator it = begin();
1517 if ( it != en )
1518 {
1519 *it = (wxChar)wxToupper(*it);
1520 for ( ++it; it != en; ++it )
1521 *it = (wxChar)wxTolower(*it);
1522 }
1523
1524 return *this;
1525}
1526
c801d85f
KB
1527// ---------------------------------------------------------------------------
1528// trimming and padding
1529// ---------------------------------------------------------------------------
1530
d775fa82 1531// some compilers (VC++ 6.0 not to name them) return true for a call to
9d55bfef 1532// isspace('\xEA') in the C locale which seems to be broken to me, but we have
c95e653c 1533// to live with this by checking that the character is a 7 bit one - even if
9d55bfef 1534// this may fail to detect some spaces (I don't know if Unicode doesn't have
576c608d
VZ
1535// space-like symbols somewhere except in the first 128 chars), it is arguably
1536// still better than trimming away accented letters
1537inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1538
c801d85f
KB
1539// trims spaces (in the sense of isspace) from left or right side
1540wxString& wxString::Trim(bool bFromRight)
1541{
3458e408
WS
1542 // first check if we're going to modify the string at all
1543 if ( !empty() &&
1544 (
1545 (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1546 (!bFromRight && wxSafeIsspace(GetChar(0u)))
1547 )
2c3b684c 1548 )
2c3b684c 1549 {
3458e408
WS
1550 if ( bFromRight )
1551 {
1552 // find last non-space character
d4d02bd5 1553 reverse_iterator psz = rbegin();
32c62191 1554 while ( (psz != rend()) && wxSafeIsspace(*psz) )
0d8b0f94 1555 ++psz;
92df97b8 1556
3458e408 1557 // truncate at trailing space start
d4d02bd5 1558 erase(psz.base(), end());
3458e408
WS
1559 }
1560 else
1561 {
1562 // find first non-space character
1563 iterator psz = begin();
32c62191 1564 while ( (psz != end()) && wxSafeIsspace(*psz) )
0d8b0f94 1565 ++psz;
2c3b684c 1566
3458e408
WS
1567 // fix up data and length
1568 erase(begin(), psz);
1569 }
2c3b684c 1570 }
c801d85f 1571
3458e408 1572 return *this;
c801d85f
KB
1573}
1574
1575// adds nCount characters chPad to the string from either side
c9f78968 1576wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
c801d85f 1577{
3458e408 1578 wxString s(chPad, nCount);
c801d85f 1579
3458e408
WS
1580 if ( bFromRight )
1581 *this += s;
1582 else
1583 {
1584 s += *this;
1585 swap(s);
1586 }
c801d85f 1587
3458e408 1588 return *this;
c801d85f
KB
1589}
1590
1591// truncate the string
1592wxString& wxString::Truncate(size_t uiLen)
1593{
3458e408
WS
1594 if ( uiLen < length() )
1595 {
1596 erase(begin() + uiLen, end());
1597 }
1598 //else: nothing to do, string is already short enough
c801d85f 1599
3458e408 1600 return *this;
c801d85f
KB
1601}
1602
1603// ---------------------------------------------------------------------------
3c67202d 1604// finding (return wxNOT_FOUND if not found and index otherwise)
c801d85f
KB
1605// ---------------------------------------------------------------------------
1606
1607// find a character
c9f78968 1608int wxString::Find(wxUniChar ch, bool bFromEnd) const
c801d85f 1609{
3458e408 1610 size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
c801d85f 1611
3458e408 1612 return (idx == npos) ? wxNOT_FOUND : (int)idx;
c801d85f
KB
1613}
1614
cd0b1709
VZ
1615// ----------------------------------------------------------------------------
1616// conversion to numbers
1617// ----------------------------------------------------------------------------
1618
52de37c7
VS
1619// The implementation of all the functions below is exactly the same so factor
1620// it out. Note that number extraction works correctly on UTF-8 strings, so
1621// we can use wxStringCharType and wx_str() for maximum efficiency.
122f3c5d 1622
// <errno.h> is not included under Windows CE in this file, so the code using
// errno must be compiled out there: DO_IF_NOT_WINCE(x) expands to x on all
// the other platforms and to nothing under Windows CE
#ifdef __WXWINCE__
    #define DO_IF_NOT_WINCE(x)
#else
    #define DO_IF_NOT_WINCE(x) x
#endif

// common beginning of all the conversion functions: check the output pointer
// pVal, reset errno and declare the variables "start" (beginning of the
// string data) and "end" (to be set by the parsing function)
#define WX_STRING_TO_X_TYPE_START                                           \
    wxCHECK_MSG( pVal, false, wxT("NULL output pointer") );                 \
    DO_IF_NOT_WINCE( errno = 0; )                                           \
    const wxStringCharType *start = wx_str();                               \
    wxStringCharType *end;

// notice that we return false without modifying the output parameter at all if
// nothing could be parsed but we do modify it and return false then if we did
// parse something successfully but not the entire string
//
// the parsed value is expected to be in the variable "val"
#define WX_STRING_TO_X_TYPE_END                                             \
    if ( end == start DO_IF_NOT_WINCE(|| errno == ERANGE) )                 \
        return false;                                                       \
    *pVal = val;                                                            \
    return !*end;
cd0b1709 1643
c95e653c 1644bool wxString::ToLong(long *pVal, int base) const
cd0b1709 1645{
9a83f860 1646 wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );
529e491c
FM
1647
1648 WX_STRING_TO_X_TYPE_START
1649 long val = wxStrtol(start, &end, base);
1650 WX_STRING_TO_X_TYPE_END
619dcb09 1651}
cd0b1709 1652
c95e653c 1653bool wxString::ToULong(unsigned long *pVal, int base) const
619dcb09 1654{
9a83f860 1655 wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );
529e491c
FM
1656
1657 WX_STRING_TO_X_TYPE_START
1658 unsigned long val = wxStrtoul(start, &end, base);
1659 WX_STRING_TO_X_TYPE_END
cd0b1709
VZ
1660}
1661
c95e653c 1662bool wxString::ToLongLong(wxLongLong_t *pVal, int base) const
d6718dd1 1663{
9a83f860 1664 wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );
529e491c
FM
1665
1666 WX_STRING_TO_X_TYPE_START
1667 wxLongLong_t val = wxStrtoll(start, &end, base);
1668 WX_STRING_TO_X_TYPE_END
d6718dd1
VZ
1669}
1670
c95e653c 1671bool wxString::ToULongLong(wxULongLong_t *pVal, int base) const
d6718dd1 1672{
9a83f860 1673 wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );
529e491c
FM
1674
1675 WX_STRING_TO_X_TYPE_START
1676 wxULongLong_t val = wxStrtoull(start, &end, base);
1677 WX_STRING_TO_X_TYPE_END
d6718dd1
VZ
1678}
1679
c95e653c 1680bool wxString::ToDouble(double *pVal) const
cd0b1709 1681{
529e491c
FM
1682 WX_STRING_TO_X_TYPE_START
1683 double val = wxStrtod(start, &end);
1684 WX_STRING_TO_X_TYPE_END
1685}
cd0b1709 1686
529e491c 1687#if wxUSE_XLOCALE
e71e5b37 1688
529e491c
FM
1689bool wxString::ToCLong(long *pVal, int base) const
1690{
9a83f860 1691 wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );
cd0b1709 1692
529e491c 1693 WX_STRING_TO_X_TYPE_START
a51fdf81 1694#if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT)
529e491c
FM
1695 long val = wxStrtol_lA(start, &end, base, wxCLocale);
1696#else
1697 long val = wxStrtol_l(start, &end, base, wxCLocale);
1698#endif
1699 WX_STRING_TO_X_TYPE_END
1700}
c95e653c 1701
529e491c
FM
1702bool wxString::ToCULong(unsigned long *pVal, int base) const
1703{
9a83f860 1704 wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );
c95e653c 1705
529e491c 1706 WX_STRING_TO_X_TYPE_START
a51fdf81 1707#if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT)
529e491c
FM
1708 unsigned long val = wxStrtoul_lA(start, &end, base, wxCLocale);
1709#else
1710 unsigned long val = wxStrtoul_l(start, &end, base, wxCLocale);
1711#endif
1712 WX_STRING_TO_X_TYPE_END
cd0b1709
VZ
1713}
1714
529e491c
FM
1715bool wxString::ToCDouble(double *pVal) const
1716{
1717 WX_STRING_TO_X_TYPE_START
a51fdf81 1718#if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT)
529e491c
FM
1719 double val = wxStrtod_lA(start, &end, wxCLocale);
1720#else
1721 double val = wxStrtod_l(start, &end, wxCLocale);
1722#endif
1723 WX_STRING_TO_X_TYPE_END
1724}
1725
1726#endif // wxUSE_XLOCALE
1727
c801d85f 1728// ---------------------------------------------------------------------------
9efd3367 1729// formatted output
c801d85f 1730// ---------------------------------------------------------------------------
378b05f7 1731
d1f6e2cf 1732#if !wxUSE_UTF8_LOCALE_ONLY
341e7d28 1733/* static */
c9f78968 1734#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1528e0b8 1735wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
c9f78968 1736#else
d1f6e2cf 1737wxString wxString::DoFormatWchar(const wxChar *format, ...)
c9f78968 1738#endif
341e7d28 1739{
77c3e48a 1740 va_list argptr;
c9f78968 1741 va_start(argptr, format);
341e7d28 1742
77c3e48a 1743 wxString s;
c9f78968 1744 s.PrintfV(format, argptr);
341e7d28 1745
77c3e48a 1746 va_end(argptr);
341e7d28 1747
77c3e48a 1748 return s;
341e7d28 1749}
d1f6e2cf
VS
1750#endif // !wxUSE_UTF8_LOCALE_ONLY
1751
#if wxUSE_UNICODE_UTF8
// return a new string containing the result of formatting the variable
// arguments according to the char format string
/* static */
wxString wxString::DoFormatUtf8(const char *format, ...)
{
    wxString s;

    va_list argptr;
    va_start(argptr, format);
    s.PrintfV(format, argptr);
    va_end(argptr);

    return s;
}
#endif // wxUSE_UNICODE_UTF8
341e7d28
VZ
1767
1768/* static */
c9f78968 1769wxString wxString::FormatV(const wxString& format, va_list argptr)
341e7d28
VZ
1770{
1771 wxString s;
c9f78968 1772 s.PrintfV(format, argptr);
341e7d28
VZ
1773 return s;
1774}
1775
d1f6e2cf 1776#if !wxUSE_UTF8_LOCALE_ONLY
c9f78968 1777#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
d1f6e2cf 1778int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
c9f78968 1779#else
d1f6e2cf 1780int wxString::DoPrintfWchar(const wxChar *format, ...)
c9f78968 1781#endif
c801d85f 1782{
ba9bbf13 1783 va_list argptr;
c9f78968 1784 va_start(argptr, format);
c801d85f 1785
c9f78968
VS
1786#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1787 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1788 // because it's the only cast that works safely for downcasting when
1789 // multiple inheritance is used:
1790 wxString *str = static_cast<wxString*>(this);
1791#else
1792 wxString *str = this;
1793#endif
1794
1795 int iLen = str->PrintfV(format, argptr);
c801d85f 1796
ba9bbf13 1797 va_end(argptr);
c801d85f 1798
ba9bbf13 1799 return iLen;
c801d85f 1800}
d1f6e2cf
VS
1801#endif // !wxUSE_UTF8_LOCALE_ONLY
1802
#if wxUSE_UNICODE_UTF8
// replace the contents of the string with the result of formatting the
// variable arguments according to the char format string; returns the value
// returned by PrintfV()
int wxString::DoPrintfUtf8(const char *format, ...)
{
    va_list argptr;
    va_start(argptr, format);
    const int iLen = PrintfV(format, argptr);
    va_end(argptr);

    return iLen;
}
#endif // wxUSE_UNICODE_UTF8
c801d85f 1816
67612ff1
DE
1817/*
    Uses wxVsnprintf and places the result into this string.
1819
1820 In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1821 it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in
1822 the ISO C99 (and thus SUSv3) standard the return value for the case of
1823 an undersized buffer is inconsistent. For conforming vsnprintf
1824 implementations the function must return the number of characters that
1825 would have been printed had the buffer been large enough. For conforming
1826 vswprintf implementations the function must return a negative number
1827 and set errno.
1828
1829 What vswprintf sets errno to is undefined but Darwin seems to set it to
a9a854d7
DE
1830 EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of
1831 those are defined in the standard and backed up by several conformance
1832 statements. Note that ENOMEM mentioned in the manual page does not
1833 apply to swprintf, only wprintf and fwprintf.
1834
1835 Official manual page:
1836 http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1837
1838 Some conformance statements (AIX, Solaris):
1839 http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1840 http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
1841
1842 Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
1843 EILSEQ and EINVAL are specifically defined to mean the error is other than
1844 an undersized buffer and no other errno are defined we treat those two
1845 as meaning hard errors and everything else gets the old behavior which
1846 is to keep looping and increasing buffer size until the function succeeds.
c95e653c 1847
67612ff1
DE
1848 In practice it's impossible to determine before compilation which behavior
1849 may be used. The vswprintf function may have vsnprintf-like behavior or
1850 vice-versa. Behavior detected on one release can theoretically change
1851 with an updated release. Not to mention that configure testing for it
1852 would require the test to be run on the host system, not the build system
1853 which makes cross compilation difficult. Therefore, we make no assumptions
1854 about behavior and try our best to handle every known case, including the
1855 case where wxVsnprintf returns a negative number and fails to set errno.
1856
1857 There is yet one more non-standard implementation and that is our own.
1858 Fortunately, that can be detected at compile-time.
1859
1860 On top of all that, ISO C99 explicitly defines snprintf to write a null
    character to the last position of the specified buffer. That would be
    at the given buffer size minus 1. It is supposed to do this even if it
1863 turns out that the buffer is sized too small.
1864
1865 Darwin (tested on 10.5) follows the C99 behavior exactly.
1866
1867 Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
1868 errno even when it fails. However, it only seems to ever fail due
1869 to an undersized buffer.
1870*/
2523e9b7
VS
1871#if wxUSE_UNICODE_UTF8
1872template<typename BufferType>
1873#else
1874// we only need one version in non-UTF8 builds and at least two Windows
1875// compilers have problems with this function template, so use just one
1876// normal function here
1877#endif
1878static int DoStringPrintfV(wxString& str,
1879 const wxString& format, va_list argptr)
c801d85f 1880{
f6f5941b 1881 int size = 1024;
e87b7833 1882
f6f5941b
VZ
1883 for ( ;; )
1884 {
2523e9b7
VS
1885#if wxUSE_UNICODE_UTF8
1886 BufferType tmp(str, size + 1);
1887 typename BufferType::CharType *buf = tmp;
1888#else
1889 wxStringBuffer tmp(str, size + 1);
de2589be 1890 wxChar *buf = tmp;
2523e9b7 1891#endif
2bb67b80 1892
ba9bbf13
WS
1893 if ( !buf )
1894 {
1895 // out of memory
a33c7045
VS
1896
1897 // in UTF-8 build, leaving uninitialized junk in the buffer
1898 // could result in invalid non-empty UTF-8 string, so just
1899 // reset the string to empty on failure:
1900 buf[0] = '\0';
ba9bbf13 1901 return -1;
e87b7833 1902 }
f6f5941b 1903
ba9bbf13
WS
1904 // wxVsnprintf() may modify the original arg pointer, so pass it
1905 // only a copy
1906 va_list argptrcopy;
1907 wxVaCopy(argptrcopy, argptr);
67612ff1
DE
1908
1909#ifndef __WXWINCE__
1910 // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
1911 errno = 0;
1912#endif
2523e9b7 1913 int len = wxVsnprintf(buf, size, format, argptrcopy);
ba9bbf13
WS
1914 va_end(argptrcopy);
1915
1916 // some implementations of vsnprintf() don't NUL terminate
1917 // the string if there is not enough space for it so
1918 // always do it manually
67612ff1
DE
1919 // FIXME: This really seems to be the wrong and would be an off-by-one
1920 // bug except the code above allocates an extra character.
9a83f860 1921 buf[size] = wxT('\0');
ba9bbf13 1922
caff62f2
VZ
1923 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
1924 // total number of characters which would have been written if the
b1727cfe 1925 // buffer were large enough (newer standards such as Unix98)
de2589be
VZ
1926 if ( len < 0 )
1927 {
52de37c7
VS
1928 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
1929 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
1930 // is true if *both* of them use our own implementation,
1931 // otherwise we can't be sure
f2bbe5b6
VZ
1932#if wxUSE_WXVSNPRINTF
1933 // we know that our own implementation of wxVsnprintf() returns -1
1934 // only for a format error - thus there's something wrong with
1935 // the user's format string
a33c7045 1936 buf[0] = '\0';
f2bbe5b6 1937 return -1;
52de37c7
VS
1938#else // possibly using system version
1939 // assume it only returns error if there is not enough space, but
1940 // as we don't know how much we need, double the current size of
1941 // the buffer
67612ff1 1942#ifndef __WXWINCE__
a9a854d7
DE
1943 if( (errno == EILSEQ) || (errno == EINVAL) )
1944 // If errno was set to one of the two well-known hard errors
1945 // then fail immediately to avoid an infinite loop.
1946 return -1;
1947 else
1948#endif // __WXWINCE__
67612ff1
DE
1949 // still not enough, as we don't know how much we need, double the
1950 // current size of the buffer
1951 size *= 2;
f2bbe5b6 1952#endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
de2589be 1953 }
64f8f94c 1954 else if ( len >= size )
de2589be 1955 {
f2bbe5b6 1956#if wxUSE_WXVSNPRINTF
c95e653c 1957 // we know that our own implementation of wxVsnprintf() returns
f2bbe5b6
VZ
1958 // size+1 when there's not enough space but that's not the size
1959 // of the required buffer!
1960 size *= 2; // so we just double the current size of the buffer
1961#else
64f8f94c
VZ
1962 // some vsnprintf() implementations NUL-terminate the buffer and
1963 // some don't in len == size case, to be safe always add 1
67612ff1
DE
1964 // FIXME: I don't quite understand this comment. The vsnprintf
1965 // function is specifically defined to return the number of
1966 // characters printed not including the null terminator.
1967 // So OF COURSE you need to add 1 to get the right buffer size.
1968 // The following line is definitely correct, no question.
64f8f94c 1969 size = len + 1;
f2bbe5b6 1970#endif
de2589be
VZ
1971 }
1972 else // ok, there was enough space
f6f5941b 1973 {
f6f5941b
VZ
1974 break;
1975 }
f6f5941b
VZ
1976 }
1977
1978 // we could have overshot
2523e9b7
VS
1979 str.Shrink();
1980
1981 return str.length();
1982}
c801d85f 1983
2523e9b7
VS
1984int wxString::PrintfV(const wxString& format, va_list argptr)
1985{
2523e9b7
VS
1986#if wxUSE_UNICODE_UTF8
1987 #if wxUSE_STL_BASED_WXSTRING
1988 typedef wxStringTypeBuffer<char> Utf8Buffer;
1989 #else
6798451b 1990 typedef wxStringInternalBuffer Utf8Buffer;
2523e9b7
VS
1991 #endif
1992#endif
1993
1994#if wxUSE_UTF8_LOCALE_ONLY
c6255a6e 1995 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
2523e9b7
VS
1996#else
1997 #if wxUSE_UNICODE_UTF8
1998 if ( wxLocaleIsUtf8 )
c6255a6e 1999 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
2523e9b7
VS
2000 else
2001 // wxChar* version
c6255a6e 2002 return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
2523e9b7 2003 #else
c6255a6e 2004 return DoStringPrintfV(*this, format, argptr);
2523e9b7
VS
2005 #endif // UTF8/WCHAR
2006#endif
c801d85f
KB
2007}
2008
097c080b
VZ
2009// ----------------------------------------------------------------------------
2010// misc other operations
2011// ----------------------------------------------------------------------------
0c5d3e1c 2012
d775fa82 2013// returns true if the string matches the pattern which may contain '*' and
0c5d3e1c
VZ
2014// '?' metacharacters (as usual, '?' matches any character and '*' any number
2015// of them)
8a540c88 2016bool wxString::Matches(const wxString& mask) const
097c080b 2017{
d6044f58
VZ
2018 // I disable this code as it doesn't seem to be faster (in fact, it seems
2019 // to be much slower) than the old, hand-written code below and using it
2020 // here requires always linking with libregex even if the user code doesn't
2021 // use it
2022#if 0 // wxUSE_REGEX
706c2ac9
VZ
2023 // first translate the shell-like mask into a regex
2024 wxString pattern;
2025 pattern.reserve(wxStrlen(pszMask));
2026
9a83f860 2027 pattern += wxT('^');
706c2ac9
VZ
2028 while ( *pszMask )
2029 {
2030 switch ( *pszMask )
2031 {
9a83f860
VZ
2032 case wxT('?'):
2033 pattern += wxT('.');
706c2ac9
VZ
2034 break;
2035
9a83f860
VZ
2036 case wxT('*'):
2037 pattern += wxT(".*");
706c2ac9
VZ
2038 break;
2039
9a83f860
VZ
2040 case wxT('^'):
2041 case wxT('.'):
2042 case wxT('$'):
2043 case wxT('('):
2044 case wxT(')'):
2045 case wxT('|'):
2046 case wxT('+'):
2047 case wxT('\\'):
706c2ac9
VZ
2048 // these characters are special in a RE, quote them
2049 // (however note that we don't quote '[' and ']' to allow
2050 // using them for Unix shell like matching)
9a83f860 2051 pattern += wxT('\\');
706c2ac9
VZ
2052 // fall through
2053
2054 default:
2055 pattern += *pszMask;
2056 }
2057
2058 pszMask++;
2059 }
9a83f860 2060 pattern += wxT('$');
706c2ac9
VZ
2061
2062 // and now use it
2063 return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
2064#else // !wxUSE_REGEX
9a4232dc
VZ
2065 // TODO: this is, of course, awfully inefficient...
2066
8a540c88
VS
2067 // FIXME-UTF8: implement using iterators, remove #if
2068#if wxUSE_UNICODE_UTF8
de4983f3
VS
2069 const wxScopedWCharBuffer maskBuf = mask.wc_str();
2070 const wxScopedWCharBuffer txtBuf = wc_str();
8a540c88
VS
2071 const wxChar *pszMask = maskBuf.data();
2072 const wxChar *pszTxt = txtBuf.data();
2073#else
2074 const wxChar *pszMask = mask.wx_str();
9a4232dc 2075 // the char currently being checked
8a540c88
VS
2076 const wxChar *pszTxt = wx_str();
2077#endif
9a4232dc
VZ
2078
2079 // the last location where '*' matched
2080 const wxChar *pszLastStarInText = NULL;
2081 const wxChar *pszLastStarInMask = NULL;
2082
2083match:
2084 for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
097c080b 2085 switch ( *pszMask ) {
223d09f6
KB
2086 case wxT('?'):
2087 if ( *pszTxt == wxT('\0') )
d775fa82 2088 return false;
097c080b 2089
9a4232dc 2090 // pszTxt and pszMask will be incremented in the loop statement
0c5d3e1c 2091
097c080b
VZ
2092 break;
2093
223d09f6 2094 case wxT('*'):
097c080b 2095 {
9a4232dc
VZ
2096 // remember where we started to be able to backtrack later
2097 pszLastStarInText = pszTxt;
2098 pszLastStarInMask = pszMask;
2099
097c080b 2100 // ignore special chars immediately following this one
9a4232dc 2101 // (should this be an error?)
223d09f6 2102 while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
097c080b
VZ
2103 pszMask++;
2104
2105 // if there is nothing more, match
223d09f6 2106 if ( *pszMask == wxT('\0') )
d775fa82 2107 return true;
097c080b
VZ
2108
2109 // are there any other metacharacters in the mask?
c86f1403 2110 size_t uiLenMask;
223d09f6 2111 const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));
097c080b
VZ
2112
2113 if ( pEndMask != NULL ) {
2114 // we have to match the string between two metachars
2115 uiLenMask = pEndMask - pszMask;
2116 }
2117 else {
2118 // we have to match the remainder of the string
2bb67b80 2119 uiLenMask = wxStrlen(pszMask);
097c080b
VZ
2120 }
2121
2122 wxString strToMatch(pszMask, uiLenMask);
2bb67b80 2123 const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
097c080b 2124 if ( pMatch == NULL )
d775fa82 2125 return false;
097c080b
VZ
2126
2127 // -1 to compensate "++" in the loop
2128 pszTxt = pMatch + uiLenMask - 1;
2129 pszMask += uiLenMask - 1;
2130 }
2131 break;
2132
2133 default:
2134 if ( *pszMask != *pszTxt )
d775fa82 2135 return false;
097c080b
VZ
2136 break;
2137 }
2138 }
2139
2140 // match only if nothing left
9a4232dc 2141 if ( *pszTxt == wxT('\0') )
d775fa82 2142 return true;
9a4232dc
VZ
2143
2144 // if we failed to match, backtrack if we can
2145 if ( pszLastStarInText ) {
2146 pszTxt = pszLastStarInText + 1;
2147 pszMask = pszLastStarInMask;
2148
2149 pszLastStarInText = NULL;
2150
2151 // don't bother resetting pszLastStarInMask, it's unnecessary
2152
2153 goto match;
2154 }
2155
d775fa82 2156 return false;
706c2ac9 2157#endif // wxUSE_REGEX/!wxUSE_REGEX
097c080b
VZ
2158}
2159
1fc5dd6f 2160// Count the number of chars
c9f78968 2161int wxString::Freq(wxUniChar ch) const
1fc5dd6f
JS
2162{
2163 int count = 0;
8f93a29f 2164 for ( const_iterator i = begin(); i != end(); ++i )
1fc5dd6f 2165 {
8f93a29f 2166 if ( *i == ch )
1fc5dd6f
JS
2167 count ++;
2168 }
2169 return count;
2170}
4e79262f 2171