]> git.saurik.com Git - wxWidgets.git/blame - src/common/string.cpp
Fix ribbon documentation warnings.
[wxWidgets.git] / src / common / string.cpp
CommitLineData
c801d85f 1/////////////////////////////////////////////////////////////////////////////
8898456d 2// Name: src/common/string.cpp
c801d85f 3// Purpose: wxString class
59059feb 4// Author: Vadim Zeitlin, Ryan Norton
c801d85f
KB
5// Modified by:
6// Created: 29/01/98
c801d85f 7// Copyright: (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
59059feb 8// (c) 2004 Ryan Norton <wxprojects@comcast.net>
65571936 9// Licence: wxWindows licence
c801d85f
KB
10/////////////////////////////////////////////////////////////////////////////
11
c801d85f
KB
12// ===========================================================================
13// headers, declarations, constants
14// ===========================================================================
15
16// For compilers that support precompilation, includes "wx.h".
17#include "wx/wxprec.h"
18
19#ifdef __BORLANDC__
8898456d 20 #pragma hdrstop
c801d85f
KB
21#endif
22
23#ifndef WX_PRECOMP
8898456d 24 #include "wx/string.h"
2523e9b7 25 #include "wx/wxcrtvararg.h"
105993f7 26 #include "wx/intl.h"
ba7e7253 27 #include "wx/log.h"
6b769f3d 28#endif
c801d85f
KB
29
30#include <ctype.h>
92df97b8
WS
31
32#ifndef __WXWINCE__
33 #include <errno.h>
34#endif
35
c801d85f
KB
36#include <string.h>
37#include <stdlib.h>
9a08c20e 38
8116a0c5 39#include "wx/hashmap.h"
072682ce 40#include "wx/vector.h"
529e491c 41#include "wx/xlocale.h"
8f93a29f 42
d98a58c5 43#ifdef __WINDOWS__
825d69c1 44 #include "wx/msw/wrapwin.h"
d98a58c5 45#endif // __WINDOWS__
825d69c1 46
951201d8
VZ
47#if wxUSE_STD_IOSTREAM
48 #include <sstream>
49#endif
50
8f93a29f
VS
51// string handling functions used by wxString:
52#if wxUSE_UNICODE_UTF8
53 #define wxStringMemcpy memcpy
54 #define wxStringMemcmp memcmp
55 #define wxStringMemchr memchr
56 #define wxStringStrlen strlen
57#else
58 #define wxStringMemcpy wxTmemcpy
59 #define wxStringMemcmp wxTmemcmp
a7ea63e2
VS
60 #define wxStringMemchr wxTmemchr
61 #define wxStringStrlen wxStrlen
62#endif
8f93a29f 63
b96a56e6
VZ
64// define a function declared in wx/buffer.h here as we don't have buffer.cpp
65// and don't want to add it just because of this simple function
4e79262f
VZ
66namespace wxPrivate
67{
68
b96a56e6
VZ
69// wxXXXBuffer classes can be (implicitly) used during global statics
70// initialization so wrap the status UntypedBufferData variable in a function
71// to make it safe to access it even before all global statics are initialized
72UntypedBufferData *GetUntypedNullData()
73{
74 static UntypedBufferData s_untypedNullData(NULL, 0);
4e79262f 75
b96a56e6
VZ
76 return &s_untypedNullData;
77}
4e79262f
VZ
78
79} // namespace wxPrivate
e87b7833 80
a7ea63e2
VS
81// ---------------------------------------------------------------------------
82// static class variables definition
83// ---------------------------------------------------------------------------
e87b7833 84
a7ea63e2
VS
85//According to STL _must_ be a -1 size_t
86const size_t wxString::npos = (size_t) -1;
8f93a29f 87
68482dc5 88#if wxUSE_STRING_POS_CACHE
68482dc5 89
e810df36
VZ
90#ifdef wxHAS_COMPILER_TLS
91
92wxTLS_TYPE(wxString::Cache) wxString::ms_cache;
93
94#else // !wxHAS_COMPILER_TLS
95
ad8ae788
VZ
96struct wxStrCacheInitializer
97{
98 wxStrCacheInitializer()
99 {
100 // calling this function triggers s_cache initialization in it, and
101 // from now on it becomes safe to call from multiple threads
102 wxString::GetCache();
103 }
104};
105
e317bd3f
SC
106/*
107wxString::Cache& wxString::GetCache()
108{
109 static wxTLS_TYPE(Cache) s_cache;
110
111 return wxTLS_VALUE(s_cache);
112}
113*/
114
ad8ae788
VZ
115static wxStrCacheInitializer gs_stringCacheInit;
116
e810df36
VZ
117#endif // wxHAS_COMPILER_TLS/!wxHAS_COMPILER_TLS
118
68482dc5
VZ
119// gdb seems to be unable to display thread-local variables correctly, at least
120// not my 6.4.98 version under amd64, so provide this debugging helper to do it
4b6a582b 121#if wxDEBUG_LEVEL >= 2
68482dc5
VZ
122
123struct wxStrCacheDumper
124{
125 static void ShowAll()
126 {
127 puts("*** wxString cache dump:");
128 for ( unsigned n = 0; n < wxString::Cache::SIZE; n++ )
129 {
130 const wxString::Cache::Element&
8b73c531 131 c = wxString::GetCacheBegin()[n];
68482dc5
VZ
132
133 printf("\t%u%s\t%p: pos=(%lu, %lu), len=%ld\n",
134 n,
8b73c531 135 n == wxString::LastUsedCacheElement() ? " [*]" : "",
68482dc5
VZ
136 c.str,
137 (unsigned long)c.pos,
138 (unsigned long)c.impl,
139 (long)c.len);
140 }
141 }
142};
143
144void wxDumpStrCache() { wxStrCacheDumper::ShowAll(); }
145
4b6a582b 146#endif // wxDEBUG_LEVEL >= 2
68482dc5
VZ
147
148#ifdef wxPROFILE_STRING_CACHE
149
150wxString::CacheStats wxString::ms_cacheStats;
151
8c3b65d9 152struct wxStrCacheStatsDumper
68482dc5 153{
8c3b65d9 154 ~wxStrCacheStatsDumper()
68482dc5
VZ
155 {
156 const wxString::CacheStats& stats = wxString::ms_cacheStats;
157
158 if ( stats.postot )
159 {
160 puts("*** wxString cache statistics:");
161 printf("\tTotal non-trivial calls to PosToImpl(): %u\n",
162 stats.postot);
163 printf("\tHits %u (of which %u not used) or %.2f%%\n",
164 stats.poshits,
165 stats.mishits,
166 100.*float(stats.poshits - stats.mishits)/stats.postot);
167 printf("\tAverage position requested: %.2f\n",
168 float(stats.sumpos) / stats.postot);
169 printf("\tAverage offset after cached hint: %.2f\n",
170 float(stats.sumofs) / stats.postot);
171 }
172
173 if ( stats.lentot )
174 {
175 printf("\tNumber of calls to length(): %u, hits=%.2f%%\n",
176 stats.lentot, 100.*float(stats.lenhits)/stats.lentot);
177 }
178 }
8c3b65d9 179};
68482dc5 180
8c3b65d9 181static wxStrCacheStatsDumper s_showCacheStats;
68482dc5
VZ
182
183#endif // wxPROFILE_STRING_CACHE
184
185#endif // wxUSE_STRING_POS_CACHE
186
a7ea63e2
VS
187// ----------------------------------------------------------------------------
188// global functions
189// ----------------------------------------------------------------------------
e87b7833 190
a7ea63e2 191#if wxUSE_STD_IOSTREAM
8f93a29f 192
a7ea63e2 193#include <iostream>
8f93a29f 194
a7ea63e2 195wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
8f93a29f 196{
7a906e1a 197#if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
de4983f3 198 const wxScopedCharBuffer buf(str.AsCharBuf());
ddf01bdb
VZ
199 if ( !buf )
200 os.clear(wxSTD ios_base::failbit);
201 else
202 os << buf.data();
203
204 return os;
a7ea63e2 205#else
7a906e1a 206 return os << str.AsInternal();
a7ea63e2 207#endif
8f93a29f
VS
208}
209
04abe4bc
VS
210wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
211{
212 return os << str.c_str();
213}
214
de4983f3 215wxSTD ostream& operator<<(wxSTD ostream& os, const wxScopedCharBuffer& str)
04abe4bc
VS
216{
217 return os << str.data();
218}
219
220#ifndef __BORLANDC__
de4983f3 221wxSTD ostream& operator<<(wxSTD ostream& os, const wxScopedWCharBuffer& str)
04abe4bc
VS
222{
223 return os << str.data();
224}
225#endif
226
6a6ea041 227#if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
6b61b594
VZ
228
229wxSTD wostream& operator<<(wxSTD wostream& wos, const wxString& str)
230{
231 return wos << str.wc_str();
232}
233
234wxSTD wostream& operator<<(wxSTD wostream& wos, const wxCStrData& str)
235{
236 return wos << str.AsWChar();
237}
238
de4983f3 239wxSTD wostream& operator<<(wxSTD wostream& wos, const wxScopedWCharBuffer& str)
6b61b594
VZ
240{
241 return wos << str.data();
242}
243
6a6ea041 244#endif // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
6b61b594 245
a7ea63e2 246#endif // wxUSE_STD_IOSTREAM
e87b7833 247
81727065
VS
248// ===========================================================================
249// wxString class core
250// ===========================================================================
251
252#if wxUSE_UNICODE_UTF8
253
81727065
VS
254void wxString::PosLenToImpl(size_t pos, size_t len,
255 size_t *implPos, size_t *implLen) const
256{
257 if ( pos == npos )
68482dc5 258 {
81727065 259 *implPos = npos;
68482dc5
VZ
260 }
261 else // have valid start position
81727065 262 {
68482dc5
VZ
263 const const_iterator b = GetIterForNthChar(pos);
264 *implPos = wxStringImpl::const_iterator(b.impl()) - m_impl.begin();
81727065 265 if ( len == npos )
68482dc5 266 {
81727065 267 *implLen = npos;
68482dc5
VZ
268 }
269 else // have valid length too
81727065 270 {
68482dc5
VZ
271 // we need to handle the case of length specifying a substring
272 // going beyond the end of the string, just as std::string does
273 const const_iterator e(end());
274 const_iterator i(b);
275 while ( len && i <= e )
276 {
277 ++i;
278 --len;
279 }
280
281 *implLen = i.impl() - b.impl();
81727065
VS
282 }
283 }
284}
285
286#endif // wxUSE_UNICODE_UTF8
287
11aac4ba
VS
288// ----------------------------------------------------------------------------
289// wxCStrData converted strings caching
290// ----------------------------------------------------------------------------
291
132276cf
VS
292// FIXME-UTF8: temporarily disabled because it doesn't work with global
293// string objects; re-enable after fixing this bug and benchmarking
294// performance to see if using a hash is a good idea at all
295#if 0
296
11aac4ba
VS
297// For backward compatibility reasons, it must be possible to assign the value
298// returned by wxString::c_str() to a char* or wchar_t* variable and work with
299// it. Returning wxCharBuffer from (const char*)c_str() wouldn't do the trick,
300// because the memory would be freed immediately, but it has to be valid as long
301// as the string is not modified, so that code like this still works:
302//
303// const wxChar *s = str.c_str();
304// while ( s ) { ... }
305
306// FIXME-UTF8: not thread safe!
307// FIXME-UTF8: we currently clear the cached conversion only when the string is
308// destroyed, but we should do it when the string is modified, to
309// keep memory usage down
310// FIXME-UTF8: we do the conversion every time As[W]Char() is called, but if we
311// invalidated the cache on every change, we could keep the previous
312// conversion
313// FIXME-UTF8: add tracing of usage of these two methods - new code is supposed
314// to use mb_str() or wc_str() instead of (const [w]char*)c_str()
315
316template<typename T>
317static inline void DeleteStringFromConversionCache(T& hash, const wxString *s)
318{
6c4ebcda 319 typename T::iterator i = hash.find(wxConstCast(s, wxString));
11aac4ba
VS
320 if ( i != hash.end() )
321 {
322 free(i->second);
323 hash.erase(i);
324 }
325}
326
327#if wxUSE_UNICODE
6c4ebcda
VS
328// NB: non-STL implementation doesn't compile with "const wxString*" key type,
329// so we have to use wxString* here and const-cast when used
11aac4ba
VS
330WX_DECLARE_HASH_MAP(wxString*, char*, wxPointerHash, wxPointerEqual,
331 wxStringCharConversionCache);
332static wxStringCharConversionCache gs_stringsCharCache;
333
334const char* wxCStrData::AsChar() const
335{
336 // remove previously cache value, if any (see FIXMEs above):
337 DeleteStringFromConversionCache(gs_stringsCharCache, m_str);
338
339 // convert the string and keep it:
6c4ebcda
VS
340 const char *s = gs_stringsCharCache[wxConstCast(m_str, wxString)] =
341 m_str->mb_str().release();
11aac4ba
VS
342
343 return s + m_offset;
344}
345#endif // wxUSE_UNICODE
346
347#if !wxUSE_UNICODE_WCHAR
348WX_DECLARE_HASH_MAP(wxString*, wchar_t*, wxPointerHash, wxPointerEqual,
349 wxStringWCharConversionCache);
350static wxStringWCharConversionCache gs_stringsWCharCache;
351
352const wchar_t* wxCStrData::AsWChar() const
353{
354 // remove previously cache value, if any (see FIXMEs above):
355 DeleteStringFromConversionCache(gs_stringsWCharCache, m_str);
356
357 // convert the string and keep it:
6c4ebcda
VS
358 const wchar_t *s = gs_stringsWCharCache[wxConstCast(m_str, wxString)] =
359 m_str->wc_str().release();
11aac4ba
VS
360
361 return s + m_offset;
362}
363#endif // !wxUSE_UNICODE_WCHAR
364
11aac4ba
VS
365wxString::~wxString()
366{
367#if wxUSE_UNICODE
368 // FIXME-UTF8: do this only if locale is not UTF8 if wxUSE_UNICODE_UTF8
369 DeleteStringFromConversionCache(gs_stringsCharCache, this);
370#endif
371#if !wxUSE_UNICODE_WCHAR
372 DeleteStringFromConversionCache(gs_stringsWCharCache, this);
373#endif
374}
132276cf
VS
375#endif
376
132276cf
VS
377// ===========================================================================
378// wxString class core
379// ===========================================================================
380
381// ---------------------------------------------------------------------------
382// construction and conversion
383// ---------------------------------------------------------------------------
11aac4ba 384
81727065 385#if wxUSE_UNICODE_WCHAR
8f93a29f
VS
386/* static */
387wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
04abe4bc 388 const wxMBConv& conv)
8f93a29f
VS
389{
390 // anything to do?
391 if ( !psz || nLength == 0 )
de4983f3 392 return SubstrBufFromMB(wxWCharBuffer(L""), 0);
8f93a29f
VS
393
394 if ( nLength == npos )
395 nLength = wxNO_LEN;
396
397 size_t wcLen;
de4983f3 398 wxScopedWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
8f93a29f 399 if ( !wcLen )
de4983f3 400 return SubstrBufFromMB(wxWCharBuffer(L""), 0);
8f93a29f
VS
401 else
402 return SubstrBufFromMB(wcBuf, wcLen);
403}
81727065
VS
404#endif // wxUSE_UNICODE_WCHAR
405
406#if wxUSE_UNICODE_UTF8
407/* static */
408wxString::SubstrBufFromMB wxString::ConvertStr(const char *psz, size_t nLength,
409 const wxMBConv& conv)
410{
81727065
VS
411 // anything to do?
412 if ( !psz || nLength == 0 )
de4983f3 413 return SubstrBufFromMB(wxCharBuffer(""), 0);
81727065 414
111d9948
VS
415 // if psz is already in UTF-8, we don't have to do the roundtrip to
416 // wchar_t* and back:
417 if ( conv.IsUTF8() )
418 {
419 // we need to validate the input because UTF8 iterators assume valid
420 // UTF-8 sequence and psz may be invalid:
421 if ( wxStringOperations::IsValidUtf8String(psz, nLength) )
422 {
9ef1ad0d
VZ
423 // we must pass the real string length to SubstrBufFromMB ctor
424 if ( nLength == npos )
425 nLength = psz ? strlen(psz) : 0;
38d26d60 426 return SubstrBufFromMB(wxScopedCharBuffer::CreateNonOwned(psz, nLength),
6df09f32 427 nLength);
111d9948
VS
428 }
429 // else: do the roundtrip through wchar_t*
430 }
431
81727065
VS
432 if ( nLength == npos )
433 nLength = wxNO_LEN;
434
435 // first convert to wide string:
436 size_t wcLen;
de4983f3 437 wxScopedWCharBuffer wcBuf(conv.cMB2WC(psz, nLength, &wcLen));
81727065 438 if ( !wcLen )
de4983f3 439 return SubstrBufFromMB(wxCharBuffer(""), 0);
81727065
VS
440
441 // and then to UTF-8:
4fdfe2f3 442 SubstrBufFromMB buf(ConvertStr(wcBuf, wcLen, wxMBConvStrictUTF8()));
81727065 443 // widechar -> UTF-8 conversion isn't supposed to ever fail:
9a83f860 444 wxASSERT_MSG( buf.data, wxT("conversion to UTF-8 failed") );
81727065
VS
445
446 return buf;
447}
448#endif // wxUSE_UNICODE_UTF8
449
450#if wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
8f93a29f
VS
451/* static */
452wxString::SubstrBufFromWC wxString::ConvertStr(const wchar_t *pwz, size_t nLength,
04abe4bc 453 const wxMBConv& conv)
8f93a29f
VS
454{
455 // anything to do?
456 if ( !pwz || nLength == 0 )
de4983f3 457 return SubstrBufFromWC(wxCharBuffer(""), 0);
8f93a29f
VS
458
459 if ( nLength == npos )
460 nLength = wxNO_LEN;
461
462 size_t mbLen;
de4983f3 463 wxScopedCharBuffer mbBuf(conv.cWC2MB(pwz, nLength, &mbLen));
8f93a29f 464 if ( !mbLen )
de4983f3 465 return SubstrBufFromWC(wxCharBuffer(""), 0);
8f93a29f
VS
466 else
467 return SubstrBufFromWC(mbBuf, mbLen);
468}
81727065 469#endif // wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE
8f93a29f 470
f54cb154
VZ
471// This std::string::c_str()-like method returns a wide char pointer to string
472// contents. In wxUSE_UNICODE_WCHAR case it is trivial as it can simply return
473// a pointer to the internal representation. Otherwise a conversion is required
474// and it returns a temporary buffer.
475//
476// However for compatibility with c_str() and to avoid breaking existing code
477// doing
478//
479// for ( const wchar_t *p = s.wc_str(); *p; p++ )
480// ... use *p...
481//
482// we actually need to ensure that the returned buffer is _not_ temporary and
483// so we use wxString::m_convertedToWChar to store the returned data
484#if !wxUSE_UNICODE_WCHAR
8f93a29f 485
f54cb154 486const wchar_t *wxString::AsWChar(const wxMBConv& conv) const
265d5cce 487{
f54cb154
VZ
488 const char * const strMB = m_impl.c_str();
489 const size_t lenMB = m_impl.length();
490
491 // find out the size of the buffer needed
492 const size_t lenWC = conv.ToWChar(NULL, 0, strMB, lenMB);
493 if ( lenWC == wxCONV_FAILED )
494 return NULL;
495
496 // keep the same buffer if the string size didn't change: this is not only
497 // an optimization but also ensure that code which modifies string
498 // character by character (without changing its length) can continue to use
499 // the pointer returned by a previous wc_str() call even after changing the
500 // string
501
502 // TODO-UTF8: we could check for ">" instead of "!=" here as this would
503 // allow to save on buffer reallocations but at the cost of
504 // consuming (even) more memory, we should benchmark this to
505 // determine if it's worth doing
506 if ( !m_convertedToWChar.m_str || lenWC != m_convertedToWChar.m_len )
507 {
508 if ( !const_cast<wxString *>(this)->m_convertedToWChar.Extend(lenWC) )
509 return NULL;
510 }
e87b7833 511
f54cb154
VZ
512 // finally do convert
513 m_convertedToWChar.m_str[lenWC] = L'\0';
514 if ( conv.ToWChar(m_convertedToWChar.m_str, lenWC,
515 strMB, lenMB) == wxCONV_FAILED )
516 return NULL;
e87b7833 517
f54cb154 518 return m_convertedToWChar.m_str;
81727065
VS
519}
520
f54cb154
VZ
521#endif // !wxUSE_UNICODE_WCHAR
522
523
524// Same thing for mb_str() which returns a normal char pointer to string
525// contents: this always requires converting it to the specified encoding in
526// non-ANSI build except if we need to convert to UTF-8 and this is what we
527// already use internally.
528#if wxUSE_UNICODE
529
530const char *wxString::AsChar(const wxMBConv& conv) const
81727065 531{
f54cb154 532#if wxUSE_UNICODE_UTF8
111d9948 533 if ( conv.IsUTF8() )
f54cb154 534 return m_impl.c_str();
111d9948 535
f54cb154
VZ
536 const wchar_t * const strWC = AsWChar(wxMBConvStrictUTF8());
537 const size_t lenWC = m_convertedToWChar.m_len;
538#else // wxUSE_UNICODE_WCHAR
539 const wchar_t * const strWC = m_impl.c_str();
540 const size_t lenWC = m_impl.length();
541#endif // wxUSE_UNICODE_UTF8/wxUSE_UNICODE_WCHAR
81727065 542
f54cb154
VZ
543 const size_t lenMB = conv.FromWChar(NULL, 0, strWC, lenWC);
544 if ( lenMB == wxCONV_FAILED )
545 return NULL;
546
547 if ( !m_convertedToChar.m_str || lenMB != m_convertedToChar.m_len )
548 {
549 if ( !const_cast<wxString *>(this)->m_convertedToChar.Extend(lenMB) )
550 return NULL;
551 }
81727065 552
f54cb154
VZ
553 m_convertedToChar.m_str[lenMB] = '\0';
554 if ( conv.FromWChar(m_convertedToChar.m_str, lenMB,
555 strWC, lenWC) == wxCONV_FAILED )
556 return NULL;
eec47cc6 557
f54cb154 558 return m_convertedToChar.m_str;
265d5cce 559}
7663d0d4 560
f54cb154 561#endif // wxUSE_UNICODE
e87b7833
MB
562
563// shrink to minimal size (releasing extra memory)
564bool wxString::Shrink()
565{
566 wxString tmp(begin(), end());
567 swap(tmp);
568 return tmp.length() == length();
569}
570
d8a4b666 571// deprecated compatibility code:
a7ea63e2 572#if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
c87a0bc8 573wxStringCharType *wxString::GetWriteBuf(size_t nLen)
d8a4b666
VS
574{
575 return DoGetWriteBuf(nLen);
576}
577
578void wxString::UngetWriteBuf()
579{
580 DoUngetWriteBuf();
581}
582
583void wxString::UngetWriteBuf(size_t nLen)
584{
585 DoUngetWriteBuf(nLen);
586}
a7ea63e2 587#endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
e87b7833 588
d8a4b666 589
e87b7833
MB
590// ---------------------------------------------------------------------------
591// data access
592// ---------------------------------------------------------------------------
593
594// all functions are inline in string.h
595
596// ---------------------------------------------------------------------------
e8f59039 597// concatenation operators
e87b7833
MB
598// ---------------------------------------------------------------------------
599
c801d85f 600/*
c801d85f
KB
601 * concatenation functions come in 5 flavours:
602 * string + string
603 * char + string and string + char
604 * C str + string and string + C str
605 */
606
b1801e0e 607wxString operator+(const wxString& str1, const wxString& str2)
c801d85f 608{
992527a5 609#if !wxUSE_STL_BASED_WXSTRING
8f93a29f
VS
610 wxASSERT( str1.IsValid() );
611 wxASSERT( str2.IsValid() );
e87b7833 612#endif
097c080b 613
3458e408
WS
614 wxString s = str1;
615 s += str2;
3168a13f 616
3458e408 617 return s;
c801d85f
KB
618}
619
c9f78968 620wxString operator+(const wxString& str, wxUniChar ch)
c801d85f 621{
992527a5 622#if !wxUSE_STL_BASED_WXSTRING
8f93a29f 623 wxASSERT( str.IsValid() );
e87b7833 624#endif
3168a13f 625
3458e408
WS
626 wxString s = str;
627 s += ch;
097c080b 628
3458e408 629 return s;
c801d85f
KB
630}
631
c9f78968 632wxString operator+(wxUniChar ch, const wxString& str)
c801d85f 633{
992527a5 634#if !wxUSE_STL_BASED_WXSTRING
8f93a29f 635 wxASSERT( str.IsValid() );
e87b7833 636#endif
097c080b 637
3458e408
WS
638 wxString s = ch;
639 s += str;
3168a13f 640
3458e408 641 return s;
c801d85f
KB
642}
643
8f93a29f 644wxString operator+(const wxString& str, const char *psz)
c801d85f 645{
992527a5 646#if !wxUSE_STL_BASED_WXSTRING
8f93a29f 647 wxASSERT( str.IsValid() );
e87b7833 648#endif
097c080b 649
3458e408 650 wxString s;
8f93a29f 651 if ( !s.Alloc(strlen(psz) + str.length()) ) {
9a83f860 652 wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
3458e408
WS
653 }
654 s += str;
655 s += psz;
3168a13f 656
3458e408 657 return s;
c801d85f
KB
658}
659
8f93a29f 660wxString operator+(const wxString& str, const wchar_t *pwz)
c801d85f 661{
992527a5 662#if !wxUSE_STL_BASED_WXSTRING
8f93a29f
VS
663 wxASSERT( str.IsValid() );
664#endif
665
666 wxString s;
667 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
9a83f860 668 wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
8f93a29f
VS
669 }
670 s += str;
671 s += pwz;
672
673 return s;
674}
675
676wxString operator+(const char *psz, const wxString& str)
677{
a7ea63e2
VS
678#if !wxUSE_STL_BASED_WXSTRING
679 wxASSERT( str.IsValid() );
680#endif
681
682 wxString s;
683 if ( !s.Alloc(strlen(psz) + str.length()) ) {
9a83f860 684 wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
a7ea63e2
VS
685 }
686 s = psz;
687 s += str;
688
689 return s;
690}
691
692wxString operator+(const wchar_t *pwz, const wxString& str)
693{
694#if !wxUSE_STL_BASED_WXSTRING
695 wxASSERT( str.IsValid() );
696#endif
697
698 wxString s;
699 if ( !s.Alloc(wxWcslen(pwz) + str.length()) ) {
9a83f860 700 wxFAIL_MSG( wxT("out of memory in wxString::operator+") );
a7ea63e2
VS
701 }
702 s = pwz;
703 s += str;
704
705 return s;
706}
707
708// ---------------------------------------------------------------------------
709// string comparison
710// ---------------------------------------------------------------------------
711
52de37c7
VS
712bool wxString::IsSameAs(wxUniChar c, bool compareWithCase) const
713{
714 return (length() == 1) && (compareWithCase ? GetChar(0u) == c
715 : wxToupper(GetChar(0u)) == wxToupper(c));
716}
717
a7ea63e2
VS
718#ifdef HAVE_STD_STRING_COMPARE
719
720// NB: Comparison code (both if HAVE_STD_STRING_COMPARE and if not) works with
721// UTF-8 encoded strings too, thanks to UTF-8's design which allows us to
722// sort strings in characters code point order by sorting the byte sequence
723// in byte values order (i.e. what strcmp() and memcmp() do).
724
725int wxString::compare(const wxString& str) const
726{
727 return m_impl.compare(str.m_impl);
728}
729
730int wxString::compare(size_t nStart, size_t nLen,
731 const wxString& str) const
732{
733 size_t pos, len;
734 PosLenToImpl(nStart, nLen, &pos, &len);
735 return m_impl.compare(pos, len, str.m_impl);
736}
737
738int wxString::compare(size_t nStart, size_t nLen,
739 const wxString& str,
740 size_t nStart2, size_t nLen2) const
741{
742 size_t pos, len;
743 PosLenToImpl(nStart, nLen, &pos, &len);
744
745 size_t pos2, len2;
746 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
747
748 return m_impl.compare(pos, len, str.m_impl, pos2, len2);
749}
750
751int wxString::compare(const char* sz) const
752{
753 return m_impl.compare(ImplStr(sz));
754}
755
756int wxString::compare(const wchar_t* sz) const
757{
758 return m_impl.compare(ImplStr(sz));
759}
760
761int wxString::compare(size_t nStart, size_t nLen,
762 const char* sz, size_t nCount) const
763{
764 size_t pos, len;
765 PosLenToImpl(nStart, nLen, &pos, &len);
766
767 SubstrBufFromMB str(ImplStr(sz, nCount));
768
769 return m_impl.compare(pos, len, str.data, str.len);
770}
771
772int wxString::compare(size_t nStart, size_t nLen,
773 const wchar_t* sz, size_t nCount) const
774{
775 size_t pos, len;
776 PosLenToImpl(nStart, nLen, &pos, &len);
777
778 SubstrBufFromWC str(ImplStr(sz, nCount));
779
780 return m_impl.compare(pos, len, str.data, str.len);
781}
782
783#else // !HAVE_STD_STRING_COMPARE
784
785static inline int wxDoCmp(const wxStringCharType* s1, size_t l1,
786 const wxStringCharType* s2, size_t l2)
787{
788 if( l1 == l2 )
789 return wxStringMemcmp(s1, s2, l1);
790 else if( l1 < l2 )
791 {
792 int ret = wxStringMemcmp(s1, s2, l1);
793 return ret == 0 ? -1 : ret;
794 }
795 else
796 {
797 int ret = wxStringMemcmp(s1, s2, l2);
798 return ret == 0 ? +1 : ret;
799 }
800}
801
802int wxString::compare(const wxString& str) const
803{
804 return ::wxDoCmp(m_impl.data(), m_impl.length(),
805 str.m_impl.data(), str.m_impl.length());
806}
807
808int wxString::compare(size_t nStart, size_t nLen,
809 const wxString& str) const
810{
811 wxASSERT(nStart <= length());
812 size_type strLen = length() - nStart;
813 nLen = strLen < nLen ? strLen : nLen;
814
815 size_t pos, len;
816 PosLenToImpl(nStart, nLen, &pos, &len);
817
818 return ::wxDoCmp(m_impl.data() + pos, len,
819 str.m_impl.data(), str.m_impl.length());
820}
821
822int wxString::compare(size_t nStart, size_t nLen,
823 const wxString& str,
824 size_t nStart2, size_t nLen2) const
825{
826 wxASSERT(nStart <= length());
827 wxASSERT(nStart2 <= str.length());
828 size_type strLen = length() - nStart,
829 strLen2 = str.length() - nStart2;
830 nLen = strLen < nLen ? strLen : nLen;
831 nLen2 = strLen2 < nLen2 ? strLen2 : nLen2;
832
833 size_t pos, len;
834 PosLenToImpl(nStart, nLen, &pos, &len);
835 size_t pos2, len2;
836 str.PosLenToImpl(nStart2, nLen2, &pos2, &len2);
837
838 return ::wxDoCmp(m_impl.data() + pos, len,
839 str.m_impl.data() + pos2, len2);
840}
841
842int wxString::compare(const char* sz) const
843{
844 SubstrBufFromMB str(ImplStr(sz, npos));
845 if ( str.len == npos )
846 str.len = wxStringStrlen(str.data);
847 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
848}
849
850int wxString::compare(const wchar_t* sz) const
851{
852 SubstrBufFromWC str(ImplStr(sz, npos));
853 if ( str.len == npos )
854 str.len = wxStringStrlen(str.data);
855 return ::wxDoCmp(m_impl.data(), m_impl.length(), str.data, str.len);
856}
857
858int wxString::compare(size_t nStart, size_t nLen,
859 const char* sz, size_t nCount) const
860{
861 wxASSERT(nStart <= length());
862 size_type strLen = length() - nStart;
863 nLen = strLen < nLen ? strLen : nLen;
097c080b 864
a7ea63e2
VS
865 size_t pos, len;
866 PosLenToImpl(nStart, nLen, &pos, &len);
3168a13f 867
a7ea63e2
VS
868 SubstrBufFromMB str(ImplStr(sz, nCount));
869 if ( str.len == npos )
870 str.len = wxStringStrlen(str.data);
871
872 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
c801d85f
KB
873}
874
a7ea63e2
VS
875int wxString::compare(size_t nStart, size_t nLen,
876 const wchar_t* sz, size_t nCount) const
8f93a29f 877{
a7ea63e2
VS
878 wxASSERT(nStart <= length());
879 size_type strLen = length() - nStart;
880 nLen = strLen < nLen ? strLen : nLen;
8f93a29f 881
a7ea63e2
VS
882 size_t pos, len;
883 PosLenToImpl(nStart, nLen, &pos, &len);
8f93a29f 884
a7ea63e2
VS
885 SubstrBufFromWC str(ImplStr(sz, nCount));
886 if ( str.len == npos )
887 str.len = wxStringStrlen(str.data);
888
889 return ::wxDoCmp(m_impl.data() + pos, len, str.data, str.len);
8f93a29f
VS
890}
891
a7ea63e2
VS
892#endif // HAVE_STD_STRING_COMPARE/!HAVE_STD_STRING_COMPARE
893
894
8f93a29f
VS
895// ---------------------------------------------------------------------------
896// find_{first,last}_[not]_of functions
897// ---------------------------------------------------------------------------
898
899#if !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
c801d85f 900
8f93a29f
VS
901// NB: All these functions are implemented with the argument being wxChar*,
902// i.e. widechar string in any Unicode build, even though native string
903// representation is char* in the UTF-8 build. This is because we couldn't
904// use memchr() to determine if a character is in a set encoded as UTF-8.
905
906size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
dcb68102 907{
8f93a29f 908 return find_first_of(sz, nStart, wxStrlen(sz));
dcb68102
RN
909}
910
8f93a29f 911size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart) const
dcb68102 912{
8f93a29f 913 return find_first_not_of(sz, nStart, wxStrlen(sz));
dcb68102
RN
914}
915
8f93a29f 916size_t wxString::find_first_of(const wxChar* sz, size_t nStart, size_t n) const
dcb68102 917{
9a83f860 918 wxASSERT_MSG( nStart <= length(), wxT("invalid index") );
dcb68102 919
8f93a29f
VS
920 size_t idx = nStart;
921 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
dcb68102 922 {
8f93a29f
VS
923 if ( wxTmemchr(sz, *i, n) )
924 return idx;
dcb68102 925 }
8f93a29f
VS
926
927 return npos;
928}
929
930size_t wxString::find_first_not_of(const wxChar* sz, size_t nStart, size_t n) const
931{
9a83f860 932 wxASSERT_MSG( nStart <= length(), wxT("invalid index") );
8f93a29f
VS
933
934 size_t idx = nStart;
935 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
dcb68102 936 {
8f93a29f
VS
937 if ( !wxTmemchr(sz, *i, n) )
938 return idx;
939 }
940
941 return npos;
942}
943
944
945size_t wxString::find_last_of(const wxChar* sz, size_t nStart) const
946{
947 return find_last_of(sz, nStart, wxStrlen(sz));
948}
949
950size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart) const
951{
952 return find_last_not_of(sz, nStart, wxStrlen(sz));
953}
954
955size_t wxString::find_last_of(const wxChar* sz, size_t nStart, size_t n) const
956{
957 size_t len = length();
958
959 if ( nStart == npos )
960 {
961 nStart = len - 1;
dcb68102 962 }
2c09fb3b 963 else
dcb68102 964 {
9a83f860 965 wxASSERT_MSG( nStart <= len, wxT("invalid index") );
dcb68102 966 }
8f93a29f
VS
967
968 size_t idx = nStart;
969 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
970 i != rend(); --idx, ++i )
971 {
972 if ( wxTmemchr(sz, *i, n) )
973 return idx;
974 }
975
976 return npos;
dcb68102
RN
977}
978
8f93a29f 979size_t wxString::find_last_not_of(const wxChar* sz, size_t nStart, size_t n) const
dcb68102 980{
8f93a29f
VS
981 size_t len = length();
982
983 if ( nStart == npos )
984 {
985 nStart = len - 1;
986 }
987 else
988 {
9a83f860 989 wxASSERT_MSG( nStart <= len, wxT("invalid index") );
8f93a29f
VS
990 }
991
992 size_t idx = nStart;
993 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
994 i != rend(); --idx, ++i )
995 {
996 if ( !wxTmemchr(sz, *i, n) )
997 return idx;
998 }
999
1000 return npos;
dcb68102
RN
1001}
1002
8f93a29f 1003size_t wxString::find_first_not_of(wxUniChar ch, size_t nStart) const
dcb68102 1004{
9a83f860 1005 wxASSERT_MSG( nStart <= length(), wxT("invalid index") );
8f93a29f
VS
1006
1007 size_t idx = nStart;
1008 for ( const_iterator i = begin() + nStart; i != end(); ++idx, ++i )
1009 {
1010 if ( *i != ch )
1011 return idx;
1012 }
1013
1014 return npos;
1015}
1016
1017size_t wxString::find_last_not_of(wxUniChar ch, size_t nStart) const
1018{
1019 size_t len = length();
1020
1021 if ( nStart == npos )
1022 {
1023 nStart = len - 1;
1024 }
1025 else
1026 {
9a83f860 1027 wxASSERT_MSG( nStart <= len, wxT("invalid index") );
8f93a29f
VS
1028 }
1029
1030 size_t idx = nStart;
1031 for ( const_reverse_iterator i = rbegin() + (len - nStart - 1);
1032 i != rend(); --idx, ++i )
1033 {
1034 if ( *i != ch )
1035 return idx;
1036 }
1037
1038 return npos;
1039}
1040
1041// the functions above were implemented for wchar_t* arguments in Unicode
1042// build and char* in ANSI build; below are implementations for the other
1043// version:
1044#if wxUSE_UNICODE
1045 #define wxOtherCharType char
1046 #define STRCONV (const wxChar*)wxConvLibc.cMB2WC
1047#else
1048 #define wxOtherCharType wchar_t
1049 #define STRCONV (const wxChar*)wxConvLibc.cWC2MB
1050#endif
1051
1052size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart) const
1053 { return find_first_of(STRCONV(sz), nStart); }
1054
1055size_t wxString::find_first_of(const wxOtherCharType* sz, size_t nStart,
1056 size_t n) const
1057 { return find_first_of(STRCONV(sz, n, NULL), nStart, n); }
1058size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart) const
1059 { return find_last_of(STRCONV(sz), nStart); }
1060size_t wxString::find_last_of(const wxOtherCharType* sz, size_t nStart,
1061 size_t n) const
1062 { return find_last_of(STRCONV(sz, n, NULL), nStart, n); }
1063size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart) const
1064 { return find_first_not_of(STRCONV(sz), nStart); }
1065size_t wxString::find_first_not_of(const wxOtherCharType* sz, size_t nStart,
1066 size_t n) const
1067 { return find_first_not_of(STRCONV(sz, n, NULL), nStart, n); }
1068size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart) const
1069 { return find_last_not_of(STRCONV(sz), nStart); }
1070size_t wxString::find_last_not_of(const wxOtherCharType* sz, size_t nStart,
1071 size_t n) const
1072 { return find_last_not_of(STRCONV(sz, n, NULL), nStart, n); }
1073
1074#undef wxOtherCharType
1075#undef STRCONV
1076
1077#endif // !wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
1078
1079// ===========================================================================
1080// other common string functions
1081// ===========================================================================
1082
1083int wxString::CmpNoCase(const wxString& s) const
1084{
5858fe68
VZ
1085#if !wxUSE_UNICODE_UTF8
1086 // We compare NUL-delimited chunks of the strings inside the loop. We will
1087 // do as many iterations as there are embedded NULs in the string, i.e.
1088 // usually we will run it just once.
1089
1090 typedef const wxStringImpl::value_type *pchar_type;
1091 const pchar_type thisBegin = m_impl.c_str();
1092 const pchar_type thatBegin = s.m_impl.c_str();
1093
1094 const pchar_type thisEnd = thisBegin + m_impl.length();
1095 const pchar_type thatEnd = thatBegin + s.m_impl.length();
825d69c1 1096
5858fe68
VZ
1097 pchar_type thisCur = thisBegin;
1098 pchar_type thatCur = thatBegin;
825d69c1 1099
5858fe68
VZ
1100 int rc;
1101 for ( ;; )
1102 {
1103 // Compare until the next NUL, if the strings differ this is the final
1104 // result.
1105 rc = wxStricmp(thisCur, thatCur);
1106 if ( rc )
1107 break;
1108
1109 const size_t lenChunk = wxStrlen(thisCur);
1110 thisCur += lenChunk;
1111 thatCur += lenChunk;
1112
1113 // Skip all the NULs as wxStricmp() doesn't handle them.
1114 for ( ; !*thisCur; thisCur++, thatCur++ )
1115 {
1116 // Check if we exhausted either of the strings.
1117 if ( thisCur == thisEnd )
1118 {
1119 // This one is exhausted, is the other one too?
1120 return thatCur == thatEnd ? 0 : -1;
1121 }
8f93a29f 1122
5858fe68
VZ
1123 if ( thatCur == thatEnd )
1124 {
1125 // Because of the test above we know that this one is not
1126 // exhausted yet so it's greater than the other one that is.
1127 return 1;
1128 }
825d69c1 1129
5858fe68
VZ
1130 if ( *thatCur )
1131 {
1132 // Anything non-NUL is greater than NUL.
1133 return -1;
1134 }
1135 }
825d69c1 1136 }
825d69c1 1137
5858fe68
VZ
1138 return rc;
1139#else // wxUSE_UNICODE_UTF8
1140 // CRT functions can't be used for case-insensitive comparison of UTF-8
1141 // strings so do it in the naive, simple and inefficient way.
825d69c1
VZ
1142
1143 // FIXME-UTF8: use wxUniChar::ToLower/ToUpper once added
8f93a29f
VS
1144 const_iterator i1 = begin();
1145 const_iterator end1 = end();
1146 const_iterator i2 = s.begin();
1147 const_iterator end2 = s.end();
1148
0d8b0f94 1149 for ( ; i1 != end1 && i2 != end2; ++i1, ++i2 )
8f93a29f
VS
1150 {
1151 wxUniChar lower1 = (wxChar)wxTolower(*i1);
1152 wxUniChar lower2 = (wxChar)wxTolower(*i2);
1153 if ( lower1 != lower2 )
1154 return lower1 < lower2 ? -1 : 1;
1155 }
1156
1157 size_t len1 = length();
1158 size_t len2 = s.length();
dcb68102 1159
8f93a29f
VS
1160 if ( len1 < len2 )
1161 return -1;
1162 else if ( len1 > len2 )
1163 return 1;
1164 return 0;
5858fe68 1165#endif // !wxUSE_UNICODE_UTF8/wxUSE_UNICODE_UTF8
dcb68102
RN
1166}
1167
1168
b1ac3b56 1169#if wxUSE_UNICODE
e015c2a3 1170
e6310bbc 1171wxString wxString::FromAscii(const char *ascii, size_t len)
b1ac3b56 1172{
e6310bbc 1173 if (!ascii || len == 0)
b1ac3b56 1174 return wxEmptyString;
e015c2a3 1175
b1ac3b56 1176 wxString res;
e015c2a3 1177
e6310bbc 1178 {
6798451b 1179 wxStringInternalBuffer buf(res, len);
602a857b 1180 wxStringCharType *dest = buf;
c1eada83 1181
602a857b
VS
1182 for ( ; len > 0; --len )
1183 {
1184 unsigned char c = (unsigned char)*ascii++;
1185 wxASSERT_MSG( c < 0x80,
9a83f860 1186 wxT("Non-ASCII value passed to FromAscii().") );
c1eada83 1187
602a857b
VS
1188 *dest++ = (wchar_t)c;
1189 }
e015c2a3
VZ
1190 }
1191
b1ac3b56
RR
1192 return res;
1193}
1194
e6310bbc
VS
1195wxString wxString::FromAscii(const char *ascii)
1196{
0081dd72 1197 return FromAscii(ascii, wxStrlen(ascii));
e6310bbc
VS
1198}
1199
c5288c5c 1200wxString wxString::FromAscii(char ascii)
2b5f62a0
VZ
1201{
1202 // What do we do with '\0' ?
1203
c1eada83 1204 unsigned char c = (unsigned char)ascii;
8760bc65 1205
9a83f860 1206 wxASSERT_MSG( c < 0x80, wxT("Non-ASCII value passed to FromAscii().") );
c1eada83
VS
1207
1208 // NB: the cast to wchar_t causes interpretation of 'ascii' as Latin1 value
1209 return wxString(wxUniChar((wchar_t)c));
2b5f62a0
VZ
1210}
1211
de4983f3 1212const wxScopedCharBuffer wxString::ToAscii() const
b1ac3b56 1213{
e015c2a3
VZ
1214 // this will allocate enough space for the terminating NUL too
1215 wxCharBuffer buffer(length());
6e394fc6 1216 char *dest = buffer.data();
e015c2a3 1217
c1eada83 1218 for ( const_iterator i = begin(); i != end(); ++i )
b1ac3b56 1219 {
c1eada83
VS
1220 wxUniChar c(*i);
1221 // FIXME-UTF8: unify substituted char ('_') with wxUniChar ('?')
1222 *dest++ = c.IsAscii() ? (char)c : '_';
e015c2a3
VZ
1223
1224 // the output string can't have embedded NULs anyhow, so we can safely
1225 // stop at first of them even if we do have any
c1eada83 1226 if ( !c )
e015c2a3 1227 break;
b1ac3b56 1228 }
e015c2a3 1229
b1ac3b56
RR
1230 return buffer;
1231}
e015c2a3 1232
c1eada83 1233#endif // wxUSE_UNICODE
b1ac3b56 1234
c801d85f 1235// extract string of length nCount starting at nFirst
c801d85f
KB
1236wxString wxString::Mid(size_t nFirst, size_t nCount) const
1237{
73f507f5 1238 size_t nLen = length();
30d9011f 1239
73f507f5
WS
1240 // default value of nCount is npos and means "till the end"
1241 if ( nCount == npos )
1242 {
1243 nCount = nLen - nFirst;
1244 }
30d9011f 1245
73f507f5
WS
1246 // out-of-bounds requests return sensible things
1247 if ( nFirst + nCount > nLen )
1248 {
1249 nCount = nLen - nFirst;
1250 }
c801d85f 1251
73f507f5
WS
1252 if ( nFirst > nLen )
1253 {
1254 // AllocCopy() will return empty string
1255 return wxEmptyString;
1256 }
c801d85f 1257
73f507f5
WS
1258 wxString dest(*this, nFirst, nCount);
1259 if ( dest.length() != nCount )
1260 {
9a83f860 1261 wxFAIL_MSG( wxT("out of memory in wxString::Mid") );
73f507f5 1262 }
30d9011f 1263
73f507f5 1264 return dest;
c801d85f
KB
1265}
1266
e87b7833 1267// check that the string starts with prefix and return the rest of the string
d775fa82 1268// in the provided pointer if it is not NULL, otherwise return false
c5e7a7d7 1269bool wxString::StartsWith(const wxString& prefix, wxString *rest) const
f6bcfd97 1270{
c5e7a7d7
VS
1271 if ( compare(0, prefix.length(), prefix) != 0 )
1272 return false;
f6bcfd97
BP
1273
1274 if ( rest )
1275 {
1276 // put the rest of the string into provided pointer
c5e7a7d7 1277 rest->assign(*this, prefix.length(), npos);
f6bcfd97
BP
1278 }
1279
d775fa82 1280 return true;
f6bcfd97
BP
1281}
1282
3affcd07
VZ
1283
1284// check that the string ends with suffix and return the rest of it in the
1285// provided pointer if it is not NULL, otherwise return false
c5e7a7d7 1286bool wxString::EndsWith(const wxString& suffix, wxString *rest) const
3affcd07 1287{
c5e7a7d7 1288 int start = length() - suffix.length();
81727065
VS
1289
1290 if ( start < 0 || compare(start, npos, suffix) != 0 )
3affcd07
VZ
1291 return false;
1292
1293 if ( rest )
1294 {
1295 // put the rest of the string into provided pointer
1296 rest->assign(*this, 0, start);
1297 }
1298
1299 return true;
1300}
1301
1302
c801d85f
KB
1303// extract nCount last (rightmost) characters
1304wxString wxString::Right(size_t nCount) const
1305{
e87b7833
MB
1306 if ( nCount > length() )
1307 nCount = length();
c801d85f 1308
e87b7833
MB
1309 wxString dest(*this, length() - nCount, nCount);
1310 if ( dest.length() != nCount ) {
9a83f860 1311 wxFAIL_MSG( wxT("out of memory in wxString::Right") );
b1801e0e 1312 }
c801d85f
KB
1313 return dest;
1314}
1315
7929902d 1316// get all characters after the last occurrence of ch
c801d85f 1317// (returns the whole string if ch not found)
c9f78968 1318wxString wxString::AfterLast(wxUniChar ch) const
c801d85f
KB
1319{
1320 wxString str;
d775fa82 1321 int iPos = Find(ch, true);
3c67202d 1322 if ( iPos == wxNOT_FOUND )
c801d85f
KB
1323 str = *this;
1324 else
c565abe1 1325 str.assign(*this, iPos + 1, npos);
c801d85f
KB
1326
1327 return str;
1328}
1329
1330// extract nCount first (leftmost) characters
1331wxString wxString::Left(size_t nCount) const
1332{
e87b7833
MB
1333 if ( nCount > length() )
1334 nCount = length();
c801d85f 1335
e87b7833
MB
1336 wxString dest(*this, 0, nCount);
1337 if ( dest.length() != nCount ) {
9a83f860 1338 wxFAIL_MSG( wxT("out of memory in wxString::Left") );
b1801e0e 1339 }
c801d85f
KB
1340 return dest;
1341}
1342
7929902d 1343// get all characters before the first occurrence of ch
c801d85f 1344// (returns the whole string if ch not found)
6becc1e6 1345wxString wxString::BeforeFirst(wxUniChar ch, wxString *rest) const
c801d85f 1346{
e87b7833 1347 int iPos = Find(ch);
c565abe1 1348 if ( iPos == wxNOT_FOUND )
6becc1e6
VZ
1349 {
1350 iPos = length();
1351 if ( rest )
1352 rest->clear();
1353 }
1354 else
1355 {
1356 if ( rest )
1357 rest->assign(*this, iPos + 1, npos);
1358 }
1359
e87b7833 1360 return wxString(*this, 0, iPos);
c801d85f
KB
1361}
1362
7929902d 1363/// get all characters before the last occurrence of ch
c801d85f 1364/// (returns empty string if ch not found)
6becc1e6 1365wxString wxString::BeforeLast(wxUniChar ch, wxString *rest) const
c801d85f
KB
1366{
1367 wxString str;
d775fa82 1368 int iPos = Find(ch, true);
6becc1e6
VZ
1369 if ( iPos != wxNOT_FOUND )
1370 {
1371 if ( iPos != 0 )
1372 str.assign(*this, 0, iPos);
1373
1374 if ( rest )
1375 rest->assign(*this, iPos + 1, npos);
1376 }
1377 else
1378 {
1379 if ( rest )
1380 *rest = *this;
1381 }
c801d85f
KB
1382
1383 return str;
1384}
1385
7929902d 1386/// get all characters after the first occurrence of ch
c801d85f 1387/// (returns empty string if ch not found)
c9f78968 1388wxString wxString::AfterFirst(wxUniChar ch) const
c801d85f
KB
1389{
1390 wxString str;
1391 int iPos = Find(ch);
3c67202d 1392 if ( iPos != wxNOT_FOUND )
c565abe1 1393 str.assign(*this, iPos + 1, npos);
c801d85f
KB
1394
1395 return str;
1396}
1397
7929902d 1398// replace first (or all) occurrences of some substring with another one
8a540c88
VS
1399size_t wxString::Replace(const wxString& strOld,
1400 const wxString& strNew, bool bReplaceAll)
c801d85f 1401{
a8f1f1b2 1402 // if we tried to replace an empty string we'd enter an infinite loop below
8a540c88 1403 wxCHECK_MSG( !strOld.empty(), 0,
9a83f860 1404 wxT("wxString::Replace(): invalid parameter") );
a8f1f1b2 1405
68482dc5
VZ
1406 wxSTRING_INVALIDATE_CACHE();
1407
510bb748 1408 size_t uiCount = 0; // count of replacements made
c801d85f 1409
8a627032
VZ
1410 // optimize the special common case: replacement of one character by
1411 // another one (in UTF-8 case we can only do this for ASCII characters)
1412 //
1413 // benchmarks show that this special version is around 3 times faster
1414 // (depending on the proportion of matching characters and UTF-8/wchar_t
1415 // build)
1416 if ( strOld.m_impl.length() == 1 && strNew.m_impl.length() == 1 )
1417 {
1418 const wxStringCharType chOld = strOld.m_impl[0],
1419 chNew = strNew.m_impl[0];
1420
1421 // this loop is the simplified version of the one below
1422 for ( size_t pos = 0; ; )
1423 {
1424 pos = m_impl.find(chOld, pos);
1425 if ( pos == npos )
1426 break;
c801d85f 1427
8a627032
VZ
1428 m_impl[pos++] = chNew;
1429
1430 uiCount++;
1431
1432 if ( !bReplaceAll )
1433 break;
1434 }
1435 }
072682ce
VZ
1436 else if ( !bReplaceAll)
1437 {
da94537c 1438 size_t pos = m_impl.find(strOld.m_impl, 0);
072682ce
VZ
1439 if ( pos != npos )
1440 {
1441 m_impl.replace(pos, strOld.m_impl.length(), strNew.m_impl);
1442 uiCount = 1;
1443 }
1444 }
1445 else // replace all occurrences
510bb748 1446 {
8a627032
VZ
1447 const size_t uiOldLen = strOld.m_impl.length();
1448 const size_t uiNewLen = strNew.m_impl.length();
1449
072682ce
VZ
1450 // first scan the string to find all positions at which the replacement
1451 // should be made
1452 wxVector<size_t> replacePositions;
1453
1454 size_t pos;
1455 for ( pos = m_impl.find(strOld.m_impl, 0);
1456 pos != npos;
1457 pos = m_impl.find(strOld.m_impl, pos + uiOldLen))
8a627032 1458 {
072682ce
VZ
1459 replacePositions.push_back(pos);
1460 ++uiCount;
1461 }
510bb748 1462
072682ce
VZ
1463 if ( !uiCount )
1464 return 0;
510bb748 1465
072682ce
VZ
1466 // allocate enough memory for the whole new string
1467 wxString tmp;
1468 tmp.m_impl.reserve(m_impl.length() + uiCount*(uiNewLen - uiOldLen));
ad5bb7d6 1469
072682ce
VZ
1470 // copy this string to tmp doing replacements on the fly
1471 size_t replNum = 0;
1472 for ( pos = 0; replNum < uiCount; replNum++ )
1473 {
1474 const size_t nextReplPos = replacePositions[replNum];
394b2900 1475
072682ce
VZ
1476 if ( pos != nextReplPos )
1477 {
1478 tmp.m_impl.append(m_impl, pos, nextReplPos - pos);
1479 }
1480
1481 tmp.m_impl.append(strNew.m_impl);
1482 pos = nextReplPos + uiOldLen;
8a627032 1483 }
072682ce
VZ
1484
1485 if ( pos != m_impl.length() )
1486 {
1487 // append the rest of the string unchanged
1488 tmp.m_impl.append(m_impl, pos, m_impl.length() - pos);
1489 }
1490
1491 swap(tmp);
c801d85f 1492 }
c801d85f 1493
510bb748 1494 return uiCount;
c801d85f
KB
1495}
1496
1497bool wxString::IsAscii() const
1498{
a4a44612
VS
1499 for ( const_iterator i = begin(); i != end(); ++i )
1500 {
1501 if ( !(*i).IsAscii() )
1502 return false;
1503 }
1504
1505 return true;
c801d85f 1506}
dd1eaa89 1507
c801d85f
KB
1508bool wxString::IsWord() const
1509{
a4a44612
VS
1510 for ( const_iterator i = begin(); i != end(); ++i )
1511 {
1512 if ( !wxIsalpha(*i) )
1513 return false;
1514 }
1515
1516 return true;
c801d85f 1517}
dd1eaa89 1518
c801d85f
KB
1519bool wxString::IsNumber() const
1520{
a4a44612
VS
1521 if ( empty() )
1522 return true;
1523
1524 const_iterator i = begin();
1525
9a83f860 1526 if ( *i == wxT('-') || *i == wxT('+') )
a4a44612
VS
1527 ++i;
1528
1529 for ( ; i != end(); ++i )
1530 {
1531 if ( !wxIsdigit(*i) )
1532 return false;
1533 }
1534
1535 return true;
c801d85f
KB
1536}
1537
c801d85f
KB
1538wxString wxString::Strip(stripType w) const
1539{
1540 wxString s = *this;
d775fa82
WS
1541 if ( w & leading ) s.Trim(false);
1542 if ( w & trailing ) s.Trim(true);
c801d85f
KB
1543 return s;
1544}
1545
c801d85f
KB
1546// ---------------------------------------------------------------------------
1547// case conversion
1548// ---------------------------------------------------------------------------
1549
1550wxString& wxString::MakeUpper()
1551{
e87b7833
MB
1552 for ( iterator it = begin(), en = end(); it != en; ++it )
1553 *it = (wxChar)wxToupper(*it);
c801d85f
KB
1554
1555 return *this;
1556}
1557
1558wxString& wxString::MakeLower()
1559{
e87b7833
MB
1560 for ( iterator it = begin(), en = end(); it != en; ++it )
1561 *it = (wxChar)wxTolower(*it);
c801d85f
KB
1562
1563 return *this;
1564}
1565
0c7db140
VZ
1566wxString& wxString::MakeCapitalized()
1567{
1568 const iterator en = end();
1569 iterator it = begin();
1570 if ( it != en )
1571 {
1572 *it = (wxChar)wxToupper(*it);
1573 for ( ++it; it != en; ++it )
1574 *it = (wxChar)wxTolower(*it);
1575 }
1576
1577 return *this;
1578}
1579
c801d85f
KB
1580// ---------------------------------------------------------------------------
1581// trimming and padding
1582// ---------------------------------------------------------------------------
1583
d775fa82 1584// some compilers (VC++ 6.0 not to name them) return true for a call to
9d55bfef 1585// isspace('\xEA') in the C locale which seems to be broken to me, but we have
c95e653c 1586// to live with this by checking that the character is a 7 bit one - even if
9d55bfef 1587// this may fail to detect some spaces (I don't know if Unicode doesn't have
576c608d
VZ
1588// space-like symbols somewhere except in the first 128 chars), it is arguably
1589// still better than trimming away accented letters
1590inline int wxSafeIsspace(wxChar ch) { return (ch < 127) && wxIsspace(ch); }
1591
c801d85f
KB
1592// trims spaces (in the sense of isspace) from left or right side
1593wxString& wxString::Trim(bool bFromRight)
1594{
3458e408
WS
1595 // first check if we're going to modify the string at all
1596 if ( !empty() &&
1597 (
1598 (bFromRight && wxSafeIsspace(GetChar(length() - 1))) ||
1599 (!bFromRight && wxSafeIsspace(GetChar(0u)))
1600 )
2c3b684c 1601 )
2c3b684c 1602 {
3458e408
WS
1603 if ( bFromRight )
1604 {
1605 // find last non-space character
d4d02bd5 1606 reverse_iterator psz = rbegin();
32c62191 1607 while ( (psz != rend()) && wxSafeIsspace(*psz) )
0d8b0f94 1608 ++psz;
92df97b8 1609
3458e408 1610 // truncate at trailing space start
d4d02bd5 1611 erase(psz.base(), end());
3458e408
WS
1612 }
1613 else
1614 {
1615 // find first non-space character
1616 iterator psz = begin();
32c62191 1617 while ( (psz != end()) && wxSafeIsspace(*psz) )
0d8b0f94 1618 ++psz;
2c3b684c 1619
3458e408
WS
1620 // fix up data and length
1621 erase(begin(), psz);
1622 }
2c3b684c 1623 }
c801d85f 1624
3458e408 1625 return *this;
c801d85f
KB
1626}
1627
1628// adds nCount characters chPad to the string from either side
c9f78968 1629wxString& wxString::Pad(size_t nCount, wxUniChar chPad, bool bFromRight)
c801d85f 1630{
3458e408 1631 wxString s(chPad, nCount);
c801d85f 1632
3458e408
WS
1633 if ( bFromRight )
1634 *this += s;
1635 else
1636 {
1637 s += *this;
1638 swap(s);
1639 }
c801d85f 1640
3458e408 1641 return *this;
c801d85f
KB
1642}
1643
1644// truncate the string
1645wxString& wxString::Truncate(size_t uiLen)
1646{
3458e408
WS
1647 if ( uiLen < length() )
1648 {
1649 erase(begin() + uiLen, end());
1650 }
1651 //else: nothing to do, string is already short enough
c801d85f 1652
3458e408 1653 return *this;
c801d85f
KB
1654}
1655
1656// ---------------------------------------------------------------------------
3c67202d 1657// finding (return wxNOT_FOUND if not found and index otherwise)
c801d85f
KB
1658// ---------------------------------------------------------------------------
1659
1660// find a character
c9f78968 1661int wxString::Find(wxUniChar ch, bool bFromEnd) const
c801d85f 1662{
3458e408 1663 size_type idx = bFromEnd ? find_last_of(ch) : find_first_of(ch);
c801d85f 1664
3458e408 1665 return (idx == npos) ? wxNOT_FOUND : (int)idx;
c801d85f
KB
1666}
1667
cd0b1709
VZ
1668// ----------------------------------------------------------------------------
1669// conversion to numbers
1670// ----------------------------------------------------------------------------
1671
52de37c7
VS
1672// The implementation of all the functions below is exactly the same so factor
1673// it out. Note that number extraction works correctly on UTF-8 strings, so
1674// we can use wxStringCharType and wx_str() for maximum efficiency.
122f3c5d 1675
92df97b8 1676#ifndef __WXWINCE__
941a4e62
VS
1677 #define DO_IF_NOT_WINCE(x) x
1678#else
1679 #define DO_IF_NOT_WINCE(x)
92df97b8 1680#endif
4ea4767e 1681
529e491c 1682#define WX_STRING_TO_X_TYPE_START \
9a83f860 1683 wxCHECK_MSG( pVal, false, wxT("NULL output pointer") ); \
941a4e62 1684 DO_IF_NOT_WINCE( errno = 0; ) \
941a4e62 1685 const wxStringCharType *start = wx_str(); \
529e491c
FM
1686 wxStringCharType *end;
1687
69d31e31
VZ
1688// notice that we return false without modifying the output parameter at all if
1689// nothing could be parsed but we do modify it and return false then if we did
1690// parse something successfully but not the entire string
529e491c 1691#define WX_STRING_TO_X_TYPE_END \
69d31e31 1692 if ( end == start DO_IF_NOT_WINCE(|| errno == ERANGE) ) \
c95e653c 1693 return false; \
529e491c 1694 *pVal = val; \
69d31e31 1695 return !*end;
cd0b1709 1696
c95e653c 1697bool wxString::ToLong(long *pVal, int base) const
cd0b1709 1698{
9a83f860 1699 wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );
529e491c
FM
1700
1701 WX_STRING_TO_X_TYPE_START
1702 long val = wxStrtol(start, &end, base);
1703 WX_STRING_TO_X_TYPE_END
619dcb09 1704}
cd0b1709 1705
c95e653c 1706bool wxString::ToULong(unsigned long *pVal, int base) const
619dcb09 1707{
9a83f860 1708 wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );
529e491c
FM
1709
1710 WX_STRING_TO_X_TYPE_START
1711 unsigned long val = wxStrtoul(start, &end, base);
1712 WX_STRING_TO_X_TYPE_END
cd0b1709
VZ
1713}
1714
c95e653c 1715bool wxString::ToLongLong(wxLongLong_t *pVal, int base) const
d6718dd1 1716{
9a83f860 1717 wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );
529e491c
FM
1718
1719 WX_STRING_TO_X_TYPE_START
1720 wxLongLong_t val = wxStrtoll(start, &end, base);
1721 WX_STRING_TO_X_TYPE_END
d6718dd1
VZ
1722}
1723
c95e653c 1724bool wxString::ToULongLong(wxULongLong_t *pVal, int base) const
d6718dd1 1725{
9a83f860 1726 wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );
529e491c
FM
1727
1728 WX_STRING_TO_X_TYPE_START
1729 wxULongLong_t val = wxStrtoull(start, &end, base);
1730 WX_STRING_TO_X_TYPE_END
d6718dd1
VZ
1731}
1732
c95e653c 1733bool wxString::ToDouble(double *pVal) const
cd0b1709 1734{
529e491c
FM
1735 WX_STRING_TO_X_TYPE_START
1736 double val = wxStrtod(start, &end);
1737 WX_STRING_TO_X_TYPE_END
1738}
cd0b1709 1739
529e491c 1740#if wxUSE_XLOCALE
e71e5b37 1741
529e491c
FM
1742bool wxString::ToCLong(long *pVal, int base) const
1743{
9a83f860 1744 wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );
cd0b1709 1745
529e491c 1746 WX_STRING_TO_X_TYPE_START
a51fdf81 1747#if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT)
529e491c
FM
1748 long val = wxStrtol_lA(start, &end, base, wxCLocale);
1749#else
1750 long val = wxStrtol_l(start, &end, base, wxCLocale);
1751#endif
1752 WX_STRING_TO_X_TYPE_END
1753}
c95e653c 1754
529e491c
FM
1755bool wxString::ToCULong(unsigned long *pVal, int base) const
1756{
9a83f860 1757 wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );
c95e653c 1758
529e491c 1759 WX_STRING_TO_X_TYPE_START
a51fdf81 1760#if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT)
529e491c
FM
1761 unsigned long val = wxStrtoul_lA(start, &end, base, wxCLocale);
1762#else
1763 unsigned long val = wxStrtoul_l(start, &end, base, wxCLocale);
1764#endif
1765 WX_STRING_TO_X_TYPE_END
cd0b1709
VZ
1766}
1767
529e491c
FM
1768bool wxString::ToCDouble(double *pVal) const
1769{
1770 WX_STRING_TO_X_TYPE_START
a51fdf81 1771#if (wxUSE_UNICODE_UTF8 || !wxUSE_UNICODE) && defined(wxHAS_XLOCALE_SUPPORT)
529e491c
FM
1772 double val = wxStrtod_lA(start, &end, wxCLocale);
1773#else
1774 double val = wxStrtod_l(start, &end, wxCLocale);
1775#endif
1776 WX_STRING_TO_X_TYPE_END
1777}
1778
105993f7
VZ
1779#else // wxUSE_XLOCALE
1780
1781// Provide implementation of these functions even when wxUSE_XLOCALE is
1782// disabled, we still need them in wxWidgets internal code.
1783
1784// For integers we just assume the current locale uses the same number
1785// representation as the C one as there is nothing else we can do.
1786bool wxString::ToCLong(long *pVal, int base) const
1787{
1788 return ToLong(pVal, base);
1789}
1790
1791bool wxString::ToCULong(unsigned long *pVal, int base) const
1792{
1793 return ToULong(pVal, base);
1794}
1795
1796// For floating point numbers we have to handle the problem of the decimal
1797// point which is different in different locales.
1798bool wxString::ToCDouble(double *pVal) const
1799{
1800 // Create a copy of this string using the decimal point instead of whatever
1801 // separator the current locale uses.
1802#if wxUSE_INTL
1803 wxString sep = wxLocale::GetInfo(wxLOCALE_DECIMAL_POINT,
1804 wxLOCALE_CAT_NUMBER);
1805 if ( sep == "." )
1806 {
1807 // We can avoid an unnecessary string copy in this case.
1808 return ToDouble(pVal);
1809 }
1810#else // !wxUSE_INTL
1811 // We don't know what the current separator is so it might even be a point
1812 // already, try to parse the string as a double:
1813 if ( ToDouble(pVal) )
1814 {
1815 // It must have been the point, nothing else to do.
1816 return true;
1817 }
1818
1819 // Try to guess the separator, using the most common alternative value.
1820 wxString sep(",");
1821#endif // wxUSE_INTL/!wxUSE_INTL
1822 wxString cstr(*this);
1823 cstr.Replace(".", sep);
1824
1825 return cstr.ToDouble(pVal);
1826}
1827
1828#endif // wxUSE_XLOCALE/!wxUSE_XLOCALE
529e491c 1829
951201d8
VZ
1830// ----------------------------------------------------------------------------
1831// number to string conversion
1832// ----------------------------------------------------------------------------
1833
1834/* static */
fd3a4cb9 1835wxString wxString::FromDouble(double val, int precision)
951201d8 1836{
fd3a4cb9
VZ
1837 wxCHECK_MSG( precision >= -1, wxString(), "Invalid negative precision" );
1838
1839 wxString format;
1840 if ( precision == -1 )
1841 {
1842 format = "%g";
1843 }
1844 else // Use fixed precision.
1845 {
1846 format.Printf("%%.%df", precision);
1847 }
1848
1849 return wxString::Format(format, val);
1850}
1851
1852/* static */
1853wxString wxString::FromCDouble(double val, int precision)
1854{
1855 wxCHECK_MSG( precision >= -1, wxString(), "Invalid negative precision" );
1856
951201d8
VZ
1857#if wxUSE_STD_IOSTREAM && wxUSE_STD_STRING
1858 // We assume that we can use the ostream and not wstream for numbers.
1859 wxSTD ostringstream os;
fd3a4cb9
VZ
1860 if ( precision != -1 )
1861 {
1862 os.precision(precision);
1863 os.setf(std::ios::fixed, std::ios::floatfield);
1864 }
1865
951201d8
VZ
1866 os << val;
1867 return os.str();
17a8f66c 1868#else // !wxUSE_STD_IOSTREAM
951201d8
VZ
1869 // Can't use iostream locale support, fall back to the manual method
1870 // instead.
fd3a4cb9 1871 wxString s = FromDouble(val, precision);
951201d8
VZ
1872#if wxUSE_INTL
1873 wxString sep = wxLocale::GetInfo(wxLOCALE_DECIMAL_POINT,
1874 wxLOCALE_CAT_NUMBER);
1875#else // !wxUSE_INTL
1876 // As above, this is the most common alternative value. Notice that here it
1877 // doesn't matter if we guess wrongly and the current separator is already
1878 // ".": we'll just waste a call to Replace() in this case.
1879 wxString sep(",");
1880#endif // wxUSE_INTL/!wxUSE_INTL
1881
1882 s.Replace(sep, ".");
1883 return s;
1884#endif // wxUSE_STD_IOSTREAM/!wxUSE_STD_IOSTREAM
1885}
1886
c801d85f 1887// ---------------------------------------------------------------------------
9efd3367 1888// formatted output
c801d85f 1889// ---------------------------------------------------------------------------
378b05f7 1890
d1f6e2cf 1891#if !wxUSE_UTF8_LOCALE_ONLY
341e7d28 1892/* static */
c9f78968 1893#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1528e0b8 1894wxString wxStringPrintfMixinBase::DoFormatWchar(const wxChar *format, ...)
c9f78968 1895#else
d1f6e2cf 1896wxString wxString::DoFormatWchar(const wxChar *format, ...)
c9f78968 1897#endif
341e7d28 1898{
77c3e48a 1899 va_list argptr;
c9f78968 1900 va_start(argptr, format);
341e7d28 1901
77c3e48a 1902 wxString s;
c9f78968 1903 s.PrintfV(format, argptr);
341e7d28 1904
77c3e48a 1905 va_end(argptr);
341e7d28 1906
77c3e48a 1907 return s;
341e7d28 1908}
d1f6e2cf
VS
1909#endif // !wxUSE_UTF8_LOCALE_ONLY
1910
1911#if wxUSE_UNICODE_UTF8
1912/* static */
1913wxString wxString::DoFormatUtf8(const char *format, ...)
1914{
1915 va_list argptr;
1916 va_start(argptr, format);
1917
1918 wxString s;
1919 s.PrintfV(format, argptr);
1920
1921 va_end(argptr);
1922
1923 return s;
1924}
1925#endif // wxUSE_UNICODE_UTF8
341e7d28
VZ
1926
1927/* static */
c9f78968 1928wxString wxString::FormatV(const wxString& format, va_list argptr)
341e7d28
VZ
1929{
1930 wxString s;
c9f78968 1931 s.PrintfV(format, argptr);
341e7d28
VZ
1932 return s;
1933}
1934
d1f6e2cf 1935#if !wxUSE_UTF8_LOCALE_ONLY
c9f78968 1936#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
d1f6e2cf 1937int wxStringPrintfMixinBase::DoPrintfWchar(const wxChar *format, ...)
c9f78968 1938#else
d1f6e2cf 1939int wxString::DoPrintfWchar(const wxChar *format, ...)
c9f78968 1940#endif
c801d85f 1941{
ba9bbf13 1942 va_list argptr;
c9f78968 1943 va_start(argptr, format);
c801d85f 1944
c9f78968
VS
1945#ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
1946 // get a pointer to the wxString instance; we have to use dynamic_cast<>
1947 // because it's the only cast that works safely for downcasting when
1948 // multiple inheritance is used:
1949 wxString *str = static_cast<wxString*>(this);
1950#else
1951 wxString *str = this;
1952#endif
1953
1954 int iLen = str->PrintfV(format, argptr);
c801d85f 1955
ba9bbf13 1956 va_end(argptr);
c801d85f 1957
ba9bbf13 1958 return iLen;
c801d85f 1959}
d1f6e2cf
VS
1960#endif // !wxUSE_UTF8_LOCALE_ONLY
1961
1962#if wxUSE_UNICODE_UTF8
1963int wxString::DoPrintfUtf8(const char *format, ...)
1964{
1965 va_list argptr;
1966 va_start(argptr, format);
1967
1968 int iLen = PrintfV(format, argptr);
1969
1970 va_end(argptr);
1971
1972 return iLen;
1973}
1974#endif // wxUSE_UNICODE_UTF8
c801d85f 1975
67612ff1
DE
1976/*
1977 Uses wxVsnprintf and places the result into this string.
1978
1979 In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
1980 it is vswprintf. Due to a discrepancy between vsnprintf and vswprintf in
1981 the ISO C99 (and thus SUSv3) standard the return value for the case of
1982 an undersized buffer is inconsistent. For conforming vsnprintf
1983 implementations the function must return the number of characters that
1984 would have been printed had the buffer been large enough. For conforming
1985 vswprintf implementations the function must return a negative number
1986 and set errno.
1987
1988 What vswprintf sets errno to is undefined but Darwin seems to set it to
a9a854d7
DE
1989 EOVERFLOW. The only expected errno are EILSEQ and EINVAL. Both of
1990 those are defined in the standard and backed up by several conformance
1991 statements. Note that ENOMEM mentioned in the manual page does not
1992 apply to swprintf, only wprintf and fwprintf.
1993
1994 Official manual page:
1995 http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
1996
1997 Some conformance statements (AIX, Solaris):
1998 http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
1999 http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
2000
2001 Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
2002 EILSEQ and EINVAL are specifically defined to mean the error is other than
2003 an undersized buffer and no other errno are defined we treat those two
4c51a665 2004 as meaning hard errors and everything else gets the old behaviour which
a9a854d7 2005 is to keep looping and increasing buffer size until the function succeeds.
c95e653c 2006
4c51a665
DS
2007 In practice it's impossible to determine before compilation which behaviour
2008 may be used. The vswprintf function may have vsnprintf-like behaviour or
2009 vice-versa. Behaviour detected on one release can theoretically change
67612ff1
DE
2010 with an updated release. Not to mention that configure testing for it
2011 would require the test to be run on the host system, not the build system
2012 which makes cross compilation difficult. Therefore, we make no assumptions
4c51a665 2013 about behaviour and try our best to handle every known case, including the
67612ff1
DE
2014 case where wxVsnprintf returns a negative number and fails to set errno.
2015
2016 There is yet one more non-standard implementation and that is our own.
2017 Fortunately, that can be detected at compile-time.
2018
2019 On top of all that, ISO C99 explicitly defines snprintf to write a null
2020 character to the last position of the specified buffer. That would be at
2021 the given buffer size minus 1. It is supposed to do this even if it
2022 turns out that the buffer is sized too small.
2023
4c51a665 2024 Darwin (tested on 10.5) follows the C99 behaviour exactly.
67612ff1 2025
4c51a665 2026 Glibc 2.6 almost follows the C99 behaviour except vswprintf never sets
67612ff1
DE
2027 errno even when it fails. However, it only seems to ever fail due
2028 to an undersized buffer.
2029*/
2523e9b7
VS
2030#if wxUSE_UNICODE_UTF8
2031template<typename BufferType>
2032#else
2033// we only need one version in non-UTF8 builds and at least two Windows
2034// compilers have problems with this function template, so use just one
2035// normal function here
2036#endif
2037static int DoStringPrintfV(wxString& str,
2038 const wxString& format, va_list argptr)
c801d85f 2039{
f6f5941b 2040 int size = 1024;
e87b7833 2041
f6f5941b
VZ
2042 for ( ;; )
2043 {
2523e9b7
VS
2044#if wxUSE_UNICODE_UTF8
2045 BufferType tmp(str, size + 1);
2046 typename BufferType::CharType *buf = tmp;
2047#else
2048 wxStringBuffer tmp(str, size + 1);
de2589be 2049 wxChar *buf = tmp;
2523e9b7 2050#endif
2bb67b80 2051
ba9bbf13
WS
2052 if ( !buf )
2053 {
2054 // out of memory
2055 return -1;
e87b7833 2056 }
f6f5941b 2057
ba9bbf13
WS
2058 // wxVsnprintf() may modify the original arg pointer, so pass it
2059 // only a copy
2060 va_list argptrcopy;
2061 wxVaCopy(argptrcopy, argptr);
67612ff1
DE
2062
2063#ifndef __WXWINCE__
2064 // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
2065 errno = 0;
2066#endif
2523e9b7 2067 int len = wxVsnprintf(buf, size, format, argptrcopy);
ba9bbf13
WS
2068 va_end(argptrcopy);
2069
2070 // some implementations of vsnprintf() don't NUL terminate
2071 // the string if there is not enough space for it so
2072 // always do it manually
67612ff1
DE
2073 // FIXME: This really seems to be the wrong and would be an off-by-one
2074 // bug except the code above allocates an extra character.
9a83f860 2075 buf[size] = wxT('\0');
ba9bbf13 2076
caff62f2
VZ
2077 // vsnprintf() may return either -1 (traditional Unix behaviour) or the
2078 // total number of characters which would have been written if the
b1727cfe 2079 // buffer were large enough (newer standards such as Unix98)
de2589be
VZ
2080 if ( len < 0 )
2081 {
52de37c7
VS
2082 // NB: wxVsnprintf() may call either wxCRT_VsnprintfW or
2083 // wxCRT_VsnprintfA in UTF-8 build; wxUSE_WXVSNPRINTF
2084 // is true if *both* of them use our own implementation,
2085 // otherwise we can't be sure
f2bbe5b6
VZ
2086#if wxUSE_WXVSNPRINTF
2087 // we know that our own implementation of wxVsnprintf() returns -1
2088 // only for a format error - thus there's something wrong with
2089 // the user's format string
a33c7045 2090 buf[0] = '\0';
f2bbe5b6 2091 return -1;
52de37c7
VS
2092#else // possibly using system version
2093 // assume it only returns error if there is not enough space, but
2094 // as we don't know how much we need, double the current size of
2095 // the buffer
67612ff1 2096#ifndef __WXWINCE__
a9a854d7
DE
2097 if( (errno == EILSEQ) || (errno == EINVAL) )
2098 // If errno was set to one of the two well-known hard errors
2099 // then fail immediately to avoid an infinite loop.
2100 return -1;
2101 else
2102#endif // __WXWINCE__
67612ff1
DE
2103 // still not enough, as we don't know how much we need, double the
2104 // current size of the buffer
2105 size *= 2;
f2bbe5b6 2106#endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
de2589be 2107 }
64f8f94c 2108 else if ( len >= size )
de2589be 2109 {
f2bbe5b6 2110#if wxUSE_WXVSNPRINTF
c95e653c 2111 // we know that our own implementation of wxVsnprintf() returns
f2bbe5b6
VZ
2112 // size+1 when there's not enough space but that's not the size
2113 // of the required buffer!
2114 size *= 2; // so we just double the current size of the buffer
2115#else
64f8f94c
VZ
2116 // some vsnprintf() implementations NUL-terminate the buffer and
2117 // some don't in len == size case, to be safe always add 1
67612ff1
DE
2118 // FIXME: I don't quite understand this comment. The vsnprintf
2119 // function is specifically defined to return the number of
2120 // characters printed not including the null terminator.
2121 // So OF COURSE you need to add 1 to get the right buffer size.
2122 // The following line is definitely correct, no question.
64f8f94c 2123 size = len + 1;
f2bbe5b6 2124#endif
de2589be
VZ
2125 }
2126 else // ok, there was enough space
f6f5941b 2127 {
f6f5941b
VZ
2128 break;
2129 }
f6f5941b
VZ
2130 }
2131
2132 // we could have overshot
2523e9b7
VS
2133 str.Shrink();
2134
2135 return str.length();
2136}
c801d85f 2137
2523e9b7
VS
2138int wxString::PrintfV(const wxString& format, va_list argptr)
2139{
2523e9b7
VS
2140#if wxUSE_UNICODE_UTF8
2141 #if wxUSE_STL_BASED_WXSTRING
2142 typedef wxStringTypeBuffer<char> Utf8Buffer;
2143 #else
6798451b 2144 typedef wxStringInternalBuffer Utf8Buffer;
2523e9b7
VS
2145 #endif
2146#endif
2147
2148#if wxUSE_UTF8_LOCALE_ONLY
c6255a6e 2149 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
2523e9b7
VS
2150#else
2151 #if wxUSE_UNICODE_UTF8
2152 if ( wxLocaleIsUtf8 )
c6255a6e 2153 return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
2523e9b7
VS
2154 else
2155 // wxChar* version
c6255a6e 2156 return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);
2523e9b7 2157 #else
c6255a6e 2158 return DoStringPrintfV(*this, format, argptr);
2523e9b7
VS
2159 #endif // UTF8/WCHAR
2160#endif
c801d85f
KB
2161}
2162
097c080b
VZ
2163// ----------------------------------------------------------------------------
2164// misc other operations
2165// ----------------------------------------------------------------------------
0c5d3e1c 2166
d775fa82 2167// returns true if the string matches the pattern which may contain '*' and
0c5d3e1c
VZ
2168// '?' metacharacters (as usual, '?' matches any character and '*' any number
2169// of them)
8a540c88 2170bool wxString::Matches(const wxString& mask) const
097c080b 2171{
d6044f58
VZ
2172 // I disable this code as it doesn't seem to be faster (in fact, it seems
2173 // to be much slower) than the old, hand-written code below and using it
2174 // here requires always linking with libregex even if the user code doesn't
2175 // use it
2176#if 0 // wxUSE_REGEX
706c2ac9
VZ
2177 // first translate the shell-like mask into a regex
2178 wxString pattern;
2179 pattern.reserve(wxStrlen(pszMask));
2180
9a83f860 2181 pattern += wxT('^');
706c2ac9
VZ
2182 while ( *pszMask )
2183 {
2184 switch ( *pszMask )
2185 {
9a83f860
VZ
2186 case wxT('?'):
2187 pattern += wxT('.');
706c2ac9
VZ
2188 break;
2189
9a83f860
VZ
2190 case wxT('*'):
2191 pattern += wxT(".*");
706c2ac9
VZ
2192 break;
2193
9a83f860
VZ
2194 case wxT('^'):
2195 case wxT('.'):
2196 case wxT('$'):
2197 case wxT('('):
2198 case wxT(')'):
2199 case wxT('|'):
2200 case wxT('+'):
2201 case wxT('\\'):
706c2ac9
VZ
2202 // these characters are special in a RE, quote them
2203 // (however note that we don't quote '[' and ']' to allow
2204 // using them for Unix shell like matching)
9a83f860 2205 pattern += wxT('\\');
706c2ac9
VZ
2206 // fall through
2207
2208 default:
2209 pattern += *pszMask;
2210 }
2211
2212 pszMask++;
2213 }
9a83f860 2214 pattern += wxT('$');
706c2ac9
VZ
2215
2216 // and now use it
2217 return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
2218#else // !wxUSE_REGEX
9a4232dc
VZ
2219 // TODO: this is, of course, awfully inefficient...
2220
8a540c88
VS
2221 // FIXME-UTF8: implement using iterators, remove #if
2222#if wxUSE_UNICODE_UTF8
de4983f3
VS
2223 const wxScopedWCharBuffer maskBuf = mask.wc_str();
2224 const wxScopedWCharBuffer txtBuf = wc_str();
8a540c88
VS
2225 const wxChar *pszMask = maskBuf.data();
2226 const wxChar *pszTxt = txtBuf.data();
2227#else
2228 const wxChar *pszMask = mask.wx_str();
9a4232dc 2229 // the char currently being checked
8a540c88
VS
2230 const wxChar *pszTxt = wx_str();
2231#endif
9a4232dc
VZ
2232
2233 // the last location where '*' matched
2234 const wxChar *pszLastStarInText = NULL;
2235 const wxChar *pszLastStarInMask = NULL;
2236
2237match:
2238 for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
097c080b 2239 switch ( *pszMask ) {
223d09f6
KB
2240 case wxT('?'):
2241 if ( *pszTxt == wxT('\0') )
d775fa82 2242 return false;
097c080b 2243
9a4232dc 2244 // pszTxt and pszMask will be incremented in the loop statement
0c5d3e1c 2245
097c080b
VZ
2246 break;
2247
223d09f6 2248 case wxT('*'):
097c080b 2249 {
9a4232dc
VZ
2250 // remember where we started to be able to backtrack later
2251 pszLastStarInText = pszTxt;
2252 pszLastStarInMask = pszMask;
2253
097c080b 2254 // ignore special chars immediately following this one
9a4232dc 2255 // (should this be an error?)
223d09f6 2256 while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
097c080b
VZ
2257 pszMask++;
2258
2259 // if there is nothing more, match
223d09f6 2260 if ( *pszMask == wxT('\0') )
d775fa82 2261 return true;
097c080b
VZ
2262
2263 // are there any other metacharacters in the mask?
c86f1403 2264 size_t uiLenMask;
223d09f6 2265 const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));
097c080b
VZ
2266
2267 if ( pEndMask != NULL ) {
2268 // we have to match the string between two metachars
2269 uiLenMask = pEndMask - pszMask;
2270 }
2271 else {
2272 // we have to match the remainder of the string
2bb67b80 2273 uiLenMask = wxStrlen(pszMask);
097c080b
VZ
2274 }
2275
2276 wxString strToMatch(pszMask, uiLenMask);
2bb67b80 2277 const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
097c080b 2278 if ( pMatch == NULL )
d775fa82 2279 return false;
097c080b
VZ
2280
2281 // -1 to compensate "++" in the loop
2282 pszTxt = pMatch + uiLenMask - 1;
2283 pszMask += uiLenMask - 1;
2284 }
2285 break;
2286
2287 default:
2288 if ( *pszMask != *pszTxt )
d775fa82 2289 return false;
097c080b
VZ
2290 break;
2291 }
2292 }
2293
2294 // match only if nothing left
9a4232dc 2295 if ( *pszTxt == wxT('\0') )
d775fa82 2296 return true;
9a4232dc
VZ
2297
2298 // if we failed to match, backtrack if we can
2299 if ( pszLastStarInText ) {
2300 pszTxt = pszLastStarInText + 1;
2301 pszMask = pszLastStarInMask;
2302
2303 pszLastStarInText = NULL;
2304
2305 // don't bother resetting pszLastStarInMask, it's unnecessary
2306
2307 goto match;
2308 }
2309
d775fa82 2310 return false;
706c2ac9 2311#endif // wxUSE_REGEX/!wxUSE_REGEX
097c080b
VZ
2312}
2313
1fc5dd6f 2314// Count the number of chars
c9f78968 2315int wxString::Freq(wxUniChar ch) const
1fc5dd6f
JS
2316{
2317 int count = 0;
8f93a29f 2318 for ( const_iterator i = begin(); i != end(); ++i )
1fc5dd6f 2319 {
8f93a29f 2320 if ( *i == ch )
1fc5dd6f
JS
2321 count ++;
2322 }
2323 return count;
2324}
4e79262f 2325